commit a131fac6aa2d167f06fe41b6700e6aadfc5b11e1 Author: ModelHub XC Date: Fri May 22 21:02:10 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: W-61/llama3-hh-helpful-qt045-b0p3-20260429-085449 Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..b4d39c5 --- /dev/null +++ b/README.md @@ -0,0 +1,62 @@ +--- +library_name: transformers +base_model: W-61/llama-3-8b-base-sft-hh-helpful-4xh200 +tags: +- alignment-handbook +- new-dpo +- generated_from_trainer +datasets: +- Anthropic/hh-rlhf +model-index: +- name: llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449 + results: [] +--- + + + +# llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449 + +This model is a fine-tuned version of [W-61/llama-3-8b-base-sft-hh-helpful-4xh200](https://huggingface.co/W-61/llama-3-8b-base-sft-hh-helpful-4xh200) on the Anthropic/hh-rlhf dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 5e-07 +- train_batch_size: 8 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 64 +- total_eval_batch_size: 32 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 1 + +### Training results + + + +### Framework versions + +- Transformers 4.51.0 +- Pytorch 2.3.1+cu121 +- Datasets 2.21.0 +- Tokenizers 0.21.4 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..51d2a7a --- /dev/null +++ b/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 0.0, + "train_loss": 1.095842420442164, + "train_runtime": 1736.9553, + "train_samples": 43598, + "train_samples_per_second": 25.1, + "train_steps_per_second": 0.392 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..5092b09 --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.51.0", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..76247c9 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,9 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.51.0" +} diff --git a/margin_logs/margins.jsonl b/margin_logs/margins.jsonl new file mode 100644 index 0000000..03e38dd --- /dev/null +++ b/margin_logs/margins.jsonl @@ -0,0 +1,681 @@ +{"epoch": 0.0, "step": 1, "batch_size": 64, "mean": -0.02287048101425171, "std": 0.42023447155952454, "min": -1.4034271240234375, "p10": -0.46674575805664065, "median": 0.04234886169433594, "p90": 0.4323463439941407, "max": 0.89263916015625, "pos_frac": 0.53125, "sample": [-0.06523895263671875, 0.436798095703125, 0.27811431884765625, -0.9194221496582031, 0.018890380859375, 0.20587158203125, 0.18878173828125, -0.3968696594238281, 0.26206207275390625, 0.2470550537109375, -0.040912628173828125, 0.4394989013671875, -0.44133758544921875, -0.39148712158203125, 0.2764854431152344, 0.89263916015625, -0.42584991455078125, -0.46125030517578125, -0.8638992309570312, -0.3508758544921875, 0.371368408203125, 0.887847900390625, -0.382904052734375, 0.36145782470703125, -0.4890003204345703, 0.052455902099609375, -0.036136627197265625, 0.23079299926757812, 0.2469482421875, 0.1643218994140625, -0.07129669189453125, 0.2790794372558594, 0.3637123107910156, -0.8916168212890625, 0.03298759460449219, -0.2790107727050781, -0.17860984802246094, 0.23892593383789062, 0.05171012878417969, -0.2564239501953125, -0.14655303955078125, 0.27777862548828125, 0.0810394287109375, -1.4034271240234375, -0.28739166259765625, -0.1489429473876953, 0.44918060302734375, 0.1693286895751953, 0.10933303833007812, -0.14766693115234375, -0.40944671630859375, -0.18532562255859375, 0.6261310577392578, -0.20856857299804688, 0.602569580078125, 0.05538177490234375, 0.1505279541015625, 0.1313800811767578, -0.006317138671875, 0.42195892333984375, -0.29936981201171875, -0.4691009521484375, 0.16705322265625, -0.5789260864257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000001.npy"} +{"epoch": 0.0014684287812041115, "step": 2, "batch_size": 64, "mean": -0.06572240591049194, "std": 0.3523969054222107, "min": -0.9291305541992188, "p10": -0.46334152221679686, "median": -0.05502510070800781, "p90": 0.3672500610351563, "max": 1.0444793701171875, "pos_frac": 0.4375, "sample": [-0.2829437255859375, 0.3027191162109375, -0.19867706298828125, -0.3062286376953125, 0.10318756103515625, 0.20131683349609375, -0.34906005859375, 0.2802886962890625, 0.1914520263671875, -0.31072998046875, 0.08922195434570312, 0.10284614562988281, -0.03655242919921875, -0.0604095458984375, -0.06208038330078125, 0.32562255859375, -0.37982177734375, 0.2746162414550781, -0.049640655517578125, 0.3752174377441406, -0.103973388671875, 0.0699462890625, 0.36417388916015625, -0.033428192138671875, 0.37265777587890625, -0.3787078857421875, -0.6610565185546875, 0.4720420837402344, 0.47701263427734375, -0.27928924560546875, -0.44719696044921875, -0.0965118408203125, -0.7628555297851562, 0.046764373779296875, 0.06670379638671875, -0.9291305541992188, -0.7122802734375, -0.16554832458496094, 0.1485595703125, -0.07539939880371094, 0.2588920593261719, 0.039890289306640625, 0.201690673828125, 0.0623016357421875, 1.0444793701171875, -0.37696075439453125, -0.02794647216796875, -0.223297119140625, -0.35730743408203125, -0.1309051513671875, -0.3106689453125, -0.11409187316894531, -0.1669769287109375, 0.131317138671875, -0.2361297607421875, 0.4093780517578125, -0.6485977172851562, 0.36856842041015625, -0.1951904296875, -0.4702606201171875, -0.7624168395996094, 0.008928298950195312, -0.31630706787109375, 0.022550582885742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000002.npy"} +{"epoch": 0.002936857562408223, "step": 3, "batch_size": 64, "mean": 0.04345354437828064, "std": 0.3514525890350342, "min": -0.9645233154296875, "p10": -0.36395721435546874, "median": 0.018914222717285156, "p90": 0.5056064605712892, "max": 0.94873046875, "pos_frac": 0.5625, "sample": [-0.10715484619140625, -0.08351516723632812, 0.5889053344726562, 0.1807098388671875, 0.011278152465820312, -0.2550392150878906, 0.1979045867919922, 0.23459625244140625, 0.009063720703125, 0.0009822845458984375, 0.03350830078125, -0.06809806823730469, 0.5584564208984375, -0.3628997802734375, -0.08093833923339844, 0.5604400634765625, -0.250335693359375, -0.3695068359375, 0.14037704467773438, -0.1947460174560547, -0.43294525146484375, 0.3804931640625, 0.47283172607421875, 0.41748046875, -0.5289688110351562, -0.06763267517089844, 0.20258331298828125, 0.09070014953613281, 0.21934127807617188, -0.0230255126953125, 0.33235931396484375, 0.519134521484375, 0.15324020385742188, 0.94873046875, 0.04927253723144531, -0.24961090087890625, -0.27283477783203125, 0.07157325744628906, 0.13065338134765625, 0.2204875946044922, 0.0110015869140625, 0.02655029296875, -0.0211181640625, 0.29084014892578125, 0.36540985107421875, -0.01886749267578125, 0.18338775634765625, 0.4740409851074219, -0.9645233154296875, -0.3450508117675781, 0.3157062530517578, -0.2563018798828125, 0.2875175476074219, -0.04862213134765625, 0.08238410949707031, 0.771209716796875, -0.18851470947265625, -0.4689178466796875, -0.279205322265625, 0.6390380859375, -0.18415069580078125, -0.364410400390625, -0.207855224609375, -0.6963729858398438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000003.npy"} +{"epoch": 0.004405286343612335, "step": 4, "batch_size": 64, "mean": -0.04964029788970947, "std": 0.3945470452308655, "min": -0.9305191040039062, "p10": -0.5812477111816406, "median": -0.03455543518066406, "p90": 0.47739105224609407, "max": 0.8212814331054688, "pos_frac": 0.484375, "sample": [0.061351776123046875, -0.23756790161132812, 0.05895423889160156, -0.0815277099609375, 0.16788482666015625, 0.157867431640625, 0.6239471435546875, -0.520538330078125, -0.5852718353271484, -0.10882568359375, 0.010541915893554688, 0.2277679443359375, -0.03057861328125, 0.157440185546875, -0.305206298828125, -0.09641456604003906, -0.5810928344726562, -0.12966156005859375, -0.5813140869140625, 0.15638351440429688, -0.6052703857421875, 0.1003570556640625, -0.337432861328125, -0.0485687255859375, 0.02715301513671875, 0.04534912109375, 0.0626983642578125, 0.2958869934082031, -0.12537193298339844, -0.490386962890625, 0.018581390380859375, 0.026273727416992188, -0.8528518676757812, 0.036243438720703125, -0.23577308654785156, 0.398895263671875, -0.9305191040039062, 0.76495361328125, -0.15886688232421875, 0.5110321044921875, 0.04460906982421875, -0.11618995666503906, 0.6486167907714844, -0.077972412109375, -0.3894920349121094, 0.2182159423828125, -0.919036865234375, 0.78155517578125, -0.2024059295654297, -0.6825180053710938, -0.038532257080078125, 0.154815673828125, 0.15358734130859375, -0.3866119384765625, 0.04154205322265625, 0.3796272277832031, 0.8212814331054688, -0.2986297607421875, 0.7134170532226562, 0.12494659423828125, -0.3333892822265625, -0.2069530487060547, -0.4066925048828125, -0.067291259765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000004.npy"} +{"epoch": 0.005873715124816446, "step": 5, "batch_size": 64, "mean": -0.0020435750484466553, "std": 0.3818022608757019, "min": -1.0519561767578125, "p10": -0.4678718566894531, "median": -0.013476371765136719, "p90": 0.4520347595214844, "max": 1.4437942504882812, "pos_frac": 0.5, "sample": [-0.5406723022460938, 0.5097732543945312, -0.0779876708984375, 0.28507232666015625, -0.19936370849609375, -0.3983001708984375, 0.21741104125976562, -0.053516387939453125, -0.1872577667236328, -0.6293258666992188, 0.03513336181640625, 0.151123046875, 0.010675430297851562, 0.5047073364257812, -0.07842254638671875, -0.2365264892578125, 0.07879638671875, -0.2579765319824219, 0.07564544677734375, -0.140960693359375, -0.04438018798828125, 0.40729522705078125, 0.3504905700683594, -0.04123687744140625, 0.042324066162109375, -0.22156524658203125, 0.3556060791015625, -0.49768829345703125, -0.25897216796875, -0.1252288818359375, 0.04027557373046875, 0.07487106323242188, 0.439788818359375, -0.10175704956054688, 0.05572700500488281, 0.1066436767578125, 0.6526260375976562, -0.037628173828125, 0.45728302001953125, 0.232147216796875, 0.2482452392578125, 0.30036163330078125, 0.5230636596679688, -1.0519561767578125, 1.4437942504882812, -0.20098876953125, -0.36346435546875, 0.17290496826171875, -0.2476654052734375, -0.6851654052734375, 0.3643608093261719, -0.24559783935546875, -0.30460357666015625, -0.2303619384765625, 0.09027099609375, 0.11786651611328125, -0.240325927734375, -0.549072265625, -0.21100616455078125, 0.13555908203125, -0.5061874389648438, 0.202789306640625, -0.3787994384765625, 0.5305404663085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000005.npy"} +{"epoch": 0.007342143906020558, "step": 6, "batch_size": 64, "mean": 0.0013370811939239502, "std": 0.3965916335582733, "min": -0.884521484375, "p10": -0.5403009414672851, "median": 0.041778564453125, "p90": 0.43555145263671885, "max": 0.876556396484375, "pos_frac": 0.53125, "sample": [0.2621002197265625, 0.14189720153808594, -0.061717987060546875, -0.3359222412109375, 0.08560943603515625, 0.778656005859375, -0.5179119110107422, -0.210540771484375, 0.288482666015625, -0.2541351318359375, 0.2830543518066406, 0.17195510864257812, -0.354583740234375, 0.37310218811035156, -0.3335304260253906, -0.5501861572265625, -0.7698211669921875, -0.1726207733154297, 0.3455390930175781, -0.02059173583984375, -0.0502166748046875, -0.02367401123046875, 0.758575439453125, 0.00298309326171875, 0.07989501953125, 0.064056396484375, 0.40869903564453125, 0.32279205322265625, 0.08235931396484375, 0.3210716247558594, -0.168914794921875, 0.09842109680175781, 0.6276321411132812, 0.31247711181640625, 0.44705963134765625, -0.332733154296875, -0.884521484375, -0.5848236083984375, 0.18653106689453125, -0.76116943359375, -0.475982666015625, -0.145843505859375, 0.276641845703125, 0.6478900909423828, 0.23475265502929688, 0.06482315063476562, 0.2370166778564453, 0.357208251953125, 0.6431732177734375, -0.3962554931640625, -0.4061279296875, 0.06789398193359375, -0.549896240234375, -0.1432647705078125, 0.876556396484375, -0.2908058166503906, -0.034820556640625, -0.6832275390625, -0.296661376953125, 0.13542938232421875, 0.019500732421875, -0.23724746704101562, 0.35137939453125, -0.221893310546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000006.npy"} +{"epoch": 0.00881057268722467, "step": 7, "batch_size": 64, "mean": 0.008150070905685425, "std": 0.3914256691932678, "min": -0.8945465087890625, "p10": -0.5682880401611328, "median": 0.015428543090820312, "p90": 0.5658348083496099, "max": 0.7113742828369141, "pos_frac": 0.515625, "sample": [-0.20085525512695312, -0.5913543701171875, 0.07504653930664062, -0.08150863647460938, -0.8945465087890625, -0.04071807861328125, -0.4536552429199219, 0.362091064453125, -0.2231597900390625, 0.6511383056640625, 0.64111328125, -0.22243499755859375, 0.03455352783203125, 0.23462677001953125, -0.1164398193359375, 0.3066558837890625, 0.09305191040039062, -0.24335289001464844, -0.3175926208496094, -0.8755645751953125, 0.23351669311523438, 0.0633544921875, 0.297119140625, 0.6741256713867188, 0.00726318359375, -0.08202362060546875, -0.07105636596679688, -0.16195297241210938, -0.5248985290527344, 0.11455917358398438, 0.023593902587890625, 0.6302337646484375, -0.13311386108398438, 0.152801513671875, 0.3394317626953125, 0.44362640380859375, -0.14771270751953125, 0.25092506408691406, -0.752410888671875, 0.0811920166015625, -0.00994873046875, -0.467681884765625, 0.33849334716796875, -0.586883544921875, 0.412628173828125, 0.21759033203125, 0.4282798767089844, -0.6349372863769531, 0.258087158203125, -0.42649078369140625, 0.3928375244140625, -0.14083099365234375, -0.5999908447265625, 0.6182098388671875, -0.04772186279296875, 0.08692169189453125, -0.1614837646484375, -0.055530548095703125, 0.7113742828369141, 0.224334716796875, 0.6289138793945312, 0.2887115478515625, -0.402435302734375, -0.1265106201171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000007.npy"} +{"epoch": 0.010279001468428781, "step": 8, "batch_size": 64, "mean": -0.06675609946250916, "std": 0.47908222675323486, "min": -1.8746337890625, "p10": -0.4663200378417968, "median": -0.03059864044189453, "p90": 0.46268768310546887, "max": 1.20831298828125, "pos_frac": 0.4375, "sample": [0.496002197265625, 0.57891845703125, -0.39013671875, -0.22651100158691406, -0.00994110107421875, -0.05598640441894531, 0.0227508544921875, 0.20383453369140625, 0.811737060546875, 0.3819694519042969, -0.03282928466796875, 0.4108619689941406, 0.358612060546875, 0.027980804443359375, 0.5872116088867188, -0.49897003173828125, 0.14293670654296875, -0.22927474975585938, 0.4324493408203125, -0.684356689453125, 0.0155487060546875, -1.37628173828125, -0.11887359619140625, -0.1641998291015625, -0.028367996215820312, -0.1390533447265625, -0.365631103515625, -0.24326133728027344, -0.17742919921875, -0.9927978515625, 0.180084228515625, -0.28577423095703125, -0.16224288940429688, 0.47564697265625, 0.07024383544921875, -0.8657684326171875, -0.15587615966796875, 0.170562744140625, -0.12148284912109375, 0.1013946533203125, 0.5554084777832031, -0.312652587890625, -0.17198944091796875, 0.303375244140625, -0.024442672729492188, -1.8746337890625, -1.02923583984375, 0.05600738525390625, 0.05843925476074219, 0.05811309814453125, -0.3547935485839844, -0.02099609375, -0.3747406005859375, -0.2860527038574219, 1.20831298828125, -0.24434661865234375, -0.344573974609375, 0.02495574951171875, 0.24810791015625, -0.1152801513671875, -0.08338165283203125, 0.39325904846191406, -0.12433242797851562, 0.0393829345703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000008.npy"} +{"epoch": 0.011747430249632892, "step": 9, "batch_size": 64, "mean": 0.04486680030822754, "std": 0.4689461886882782, "min": -0.9081192016601562, "p10": -0.4981060028076172, "median": -0.015382766723632812, "p90": 0.7157386779785161, "max": 1.370941162109375, "pos_frac": 0.453125, "sample": [-0.04701995849609375, -0.08383941650390625, 0.5558547973632812, 0.0529632568359375, 0.1340179443359375, -0.31787109375, -0.123291015625, -0.010379791259765625, 0.23369598388671875, 0.573211669921875, -0.9081192016601562, -0.135833740234375, -0.3350372314453125, -0.38775634765625, -0.498626708984375, -0.542694091796875, 0.5618057250976562, -0.2587928771972656, -0.2041473388671875, -0.46242523193359375, -0.4968910217285156, 0.10725975036621094, 0.25360107421875, 0.39278411865234375, 0.428375244140625, 0.7698440551757812, 0.5016555786132812, -0.17193603515625, -0.10378074645996094, 0.7988090515136719, -0.26000213623046875, -0.3418560028076172, -0.1324310302734375, 0.34798431396484375, -0.1807384490966797, -0.7216110229492188, -0.08886528015136719, -0.010862350463867188, 0.15771484375, 0.8534622192382812, -0.28131866455078125, -0.102783203125, -0.64422607421875, 0.2999744415283203, 0.8234481811523438, -0.4159126281738281, -0.109832763671875, -0.282135009765625, -0.01845550537109375, 0.13504600524902344, 0.1037139892578125, 0.4398345947265625, 0.5894927978515625, 0.7965068817138672, 0.3904743194580078, 0.2211761474609375, 0.513824462890625, 0.1080780029296875, -0.012310028076171875, 1.370941162109375, -0.8741912841796875, 0.9115676879882812, -0.6856536865234375, -0.30401611328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000009.npy"} +{"epoch": 0.013215859030837005, "step": 10, "batch_size": 64, "mean": 0.010898619890213013, "std": 0.3509206771850586, "min": -0.92779541015625, "p10": -0.43809661865234373, "median": 0.034397125244140625, "p90": 0.3877540588378907, "max": 0.947113037109375, "pos_frac": 0.578125, "sample": [-0.1223907470703125, -0.15765380859375, -0.11557197570800781, 0.0459747314453125, -0.6266326904296875, 0.24504852294921875, 0.08158302307128906, 0.1091461181640625, 0.0791015625, -0.75146484375, -0.24169158935546875, 0.2315521240234375, 0.08174896240234375, 0.4204864501953125, -0.9104690551757812, 0.272796630859375, -0.02364349365234375, 0.00060272216796875, 0.18499183654785156, 0.04277801513671875, 0.19731903076171875, -0.16713714599609375, 0.3962249755859375, 0.08259391784667969, -0.14957809448242188, -0.1997833251953125, -0.1796722412109375, -0.4497833251953125, 0.07325935363769531, 0.4877586364746094, -0.001190185546875, 0.13477325439453125, 0.0260162353515625, 0.291748046875, -0.15260696411132812, -0.3619956970214844, -0.92779541015625, 0.746246337890625, 0.29174041748046875, 0.016330718994140625, -0.09088897705078125, -0.41082763671875, -0.13222122192382812, -0.561553955078125, 0.1157989501953125, 0.49604034423828125, 0.30355072021484375, 0.36798858642578125, -0.5321273803710938, -0.1103973388671875, 0.29192352294921875, -0.011606216430664062, 0.0076446533203125, 0.3494148254394531, -0.17657470703125, -0.07057380676269531, 0.5417098999023438, 0.1631317138671875, 0.006866455078125, 0.947113037109375, 0.2855377197265625, -0.38059043884277344, 0.1522502899169922, 0.1451416015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000010.npy"} +{"epoch": 0.014684287812041116, "step": 11, "batch_size": 64, "mean": -0.0021438300609588623, "std": 0.42058265209198, "min": -1.602783203125, "p10": -0.47995758056640625, "median": 0.02982330322265625, "p90": 0.4522550582885745, "max": 1.2443389892578125, "pos_frac": 0.53125, "sample": [-0.5373077392578125, 0.8910675048828125, 0.48175048828125, 0.2575492858886719, -0.00738525390625, 0.05219268798828125, 0.21181488037109375, 0.16829681396484375, -0.12884140014648438, 0.01953125, 0.2110748291015625, 0.11391830444335938, -0.295013427734375, 0.37500762939453125, 0.1318359375, 0.0591583251953125, 0.2777099609375, -0.08592605590820312, 0.531402587890625, 0.3409767150878906, 0.3834857940673828, 0.19478607177734375, -0.43502235412597656, -0.039947509765625, -0.342010498046875, 0.32387542724609375, 0.09609222412109375, -0.58294677734375, 0.026214599609375, 0.3353004455566406, 0.3179779052734375, -0.746307373046875, 0.3101844787597656, -0.184783935546875, -0.1096649169921875, 0.03814697265625, 0.48172760009765625, 0.5416793823242188, -0.4799041748046875, -0.1508159637451172, -0.24485397338867188, -0.11817169189453125, -0.16374969482421875, -0.716796875, 0.20481109619140625, 0.15570831298828125, 0.6775741577148438, -0.06166267395019531, -1.602783203125, 0.08678436279296875, -0.13307952880859375, -0.47998046875, 0.0334320068359375, -0.6517715454101562, 0.11272048950195312, -0.2563133239746094, -0.12106704711914062, -0.21692657470703125, -0.11600494384765625, 1.2443389892578125, -0.3728179931640625, 0.06646919250488281, -0.09986114501953125, -0.4100837707519531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000011.npy"} +{"epoch": 0.016152716593245228, "step": 12, "batch_size": 64, "mean": 0.0008526891469955444, "std": 0.34676823019981384, "min": -0.8912353515625, "p10": -0.45575485229492185, "median": 0.024866104125976562, "p90": 0.39233551025390634, "max": 0.801666259765625, "pos_frac": 0.53125, "sample": [0.801666259765625, -0.052764892578125, 0.30525970458984375, -0.6031646728515625, -0.1665496826171875, -0.01507568359375, 0.1254425048828125, 0.25146484375, -0.10025787353515625, 0.6085395812988281, 0.1500091552734375, -0.5539703369140625, -0.398529052734375, 0.15702056884765625, -0.081756591796875, 0.5852432250976562, -0.009552001953125, 0.205902099609375, -0.07745170593261719, -0.0780029296875, -0.8912353515625, -0.3318023681640625, -0.4866943359375, -0.6842575073242188, -0.295196533203125, 0.15395736694335938, 0.2031707763671875, 0.4018707275390625, 0.0743255615234375, -0.47104644775390625, 0.2057952880859375, 0.2466278076171875, 0.06932449340820312, 0.2429676055908203, 0.370086669921875, 0.18226242065429688, 0.20254898071289062, -0.24981689453125, -0.29582977294921875, -0.1760730743408203, -0.367950439453125, 0.17668533325195312, -0.4052753448486328, 0.2955818176269531, 0.11685943603515625, 0.04346466064453125, 0.7606964111328125, 0.042652130126953125, 0.20645999908447266, 0.0650787353515625, -0.03275299072265625, 0.7742538452148438, 0.13767242431640625, 0.41765594482421875, -0.207977294921875, -0.49383544921875, 0.007080078125, -0.2987823486328125, -0.06473350524902344, -0.14650726318359375, 0.105255126953125, -0.18754196166992188, -0.420074462890625, 0.0061492919921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000012.npy"} +{"epoch": 0.01762114537444934, "step": 13, "batch_size": 64, "mean": 0.061279088258743286, "std": 0.4573952555656433, "min": -1.561431884765625, "p10": -0.37520751953125, "median": 0.016699790954589844, "p90": 0.46484146118164077, "max": 1.6549301147460938, "pos_frac": 0.53125, "sample": [0.1220855712890625, -0.10704994201660156, 0.25603485107421875, -0.3028717041015625, 0.229827880859375, 0.4806938171386719, 0.3888702392578125, -0.04447364807128906, 0.7571029663085938, 0.19808578491210938, 0.5288619995117188, 0.9396209716796875, -0.39174652099609375, -0.33661651611328125, -0.2950172424316406, 1.6549301147460938, -0.4322662353515625, -0.171875, -0.09416389465332031, 0.339508056640625, -0.1836109161376953, 0.3298492431640625, -0.4017486572265625, -0.027362823486328125, 0.07223320007324219, -0.1067657470703125, 0.024929046630859375, -0.1934223175048828, -0.3205680847167969, -0.3932342529296875, -0.20317840576171875, 0.202972412109375, 0.15372467041015625, 0.10444259643554688, -1.561431884765625, -0.23455047607421875, -0.2807769775390625, 0.4817047119140625, -0.2641315460205078, 0.103912353515625, 0.4278526306152344, 0.15764617919921875, -0.13421630859375, -0.23464202880859375, 0.36466217041015625, 0.008470535278320312, -0.02608489990234375, 0.24178504943847656, 0.3563423156738281, 0.00714874267578125, -0.528106689453125, 0.2627716064453125, 0.093170166015625, -0.0226287841796875, -0.29701995849609375, 0.2069091796875, 0.251708984375, 0.37561798095703125, 1.50335693359375, -0.0155792236328125, -0.2869110107421875, 0.3036346435546875, 0.4151153564453125, -0.5316696166992188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000013.npy"} +{"epoch": 0.01908957415565345, "step": 14, "batch_size": 64, "mean": 0.021842211484909058, "std": 0.34489062428474426, "min": -0.734710693359375, "p10": -0.43193359374999996, "median": 0.04969596862792969, "p90": 0.4058456420898439, "max": 0.7517547607421875, "pos_frac": 0.53125, "sample": [-0.44061279296875, -0.02402496337890625, 0.13491249084472656, -0.2089691162109375, 0.7517547607421875, -0.2097930908203125, -0.734710693359375, -0.41130828857421875, 0.7032012939453125, 0.27880859375, 0.36696624755859375, 0.06336212158203125, 0.6293182373046875, 0.248687744140625, -0.4853248596191406, -0.08844757080078125, -0.08235931396484375, -0.23180389404296875, 0.051288604736328125, -0.011896133422851562, 0.3152008056640625, -0.06915283203125, 0.42250823974609375, -0.058929443359375, -0.323577880859375, -0.1138458251953125, -0.6356201171875, 0.19847488403320312, 0.2648468017578125, 0.5945663452148438, 0.08269882202148438, -0.1856365203857422, 0.2945709228515625, 0.11574935913085938, 0.21610450744628906, -0.02217864990234375, 0.22323989868164062, -0.35321044921875, -0.10290718078613281, 0.04810333251953125, 0.350555419921875, -0.41168212890625, -0.4467010498046875, 0.48981475830078125, 0.2503929138183594, 0.33637237548828125, 0.3461132049560547, -0.6488571166992188, -0.254302978515625, -0.32155609130859375, -0.7107086181640625, 0.17986297607421875, 0.2840118408203125, 0.16788482666015625, -0.224578857421875, 0.5309600830078125, 0.28179931640625, 0.0439910888671875, -0.35358428955078125, 0.28192138671875, -0.1885814666748047, 0.11767387390136719, 0.12062835693359375, -0.03358268737792969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000014.npy"} +{"epoch": 0.020558002936857563, "step": 15, "batch_size": 64, "mean": -0.017623186111450195, "std": 0.4012446403503418, "min": -1.5726242065429688, "p10": -0.5454650878906249, "median": 0.01195526123046875, "p90": 0.39934329986572287, "max": 0.7709579467773438, "pos_frac": 0.515625, "sample": [-0.05806922912597656, -0.20361328125, 0.20895004272460938, 0.2935657501220703, 0.278656005859375, 0.1763439178466797, -0.13550949096679688, -0.028835296630859375, -0.059326171875, 0.72149658203125, -0.2027740478515625, -0.252471923828125, 0.11908721923828125, -0.5892181396484375, 0.41908836364746094, 0.324737548828125, 0.33556365966796875, -0.27362823486328125, 0.10589790344238281, 0.06233978271484375, 0.119903564453125, -1.5726242065429688, -0.33148956298828125, 0.6001358032226562, -0.6218128204345703, -0.10301971435546875, 0.23191070556640625, 0.14885711669921875, -0.29769134521484375, 0.7108688354492188, -0.019683837890625, -0.9639434814453125, -0.6679000854492188, -0.05470466613769531, 0.039093017578125, 0.0514678955078125, -0.4433746337890625, 0.7709579467773438, 0.0060882568359375, -0.22031021118164062, -0.0323638916015625, 0.090850830078125, 0.1665515899658203, 0.21370506286621094, -0.02274322509765625, -0.08484268188476562, 0.2681446075439453, -0.24361419677734375, 0.5279617309570312, -0.14565277099609375, 0.1660003662109375, 0.017822265625, 0.5286865234375, 0.11658477783203125, 0.1647186279296875, 0.353271484375, -0.254791259765625, -0.1146240234375, 0.16510772705078125, -0.6669769287109375, -0.40872955322265625, -0.7394676208496094, -0.020977020263671875, 0.202484130859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000015.npy"} +{"epoch": 0.022026431718061675, "step": 16, "batch_size": 64, "mean": 0.016786575317382812, "std": 0.38721632957458496, "min": -0.863250732421875, "p10": -0.46660041809082026, "median": 0.014174461364746094, "p90": 0.3950428009033204, "max": 1.3713531494140625, "pos_frac": 0.53125, "sample": [0.1791515350341797, 1.3713531494140625, -0.2656402587890625, 0.05759429931640625, 0.3819236755371094, 0.9162368774414062, -0.193695068359375, -0.03982353210449219, 0.128875732421875, 0.18773651123046875, -0.5764312744140625, 0.01496124267578125, 0.18662643432617188, -0.03804779052734375, 0.013387680053710938, -0.598175048828125, -0.3739051818847656, -0.4165191650390625, -0.30437469482421875, 0.32678985595703125, 0.26335906982421875, 0.2675895690917969, 0.4814796447753906, 0.36592864990234375, 0.2686119079589844, -0.3044929504394531, -0.49190521240234375, -0.4841499328613281, -0.07135200500488281, -0.26750946044921875, 0.22983551025390625, 0.2878303527832031, 0.000148773193359375, -0.20579147338867188, 0.20528411865234375, 0.12607574462890625, 0.46538543701171875, 0.2749481201171875, -0.23508453369140625, -0.029541015625, 0.369781494140625, 0.1952362060546875, 0.12644004821777344, -0.02191162109375, 0.378692626953125, -0.14931106567382812, -0.2695732116699219, -0.20809555053710938, 0.16027069091796875, 0.143402099609375, -0.42565155029296875, -0.688873291015625, -0.33123779296875, -0.3338356018066406, -0.09099960327148438, -0.5596084594726562, 0.29944801330566406, 0.400665283203125, 0.5903892517089844, 0.5263824462890625, 0.2093505859375, -0.22084808349609375, -0.2671966552734375, -0.863250732421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000016.npy"} +{"epoch": 0.023494860499265784, "step": 17, "batch_size": 64, "mean": 0.09929636120796204, "std": 0.3600854277610779, "min": -0.827545166015625, "p10": -0.43471813201904297, "median": 0.12248420715332031, "p90": 0.5412647247314454, "max": 0.823699951171875, "pos_frac": 0.703125, "sample": [-0.72216796875, -0.015764236450195312, 0.5465049743652344, 0.04975128173828125, -0.7012481689453125, -0.15966796875, 0.25244140625, -0.827545166015625, 0.3618316650390625, -0.5089950561523438, 0.6592178344726562, 0.3532867431640625, 0.140472412109375, 0.76605224609375, 0.2473602294921875, 0.122955322265625, 0.12201309204101562, 0.2874794006347656, 0.16664886474609375, 0.49005889892578125, 0.3737831115722656, 0.2732582092285156, 0.10744094848632812, -0.01690673828125, 0.00827789306640625, 0.15050506591796875, 0.2850456237792969, -0.11480712890625, 0.7548065185546875, -0.171661376953125, -0.03678131103515625, 0.5290374755859375, 0.10514068603515625, 0.30893707275390625, -0.1547393798828125, 0.7432022094726562, -0.24654769897460938, -0.4406147003173828, 0.19457244873046875, 0.05718994140625, 0.2081298828125, 0.18880462646484375, 0.19055557250976562, 0.037639617919921875, -0.6755828857421875, 0.0610809326171875, -0.04666900634765625, -0.4948883056640625, 0.00666046142578125, 0.06600189208984375, 0.097686767578125, 0.13376235961914062, 0.07684326171875, 0.305877685546875, 0.260223388671875, -0.42095947265625, 0.29229736328125, 0.5550880432128906, 0.823699951171875, -0.22843170166015625, 0.19370079040527344, -0.1668548583984375, 0.49776458740234375, 0.05271148681640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000017.npy"} +{"epoch": 0.024963289280469897, "step": 18, "batch_size": 64, "mean": 0.05686900019645691, "std": 0.3101457953453064, "min": -0.80499267578125, "p10": -0.29288291931152344, "median": 0.049887657165527344, "p90": 0.4292278289794922, "max": 0.684967041015625, "pos_frac": 0.5625, "sample": [0.04390716552734375, -0.46121978759765625, 0.6697158813476562, -0.13033294677734375, -0.1910247802734375, -0.041126251220703125, -0.22491455078125, -0.019622802734375, -0.1902923583984375, 0.4796028137207031, -0.80499267578125, 0.09406280517578125, 0.431304931640625, 0.3408660888671875, -0.1706390380859375, 0.533355712890625, -0.2976036071777344, 0.07506179809570312, -0.2165679931640625, 0.24311065673828125, -0.052318572998046875, -0.3029022216796875, 0.2725677490234375, -0.08175849914550781, 0.4243812561035156, 0.25745391845703125, 0.3671722412109375, 0.09857940673828125, -0.009614944458007812, 0.06471824645996094, -0.07248687744140625, 0.21966934204101562, -0.28186798095703125, 0.188629150390625, 0.4890289306640625, -0.032299041748046875, 0.4019927978515625, 0.3745536804199219, -0.02312469482421875, 0.05461883544921875, 0.45305824279785156, 0.09679412841796875, -0.45407867431640625, -0.06961822509765625, 0.0535888671875, 0.684967041015625, 0.18536758422851562, 0.33693504333496094, -0.072998046875, 0.11085891723632812, 0.3647499084472656, -0.11977195739746094, -0.7802734375, -0.25081634521484375, 0.35318756103515625, 0.21102142333984375, 0.3770599365234375, 0.015565872192382812, -0.5023651123046875, 0.2822151184082031, 0.04618644714355469, -0.19122695922851562, 0.04555511474609375, -0.05599021911621094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000018.npy"} +{"epoch": 0.02643171806167401, "step": 19, "batch_size": 64, "mean": 0.11293420195579529, "std": 0.39604896306991577, "min": -1.2044830322265625, "p10": -0.3118961334228515, "median": 0.12566566467285156, "p90": 0.6064468383789064, "max": 0.9469451904296875, "pos_frac": 0.671875, "sample": [0.2720794677734375, 0.2032794952392578, -0.06836700439453125, 0.0017547607421875, -0.26239776611328125, -0.084014892578125, 0.1305999755859375, -0.4168205261230469, 0.14213180541992188, 0.20325469970703125, 0.17994308471679688, -0.2732276916503906, 0.20635604858398438, 0.429931640625, 0.3072357177734375, -1.2044830322265625, 0.34710693359375, 0.30889892578125, -0.2469024658203125, -0.095611572265625, 0.517852783203125, -0.0109405517578125, 0.2151641845703125, 0.75067138671875, 0.18287086486816406, 0.1921367645263672, -0.5120468139648438, 0.19799041748046875, 0.1470794677734375, -0.01998138427734375, 0.39429473876953125, 0.009111404418945312, 0.010652542114257812, 0.12073135375976562, 0.9238510131835938, -0.32846832275390625, -0.4732513427734375, 0.0370025634765625, 0.1173248291015625, 0.493804931640625, 0.28314208984375, 0.70184326171875, -0.0031890869140625, -0.1812744140625, 0.03410530090332031, 0.9469451904296875, 0.3265571594238281, 0.630126953125, 0.05078125, 0.4975128173828125, 0.912811279296875, -0.08620834350585938, 0.13402557373046875, -0.09822463989257812, 0.81195068359375, 0.07309150695800781, -0.2617340087890625, -0.3322410583496094, 0.0462799072265625, -1.026336669921875, 0.5511932373046875, -0.020610809326171875, 0.15721893310546875, 0.03342437744140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000019.npy"} +{"epoch": 0.027900146842878122, "step": 20, "batch_size": 64, "mean": 0.0998302698135376, "std": 0.3317307233810425, "min": -1.0104293823242188, "p10": -0.28984909057617186, "median": 0.0730733871459961, "p90": 0.4701065063476563, "max": 0.9316864013671875, "pos_frac": 0.671875, "sample": [0.057582855224609375, 0.23141098022460938, -0.006488800048828125, 0.18634033203125, 0.01474761962890625, 0.3693084716796875, 0.29482269287109375, -0.13469696044921875, 0.11853790283203125, 0.43012237548828125, -0.5548477172851562, 0.1746978759765625, 0.22563934326171875, 0.2854461669921875, 0.21934127807617188, 0.01229095458984375, 0.07136154174804688, 0.2881050109863281, -0.3321876525878906, -0.106292724609375, 0.4467315673828125, -0.08416748046875, 0.066162109375, -0.0679779052734375, -0.5048370361328125, 0.030992507934570312, 0.4786529541015625, 0.2269439697265625, -0.122711181640625, 0.07478523254394531, -0.10586357116699219, -0.2783775329589844, -0.3799705505371094, 0.5604457855224609, 0.35283660888671875, -0.5430145263671875, 0.5656585693359375, -1.0104293823242188, -0.2947654724121094, -0.1632843017578125, 0.24573516845703125, 0.0325775146484375, 0.03688812255859375, -0.027629852294921875, -0.030521392822265625, 0.14986419677734375, 0.5467681884765625, 0.9080352783203125, 0.6250076293945312, -0.12308502197265625, 0.0514373779296875, 0.37094879150390625, 0.30149078369140625, 0.03759765625, 0.9316864013671875, -0.2056293487548828, 0.450164794921875, 0.2695941925048828, -0.07779502868652344, 0.07985305786132812, 0.21150588989257812, 0.10394287109375, 0.3458442687988281, 0.06180381774902344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000020.npy"} +{"epoch": 0.02936857562408223, "step": 21, "batch_size": 64, "mean": 0.10462629795074463, "std": 0.37146177887916565, "min": -0.9378890991210938, "p10": -0.2201000213623047, "median": 0.06570816040039062, "p90": 0.666172027587891, "max": 1.0224609375, "pos_frac": 0.59375, "sample": [0.546722412109375, 0.1352672576904297, 0.58477783203125, 0.84478759765625, 0.7371978759765625, -0.08111000061035156, -0.03908538818359375, 0.053863525390625, 0.697723388671875, 0.1389007568359375, 0.257904052734375, 0.3295440673828125, 0.25969696044921875, 0.20929336547851562, 0.3227500915527344, 0.5925521850585938, 0.1277618408203125, -0.10798263549804688, 0.023080825805664062, 0.12097930908203125, -0.231658935546875, 0.277435302734375, 0.218597412109375, -0.08354568481445312, -0.10222434997558594, 1.0224609375, -0.4655914306640625, -0.12164878845214844, -0.13383865356445312, 0.0146026611328125, -0.9378890991210938, 0.19686126708984375, -0.15532302856445312, 0.22036361694335938, 0.18634796142578125, -0.17905807495117188, -0.00746917724609375, 0.5629405975341797, 0.7080230712890625, -0.1606597900390625, 0.04274749755859375, 0.07755279541015625, -0.2193603515625, 0.023344039916992188, 0.10590362548828125, -0.22041702270507812, -0.19925308227539062, -0.06640625, 0.04005241394042969, -0.8892745971679688, -0.0294647216796875, 0.3585662841796875, 0.8631935119628906, 0.1910858154296875, -0.06293487548828125, -0.2796440124511719, -0.0287628173828125, -0.1072998046875, 0.7784347534179688, 0.10295295715332031, 0.09047698974609375, -0.5554428100585938, 0.15814208984375, -0.06146049499511719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000021.npy"} +{"epoch": 0.030837004405286344, "step": 22, "batch_size": 64, "mean": 0.22439703345298767, "std": 0.37970617413520813, "min": -0.6068878173828125, "p10": -0.2606744766235351, "median": 0.22269058227539062, "p90": 0.7412811279296876, "max": 1.0443038940429688, "pos_frac": 0.734375, "sample": [0.23342132568359375, 0.40447998046875, 0.3388786315917969, 0.1977100372314453, 0.219329833984375, -0.394561767578125, 0.17913818359375, 0.17531776428222656, 0.0039005279541015625, 0.491668701171875, 1.0342864990234375, 0.36395263671875, -0.3720703125, 0.7935714721679688, -0.14838600158691406, -0.13037872314453125, 0.33269500732421875, 0.2882404327392578, -0.160003662109375, -0.13531494140625, 0.2823944091796875, 0.803497314453125, 0.133453369140625, -0.0624847412109375, 0.08047866821289062, 0.19295310974121094, 1.0443038940429688, -0.5489463806152344, 0.1584014892578125, 0.1956024169921875, 0.38654136657714844, 0.339141845703125, 0.385223388671875, -0.08498382568359375, 0.4972057342529297, 0.22605133056640625, 0.15508651733398438, -0.35044097900390625, -0.12910842895507812, 0.721649169921875, 0.28094482421875, 0.45227813720703125, 0.2640247344970703, 0.026012420654296875, -0.1954059600830078, 0.700531005859375, 0.20636749267578125, -0.6068878173828125, 0.7005767822265625, 0.23359298706054688, 0.020486831665039062, 0.953399658203125, -0.2886466979980469, -0.05322074890136719, -0.024169921875, 0.645538330078125, 0.4238739013671875, 0.1481170654296875, 0.5058975219726562, 0.308349609375, 0.3148193359375, 0.9700851440429688, 0.74969482421875, -0.5167446136474609], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000022.npy"} +{"epoch": 0.032305433186490456, "step": 23, "batch_size": 64, "mean": 0.1467902660369873, "std": 0.46301591396331787, "min": -1.548126220703125, "p10": -0.3024099349975585, "median": 0.12230587005615234, "p90": 0.6603431701660156, "max": 1.5241928100585938, "pos_frac": 0.734375, "sample": [0.003879547119140625, 0.03089141845703125, 0.5865936279296875, 0.06989669799804688, 0.3020133972167969, 0.5431556701660156, 0.13614654541015625, 0.0372772216796875, -0.4854774475097656, -0.243743896484375, 0.480255126953125, 1.5241928100585938, 0.947113037109375, 0.00716400146484375, 0.36661529541015625, 0.1964111328125, -0.2470989227294922, 0.3145484924316406, 0.2441558837890625, -0.17110824584960938, 0.6646347045898438, -0.347686767578125, 0.678466796875, -0.08047866821289062, -0.6335887908935547, 0.234649658203125, -0.7099685668945312, -0.9809074401855469, 0.5538406372070312, -0.15313720703125, 0.0182037353515625, 0.525787353515625, 0.37078094482421875, -0.09206199645996094, 0.10846519470214844, 0.05461883544921875, 0.2638702392578125, 0.26892852783203125, 0.0679168701171875, 0.500396728515625, 0.08583450317382812, 0.05757331848144531, 0.045093536376953125, 0.5511245727539062, 0.14159774780273438, 0.9228057861328125, -0.12976455688476562, -1.548126220703125, 0.1843719482421875, -0.136627197265625, 0.4026813507080078, 0.044315338134765625, 0.0230560302734375, -0.3261146545410156, 0.1442127227783203, 0.25913238525390625, 0.25376129150390625, 0.44989013671875, -0.1340503692626953, 0.65032958984375, 0.0323486328125, -0.1537933349609375, 0.776824951171875, 0.8424873352050781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000023.npy"} +{"epoch": 0.033773861967694566, "step": 24, "batch_size": 64, "mean": 0.22320020198822021, "std": 0.402773380279541, "min": -0.6936569213867188, "p10": -0.2961654663085937, "median": 0.19123077392578125, "p90": 0.7158939361572266, "max": 1.315521240234375, "pos_frac": 0.734375, "sample": [0.4739990234375, 0.5150337219238281, -0.07292747497558594, -0.041263580322265625, 0.127471923828125, 0.008403778076171875, 0.899505615234375, -0.3472900390625, 0.467681884765625, 0.2538719177246094, 0.200042724609375, -0.4091911315917969, 0.09105873107910156, 0.7195549011230469, 0.09796524047851562, -0.0875091552734375, 0.030376434326171875, 0.07292556762695312, 0.7073516845703125, -0.264068603515625, 0.4104766845703125, -0.6936569213867188, -0.23108863830566406, 0.12831878662109375, 0.2064971923828125, 0.698883056640625, 0.15404319763183594, 0.21343994140625, 0.5084075927734375, 0.5788421630859375, 1.315521240234375, 0.854248046875, -0.08236312866210938, -0.4307708740234375, 0.07297515869140625, -0.55816650390625, -0.16912841796875, 0.127197265625, 0.06626129150390625, -0.0202484130859375, 0.9213104248046875, 0.2735252380371094, 0.1824188232421875, 0.3262481689453125, 0.34758758544921875, 1.17071533203125, 0.6554412841796875, 0.01650238037109375, 0.90380859375, 0.21118545532226562, 0.6292495727539062, -0.053363800048828125, 0.30200958251953125, 0.25659751892089844, 0.2701568603515625, -0.060272216796875, 0.4400615692138672, 0.36255645751953125, 0.43004608154296875, -0.36792755126953125, 0.1646728515625, -0.3099212646484375, 0.0395660400390625, 0.5799560546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000024.npy"} +{"epoch": 0.03524229074889868, "step": 25, "batch_size": 64, "mean": 0.22116953134536743, "std": 0.4490810036659241, "min": -0.61004638671875, "p10": -0.2357868194580078, "median": 0.1379718780517578, "p90": 0.7662322998046877, "max": 2.06427001953125, "pos_frac": 0.671875, "sample": [0.21707534790039062, 0.3070793151855469, 0.805419921875, -0.23722076416015625, 0.14038467407226562, -0.15633010864257812, -0.20439910888671875, 2.06427001953125, -0.212615966796875, 0.06940078735351562, -0.0272674560546875, 0.29193115234375, -0.004238128662109375, 0.2109832763671875, 0.9476852416992188, -0.5475368499755859, -0.24004554748535156, 0.6098098754882812, 0.10115814208984375, 1.0054473876953125, -0.2262115478515625, 0.1264495849609375, 0.1828899383544922, 0.09453582763671875, 0.107177734375, 0.2188720703125, 0.5089874267578125, 0.6143646240234375, -0.2612152099609375, 0.009927749633789062, 0.25208282470703125, 0.6233291625976562, 0.053714752197265625, 0.4053497314453125, -0.22042083740234375, -0.23235321044921875, 0.05461692810058594, 0.8665771484375, -0.1255340576171875, 0.34125518798828125, -0.33049583435058594, 0.6986465454101562, 0.28397369384765625, 0.2572212219238281, -0.23244094848632812, -0.18737030029296875, 0.06612396240234375, 0.373046875, 0.5647125244140625, 0.4203071594238281, 1.0803680419921875, 0.7772369384765625, 0.13555908203125, -0.61004638671875, 0.623748779296875, 0.11607933044433594, -0.036041259765625, 0.7367095947265625, 0.7405548095703125, -0.0752716064453125, 0.37126731872558594, 0.3094329833984375, -0.40122222900390625, -0.0626373291015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000025.npy"} +{"epoch": 0.03671071953010279, "step": 26, "batch_size": 64, "mean": 0.40158015489578247, "std": 0.5813688635826111, "min": -1.1492462158203125, "p10": -0.23418464660644528, "median": 0.2835073471069336, "p90": 1.3271484375000004, "max": 1.807586669921875, "pos_frac": 0.734375, "sample": [0.26837158203125, 0.07583808898925781, 0.18416213989257812, 0.5594310760498047, 1.108123779296875, 0.26165008544921875, -0.1209259033203125, 0.8783721923828125, -0.294891357421875, -0.2542304992675781, 1.2155303955078125, 0.07894134521484375, 1.807586669921875, 0.38097381591796875, -0.023162841796875, 1.72015380859375, 0.9464645385742188, 0.17942047119140625, 0.581939697265625, -0.24375534057617188, 0.1301746368408203, -0.1689453125, 0.4237213134765625, 0.7709121704101562, -0.019287109375, 1.3927154541015625, 0.1481781005859375, -1.1492462158203125, 0.2986431121826172, 0.7373046875, -0.10262298583984375, -0.00041961669921875, 0.3846263885498047, 0.3659553527832031, -0.09452247619628906, 0.3298492431640625, 1.562591552734375, 0.26409912109375, 0.4042930603027344, 1.3749847412109375, 0.42971038818359375, 0.2078094482421875, 1.734130859375, 0.5643768310546875, 0.35453033447265625, 1.4014434814453125, -0.3791465759277344, 0.1725006103515625, 0.1091766357421875, -0.0050983428955078125, -0.020763397216796875, 0.11617279052734375, -0.2761974334716797, 0.5325393676757812, -0.21185302734375, 0.46038818359375, 0.9353866577148438, 1.000152587890625, 1.0299835205078125, 0.06497573852539062, 0.16949081420898438, -0.3465385437011719, 0.5629196166992188, 0.7320404052734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000026.npy"} +{"epoch": 0.0381791483113069, "step": 27, "batch_size": 64, "mean": 0.5132263898849487, "std": 0.5736640691757202, "min": -0.8043670654296875, "p10": -0.08888587951660153, "median": 0.40970325469970703, "p90": 1.170953369140625, "max": 2.3391799926757812, "pos_frac": 0.84375, "sample": [0.10695648193359375, 0.9900588989257812, 0.7182083129882812, 0.6809616088867188, 0.6716270446777344, 1.1599884033203125, 0.16571044921875, -0.0552520751953125, 1.4199905395507812, -0.2798442840576172, 0.83612060546875, 0.32613372802734375, 0.325897216796875, 0.2969036102294922, 0.402740478515625, 0.243865966796875, 2.3391799926757812, 0.38199615478515625, 0.4209403991699219, 0.17266845703125, -0.18584442138671875, -0.0567169189453125, 0.35794830322265625, -0.311614990234375, 0.35890960693359375, 0.41666603088378906, 0.03230857849121094, 0.03765869140625, 0.8281822204589844, 2.1421127319335938, 0.3919830322265625, 0.015802383422851562, -0.10267257690429688, 2.071136474609375, 0.558441162109375, 1.2005538940429688, 1.0322265625, 0.587188720703125, 0.45034027099609375, 0.3512077331542969, -0.4025726318359375, 0.8390274047851562, 0.200225830078125, 0.6928939819335938, 0.20025634765625, 0.65185546875, -0.8043670654296875, 0.4685039520263672, 1.1647262573242188, 0.9558868408203125, 1.327880859375, 0.2576484680175781, 0.2883453369140625, 0.690093994140625, 0.67999267578125, 0.49684906005859375, 1.1736221313476562, -0.13791465759277344, 0.5488929748535156, 0.246917724609375, 0.47397613525390625, 0.9851303100585938, -0.042266845703125, 0.3902130126953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000027.npy"} +{"epoch": 0.039647577092511016, "step": 28, "batch_size": 64, "mean": 0.32913774251937866, "std": 0.6369958519935608, "min": -2.04827880859375, "p10": -0.310635757446289, "median": 0.2778129577636719, "p90": 1.1896972656250002, "max": 1.8759918212890625, "pos_frac": 0.671875, "sample": [0.5061454772949219, 0.2642974853515625, -0.012115478515625, 0.5277786254882812, 1.0087451934814453, -0.1164703369140625, -0.2759246826171875, 0.16132736206054688, -0.3745765686035156, 0.2512054443359375, -0.13594818115234375, 0.8706512451171875, 1.2159423828125, -0.10254669189453125, 0.6830978393554688, -0.3255119323730469, 0.2859649658203125, 0.7792816162109375, -0.0644073486328125, 0.26966094970703125, 0.3609733581542969, -0.46894073486328125, 0.03980255126953125, 1.2099761962890625, -0.01279449462890625, 0.7466354370117188, 0.7201080322265625, 0.87298583984375, 1.2788772583007812, 0.5294952392578125, 0.39263153076171875, 0.524566650390625, -0.10486602783203125, 0.119110107421875, 0.7442359924316406, 0.3870735168457031, 0.7718906402587891, 1.2410125732421875, 0.11983108520507812, -0.11624908447265625, 0.12928390502929688, 0.45693397521972656, -0.020122528076171875, 0.873504638671875, 1.5539703369140625, 1.250091552734375, 0.7324600219726562, 0.29053688049316406, 0.9674148559570312, 0.007904052734375, 1.0177268981933594, -0.921630859375, -0.4544486999511719, -0.00827789306640625, 0.032825469970703125, 0.41602325439453125, 0.12654876708984375, 1.1423797607421875, 1.8759918212890625, -0.1390838623046875, -0.10292816162109375, -2.04827880859375, -0.7722625732421875, -0.1147003173828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000028.npy"} +{"epoch": 0.041116005873715125, "step": 29, "batch_size": 64, "mean": 0.5947141647338867, "std": 0.5538198947906494, "min": -0.3796348571777344, "p10": -0.06402015686035153, "median": 0.5235109329223633, "p90": 1.4383010864257815, "max": 1.9280853271484375, "pos_frac": 0.84375, "sample": [1.1150321960449219, 0.0172576904296875, 0.4709930419921875, 0.5981292724609375, -0.0771331787109375, 0.6295299530029297, 0.186767578125, 1.1628799438476562, 0.5758399963378906, 0.224273681640625, 0.47406768798828125, 0.06081581115722656, 0.9008026123046875, 0.6577301025390625, -0.10687255859375, 1.10638427734375, 1.5722808837890625, -0.3796348571777344, -0.04295158386230469, 1.5940017700195312, 0.0515899658203125, 0.661224365234375, 0.4817771911621094, 0.5319347381591797, 1.1547088623046875, 0.7646636962890625, -0.03336334228515625, 0.2674827575683594, 1.211822509765625, 0.406768798828125, -0.0236663818359375, 1.1988296508789062, 1.9280853271484375, 0.01522064208984375, 0.7146148681640625, 0.14791488647460938, 0.56634521484375, -0.228668212890625, 1.3050537109375, 1.45489501953125, 0.769622802734375, 1.3995819091796875, 0.5002593994140625, 0.6626663208007812, 0.40545082092285156, -0.07304954528808594, 1.1971588134765625, -0.085845947265625, 0.6910057067871094, 0.016588211059570312, 0.3462028503417969, 0.23114013671875, 1.0866622924804688, 0.2610931396484375, 0.7463111877441406, 1.669281005859375, -0.1955718994140625, 0.43012237548828125, 0.4223365783691406, 0.67315673828125, 1.58050537109375, 1.493743896484375, 0.00077056884765625, 0.5150871276855469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000029.npy"} +{"epoch": 0.042584434654919234, "step": 30, "batch_size": 64, "mean": 0.6149011850357056, "std": 0.5518370866775513, "min": -0.50909423828125, "p10": -0.017597007751464833, "median": 0.6128025054931641, "p90": 1.268630981445313, "max": 2.1337890625, "pos_frac": 0.875, "sample": [0.7926712036132812, 0.5622406005859375, -0.13998031616210938, 1.1211929321289062, 1.038787841796875, -0.47267913818359375, 0.08787155151367188, 0.568572998046875, -0.0049571990966796875, 0.690765380859375, 0.589508056640625, 0.8612594604492188, 0.3692169189453125, 1.0328636169433594, 0.084075927734375, 1.5294189453125, 0.5402374267578125, 0.56732177734375, -0.023014068603515625, 1.619781494140625, 0.814056396484375, 0.9200286865234375, 2.1337890625, 1.0419464111328125, 1.0943832397460938, 0.3710765838623047, 0.2862548828125, 0.3070220947265625, 0.05962562561035156, 0.10809326171875, 0.53076171875, -0.0350494384765625, 0.7018814086914062, -0.1667957305908203, 0.07328987121582031, 1.0692214965820312, 1.158935546875, 0.8731689453125, 1.4751739501953125, 0.7425613403320312, 0.08707237243652344, 0.9431610107421875, 0.6669960021972656, 0.5367813110351562, -0.50909423828125, 1.315643310546875, 0.9628753662109375, 0.13909149169921875, 1.1071281433105469, 0.7194671630859375, 1.3461685180664062, 0.9292831420898438, 0.45667457580566406, -0.4483623504638672, 0.25360107421875, 0.8258514404296875, 0.6975860595703125, 0.13879776000976562, 0.7302932739257812, 0.1351490020751953, 0.10467529296875, 2.0146484375, 0.6161270141601562, 0.6094779968261719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000030.npy"} +{"epoch": 0.04405286343612335, "step": 31, "batch_size": 64, "mean": 0.4925113320350647, "std": 0.7040557861328125, "min": -1.121368408203125, "p10": -0.24380531311035153, "median": 0.352630615234375, "p90": 1.419279479980469, "max": 3.0770263671875, "pos_frac": 0.78125, "sample": [-0.08755683898925781, 1.6299400329589844, 0.14349365234375, 0.19375991821289062, 1.70068359375, 0.38356781005859375, -0.3941383361816406, -0.206146240234375, 0.19841766357421875, -0.1540985107421875, 3.0770263671875, 0.1778564453125, 0.259521484375, 0.2762584686279297, 0.024951934814453125, 0.12266349792480469, 0.8970947265625, 0.3632831573486328, 1.5557098388671875, 1.00860595703125, 0.09022140502929688, -0.2599449157714844, 0.22733306884765625, -0.5262832641601562, 0.35422515869140625, 0.3695793151855469, 1.1988258361816406, 0.3629798889160156, 0.5652198791503906, 0.3303031921386719, 1.06414794921875, 0.6981964111328125, 1.455841064453125, -0.325408935546875, -0.017589569091796875, 0.2162628173828125, 0.17481231689453125, 0.8938064575195312, -0.0204010009765625, 1.06695556640625, 1.0282135009765625, 1.2874298095703125, -0.3101348876953125, 0.414825439453125, 0.461395263671875, -1.121368408203125, -0.2919464111328125, 0.7305221557617188, 0.015995025634765625, -0.12321090698242188, 0.35103607177734375, 1.2177581787109375, 1.5823211669921875, -0.115692138671875, 0.0234222412109375, 0.32241058349609375, 0.5273513793945312, 0.202972412109375, 1.2421302795410156, 0.4684181213378906, 1.3339691162109375, 2.2417526245117188, 0.578094482421875, 0.3630828857421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000031.npy"} +{"epoch": 0.04552129221732746, "step": 32, "batch_size": 64, "mean": 0.6792212724685669, "std": 0.877557635307312, "min": -1.164154052734375, "p10": -0.17584266662597653, "median": 0.45807743072509766, "p90": 1.9187961578369146, "max": 3.9172821044921875, "pos_frac": 0.84375, "sample": [0.2296905517578125, 0.307342529296875, 0.47939300537109375, 0.9735488891601562, 0.5958671569824219, 0.2294464111328125, 0.108978271484375, 0.8360137939453125, 0.5835247039794922, 1.2364273071289062, 1.1043701171875, 0.3771858215332031, -0.26372528076171875, 0.4613361358642578, -1.164154052734375, 0.7550544738769531, -0.8285484313964844, 0.3837318420410156, -0.25974273681640625, 2.076171875, 0.6710433959960938, 1.8055343627929688, 1.966766357421875, 0.3139495849609375, 1.114654541015625, 0.4114227294921875, 0.085662841796875, 0.03652191162109375, -0.122802734375, 1.152008056640625, 0.3178558349609375, 0.2995624542236328, 0.1996631622314453, 2.75750732421875, 1.8068656921386719, 0.17986679077148438, 0.4132537841796875, 0.32537841796875, -0.19601058959960938, 0.5958023071289062, 1.76318359375, 1.0230998992919922, -0.1287841796875, 0.4548187255859375, 3.9172821044921875, 0.8180770874023438, 0.48433685302734375, 2.4698867797851562, 2.1191864013671875, -0.40593719482421875, -0.015886306762695312, 0.116363525390625, 2.43682861328125, 0.8096847534179688, -0.3775634765625, 0.2006664276123047, 1.109039306640625, 1.446502685546875, 0.24913787841796875, 0.749908447265625, 0.25478363037109375, 0.4938201904296875, 1.017913818359375, 0.10739517211914062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000032.npy"} +{"epoch": 0.04698972099853157, "step": 33, "batch_size": 64, "mean": 0.6040312051773071, "std": 0.631873607635498, "min": -0.6550674438476562, "p10": -0.18494796752929685, "median": 0.5633697509765625, "p90": 1.482192611694336, "max": 2.240386962890625, "pos_frac": 0.84375, "sample": [-0.16852569580078125, 0.3718223571777344, 0.5716533660888672, 0.5697784423828125, 2.240386962890625, 0.0491180419921875, -0.191986083984375, 0.72576904296875, 0.3926048278808594, 0.4575824737548828, 0.9939193725585938, 0.9843215942382812, 1.5687255859375, 1.1679534912109375, 0.58148193359375, 0.39234161376953125, 0.6475677490234375, -0.3611602783203125, 0.7010040283203125, -0.353546142578125, -0.12920379638671875, 1.479583740234375, 1.4833106994628906, 1.7376022338867188, 0.3271217346191406, 0.21031951904296875, 1.4783706665039062, 0.2850189208984375, 0.0069122314453125, 2.0924072265625, 0.0276031494140625, 0.27457427978515625, 1.7858161926269531, -0.1527862548828125, 0.01816558837890625, 0.7325439453125, 0.5294342041015625, 0.3733711242675781, 0.7808761596679688, 0.4319267272949219, -0.4797172546386719, 1.5446701049804688, 0.10248565673828125, 0.4569664001464844, 0.5369110107421875, 1.0376129150390625, 0.1985015869140625, 1.423095703125, 0.8095035552978516, -0.6550674438476562, 0.5061492919921875, -0.21682167053222656, 1.3922309875488281, 0.6453018188476562, 0.67071533203125, 0.7316551208496094, 1.31219482421875, -0.32996368408203125, 0.6071624755859375, 0.5569610595703125, 0.5889625549316406, 0.4457550048828125, 0.8989524841308594, 0.7599983215332031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000033.npy"} +{"epoch": 0.048458149779735685, "step": 34, "batch_size": 64, "mean": 0.8045632839202881, "std": 0.9114744663238525, "min": -0.44609832763671875, "p10": -0.11390285491943357, "median": 0.7149772644042969, "p90": 2.0186904907226566, "max": 3.5767822265625, "pos_frac": 0.828125, "sample": [2.0665130615234375, -0.4026908874511719, 1.166748046875, -0.19202804565429688, 2.61199951171875, 0.5196056365966797, 1.112274169921875, 1.2245941162109375, 0.094573974609375, 0.162017822265625, 0.02552032470703125, -0.07274436950683594, 0.841094970703125, 0.5996856689453125, 1.251129150390625, 3.1798553466796875, 0.16124916076660156, -0.1221466064453125, 1.8323822021484375, 0.0601806640625, 1.1736984252929688, -0.046726226806640625, 1.0916728973388672, 1.8738822937011719, 1.26092529296875, 1.4129257202148438, 1.0841598510742188, -0.44609832763671875, -0.04595947265625, 1.0825462341308594, 0.00736236572265625, 0.942169189453125, 1.460601806640625, 0.7122039794921875, 2.31024169921875, 0.7559242248535156, 0.7177505493164062, 2.898590087890625, 0.1437225341796875, -0.4295234680175781, 0.12296295166015625, 0.3782539367675781, 1.1457996368408203, 1.4564666748046875, 0.6991424560546875, 1.2125816345214844, 0.2834300994873047, -0.19642257690429688, 1.2548370361328125, 0.0059967041015625, 0.03930091857910156, -0.441070556640625, 0.8147506713867188, 0.34149169921875, 0.88934326171875, -0.09466743469238281, 1.9071044921875, 0.16660690307617188, 0.0777435302734375, 0.809967041015625, 2.3755340576171875, 3.5767822265625, 0.3882312774658203, 0.19800186157226562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000034.npy"} +{"epoch": 0.049926578560939794, "step": 35, "batch_size": 64, "mean": 1.2318875789642334, "std": 1.166860580444336, "min": -0.40288543701171875, "p10": 0.17706909179687502, "median": 0.8730354309082031, "p90": 2.526919555664063, "max": 6.984893798828125, "pos_frac": 0.9375, "sample": [1.393341064453125, 0.8610191345214844, 3.527435302734375, 1.8893394470214844, 0.3377685546875, 0.8242721557617188, 1.325469970703125, -0.40288543701171875, 0.0856170654296875, 1.05963134765625, 1.8327484130859375, 0.16969680786132812, 0.9691238403320312, 0.7475166320800781, 0.44173431396484375, 0.766082763671875, 2.1292572021484375, 3.7020263671875, 0.45067596435546875, 0.24059104919433594, 0.6979618072509766, 1.8591995239257812, 2.7803497314453125, 0.23663330078125, 0.28598785400390625, 0.8842697143554688, 0.2741661071777344, 1.0394821166992188, 0.39359283447265625, 0.8618011474609375, 6.984893798828125, 1.5983200073242188, 0.4998779296875, 1.49798583984375, 0.6525592803955078, 3.3240966796875, 0.19427108764648438, 1.53900146484375, 2.2891693115234375, 2.370697021484375, 0.753204345703125, 2.5938720703125, 1.39678955078125, 1.4980545043945312, 0.8133659362792969, -0.024892807006835938, 1.6983184814453125, 2.2559967041015625, 1.29437255859375, 0.5192680358886719, 0.7052383422851562, 0.10711669921875, 0.7245655059814453, 1.83209228515625, 1.1169509887695312, 1.1543865203857422, 0.7160491943359375, -0.23319625854492188, 1.8105545043945312, -0.13494110107421875, 0.7295303344726562, 2.6264190673828125, 1.9766845703125, 0.29622650146484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000035.npy"} +{"epoch": 0.0513950073421439, "step": 36, "batch_size": 64, "mean": 1.1320552825927734, "std": 1.2440820932388306, "min": -2.3649826049804688, "p10": -0.1616352081298828, "median": 0.9098854064941406, "p90": 2.8355712890625004, "max": 4.244781494140625, "pos_frac": 0.84375, "sample": [1.012542724609375, -0.10187339782714844, 1.1936893463134766, 1.129425048828125, 1.2757949829101562, 0.5376453399658203, 0.9230422973632812, -0.7307891845703125, 0.05372047424316406, 3.77166748046875, 2.0768508911132812, 0.2398395538330078, -0.1875762939453125, 0.41735076904296875, 0.29390525817871094, 1.3709945678710938, -0.14603805541992188, 1.72283935546875, 1.380523681640625, 0.610076904296875, -0.0508575439453125, -0.38620758056640625, -0.1683197021484375, 0.2493610382080078, 2.86138916015625, 3.863170623779297, 1.15277099609375, -0.217559814453125, 3.5848388671875, 0.7128524780273438, 0.7873477935791016, 1.3217544555664062, 0.2239990234375, 2.0942611694335938, 0.010753631591796875, 0.07952499389648438, 3.194366455078125, 0.7137603759765625, 2.143096923828125, 0.41086578369140625, 2.456268310546875, 1.1379241943359375, 2.5555267333984375, 0.8164710998535156, 2.77532958984375, 1.3341064453125, 0.021820068359375, 4.244781494140625, 1.0206966400146484, -2.3649826049804688, 1.4710044860839844, 0.7163314819335938, 2.7306671142578125, 0.6087150573730469, 2.5670852661132812, 1.2133255004882812, -0.2903900146484375, 0.5756072998046875, 2.8803558349609375, 0.896728515625, 1.6515960693359375, 2.694366455078125, 0.7706222534179688, 0.5427818298339844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000036.npy"} +{"epoch": 0.05286343612334802, "step": 37, "batch_size": 64, "mean": 0.9501620531082153, "std": 1.3475593328475952, "min": -2.3604736328125, "p10": -0.38649749755859375, "median": 0.5825166702270508, "p90": 2.631443023681641, "max": 6.00604248046875, "pos_frac": 0.75, "sample": [1.7803421020507812, 0.1666126251220703, 1.2348556518554688, 1.3459854125976562, 0.06350326538085938, -0.24201202392578125, -0.47327423095703125, 0.39640045166015625, 1.459747314453125, 0.27457237243652344, 0.571563720703125, 3.428863525390625, 0.5081672668457031, -0.721710205078125, -0.39306640625, 0.31517791748046875, 0.7195644378662109, 0.10567665100097656, 1.9476757049560547, 6.00604248046875, 0.9227371215820312, 4.06781005859375, 1.177978515625, 1.0313911437988281, 0.9603652954101562, 1.3326416015625, 0.5750045776367188, 0.3632850646972656, 1.3086090087890625, 1.494873046875, 0.5900287628173828, -0.3711700439453125, 0.4417762756347656, -1.0695571899414062, 0.75286865234375, -0.08498764038085938, 0.377593994140625, 2.052978515625, 1.5491256713867188, 3.0401611328125, -0.31938743591308594, 2.53875732421875, 0.2938385009765625, 1.76629638671875, -0.0414276123046875, 0.5025253295898438, -0.10210037231445312, -0.5984039306640625, 0.63824462890625, 1.8736457824707031, 2.4405288696289062, -0.020355224609375, 2.6711654663085938, 2.3302993774414062, 0.28510284423828125, 2.7378005981445312, 1.8312873840332031, -2.3604736328125, -0.036792755126953125, -0.5692138671875, 2.3036766052246094, 3.2936553955078125, -0.0384063720703125, 0.38190460205078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000037.npy"} +{"epoch": 0.05433186490455213, "step": 38, "batch_size": 64, "mean": 1.3563368320465088, "std": 1.7058887481689453, "min": -1.2567977905273438, "p10": -0.20231876373291013, "median": 0.9430198669433594, "p90": 3.180750274658205, "max": 7.969146728515625, "pos_frac": 0.796875, "sample": [0.35718536376953125, 7.969146728515625, 4.14862060546875, 1.8074226379394531, -0.16968917846679688, -0.004253387451171875, 0.9245529174804688, -0.4014739990234375, 1.4210128784179688, 2.30621337890625, 1.5365982055664062, 0.96148681640625, 1.0836944580078125, 2.6604843139648438, 1.4103622436523438, -0.21630287170410156, -0.0821990966796875, 0.0489959716796875, -0.11724853515625, 2.4453201293945312, 0.6276130676269531, 1.6512908935546875, 2.7146759033203125, 0.24423789978027344, 2.715057373046875, 0.26593589782714844, 2.42608642578125, 2.6100616455078125, 1.6014060974121094, -0.2786216735839844, 0.5016136169433594, 3.3803329467773438, -1.2567977905273438, 1.292327880859375, 0.6267185211181641, 0.5487060546875, 0.8697509765625, 6.1746826171875, 0.7405548095703125, -0.03708648681640625, 3.8610153198242188, 5.322479248046875, 1.6212081909179688, 0.2769317626953125, 0.3282585144042969, -0.6648712158203125, 0.39415740966796875, 0.2005462646484375, 0.8535919189453125, 1.0558929443359375, -0.8385772705078125, 0.2808685302734375, -0.08730316162109375, 1.6823310852050781, 1.0840473175048828, 1.90216064453125, 1.790740966796875, 5.464012145996094, 1.943603515625, 0.38134002685546875, 2.223339080810547, -0.363433837890625, 0.525054931640625, 2.059680938720703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000038.npy"} +{"epoch": 0.055800293685756244, "step": 39, "batch_size": 64, "mean": 1.661270260810852, "std": 1.4308812618255615, "min": -2.043365478515625, "p10": 0.2959400177001954, "median": 1.3179140090942383, "p90": 3.6941799163818367, "max": 5.5604400634765625, "pos_frac": 0.921875, "sample": [2.28466796875, 2.1719207763671875, 5.153289794921875, 2.0862045288085938, 1.5940933227539062, -0.582794189453125, 3.2378692626953125, 2.4223594665527344, 1.2639408111572266, 2.6162776947021484, 0.62249755859375, 3.3892135620117188, 1.3951873779296875, 1.033651351928711, 4.059379577636719, 1.0279464721679688, 0.3799285888671875, 0.812103271484375, 0.262908935546875, 1.258453369140625, -0.1173248291015625, 1.6656417846679688, 2.4299545288085938, 0.8299026489257812, 0.8090534210205078, 2.1975021362304688, -0.1520843505859375, 1.6841983795166016, 4.789306640625, 0.3730125427246094, 0.738311767578125, 0.5928821563720703, 2.9193954467773438, 2.1005096435546875, -2.043365478515625, 2.1294898986816406, 0.5549774169921875, 1.37188720703125, 3.7797164916992188, 5.5604400634765625, 1.1661853790283203, 1.8416290283203125, 4.111000061035156, 0.9280319213867188, 1.2473030090332031, 0.7549896240234375, 0.0735321044921875, 0.8406753540039062, 1.7746658325195312, 0.5743770599365234, 0.8733367919921875, 3.4945945739746094, 2.5603866577148438, 2.5959320068359375, 0.6188449859619141, 4.272987365722656, 3.0732269287109375, 0.9506931304931641, 1.7112789154052734, 0.5737876892089844, 0.7633323669433594, -0.012180328369140625, 0.5238437652587891, 2.306337356567383], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000039.npy"} +{"epoch": 0.05726872246696035, "step": 40, "batch_size": 64, "mean": 1.5557358264923096, "std": 1.7941007614135742, "min": -0.9403076171875, "p10": -0.16397781372070305, "median": 1.1564788818359375, "p90": 3.891345596313477, "max": 8.24774169921875, "pos_frac": 0.859375, "sample": [-0.601104736328125, 0.15721893310546875, 0.6253204345703125, 1.4404792785644531, 1.1649284362792969, 0.8715133666992188, 0.6986541748046875, 1.0828704833984375, 0.019121170043945312, 2.002256393432617, 7.3303985595703125, 3.8150634765625, 0.8016777038574219, 3.9561424255371094, 1.2317047119140625, -0.4428443908691406, 2.0833892822265625, 1.565582275390625, 0.09300041198730469, -0.9403076171875, 3.0159149169921875, -0.19671249389648438, 0.646942138671875, 0.1615447998046875, 2.3714752197265625, 1.551605224609375, 0.14413070678710938, 0.9357833862304688, 0.4636192321777344, -0.3660888671875, 3.7685317993164062, -0.07369613647460938, 3.367645263671875, -0.44769287109375, 0.6054286956787109, 4.268035888671875, 0.06793594360351562, 3.9240379333496094, 0.504547119140625, 1.0905494689941406, 4.752647399902344, 0.9206924438476562, 1.3084259033203125, 8.24774169921875, -0.4919548034667969, 1.5835800170898438, 0.4697418212890625, 2.0676116943359375, 2.6245155334472656, 2.2839813232421875, 1.148712158203125, 2.1335983276367188, 3.3432998657226562, 1.7840957641601562, 1.16424560546875, 1.4571456909179688, 5.0824737548828125, 0.01499176025390625, 1.4302597045898438, -0.08759689331054688, 0.03656768798828125, 2.2022171020507812, 2.5958938598632812, 0.7355995178222656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000040.npy"} +{"epoch": 0.05873715124816446, "step": 41, "batch_size": 64, "mean": 1.8329112529754639, "std": 2.006847620010376, "min": -2.9318389892578125, "p10": -0.09852333068847643, "median": 1.4682912826538086, "p90": 4.234089660644532, "max": 8.529579162597656, "pos_frac": 0.890625, "sample": [0.9262008666992188, 0.8810882568359375, 4.0255889892578125, 1.15167236328125, 1.1148300170898438, 2.2075424194335938, 3.7003173828125, 2.1318588256835938, -0.650909423828125, 3.5461807250976562, 5.9547119140625, 0.6876068115234375, 1.9691162109375, 8.529579162597656, 0.25676918029785156, 0.3228302001953125, 0.9218368530273438, 1.145111083984375, -0.150604248046875, -2.09442138671875, 1.600067138671875, 3.595561981201172, 4.460334777832031, 0.8341941833496094, 7.0517578125, 1.7307510375976562, 4.0932159423828125, 4.294464111328125, 0.8145027160644531, 0.4136314392089844, 2.2312164306640625, 0.545318603515625, 6.084808349609375, 0.7645359039306641, 1.3651256561279297, 3.22955322265625, 1.9459609985351562, 2.24432373046875, -0.27001953125, 2.3298797607421875, 3.7579498291015625, 1.8153133392333984, -2.9318389892578125, 3.2721405029296875, 0.340606689453125, 0.72271728515625, 2.3917160034179688, 2.0280723571777344, 2.170642852783203, 0.19733810424804688, 1.259246826171875, 0.8467788696289062, 1.6796112060546875, 1.5714569091796875, 0.6752872467041016, 1.0916194915771484, -0.20726776123046875, 3.707866668701172, 0.48320770263671875, 0.14780235290527344, 0.022998809814453125, -0.33703041076660156, 1.7411270141601562, 4.9228668212890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000041.npy"} +{"epoch": 0.06020558002936858, "step": 42, "batch_size": 64, "mean": 2.4430971145629883, "std": 2.269852638244629, "min": -1.5936965942382812, "p10": 0.18851280212402352, "median": 2.023622512817383, "p90": 5.565797424316407, "max": 10.49078369140625, "pos_frac": 0.921875, "sample": [1.6454544067382812, 0.28168487548828125, 2.2425384521484375, 9.124908447265625, 10.49078369140625, 2.329345703125, 0.15639495849609375, 5.585540771484375, 2.2704544067382812, 2.8065414428710938, 1.9865951538085938, 6.225669860839844, 2.7176151275634766, 6.267242431640625, 0.6770420074462891, 2.0773239135742188, 2.9420013427734375, -0.04108428955078125, 2.68206787109375, 2.9412384033203125, 1.8058700561523438, 1.4852008819580078, 3.4903182983398438, 0.42372894287109375, 1.2425079345703125, 2.6503143310546875, 0.0562896728515625, 2.232147216796875, 1.8798637390136719, -0.42188262939453125, 2.060649871826172, 5.3470611572265625, 0.4117889404296875, 6.060089111328125, 2.2157058715820312, 1.269317626953125, 0.6838493347167969, 1.3069534301757812, 1.8498687744140625, -0.62652587890625, 3.9682769775390625, -0.453521728515625, 1.4656982421875, 5.5197296142578125, 1.6385841369628906, 4.257347106933594, 0.7619361877441406, 0.2634544372558594, 1.2707023620605469, 1.5637741088867188, 0.6918487548828125, 5.96429443359375, -1.5936965942382812, 0.7961406707763672, 2.6426773071289062, 3.5308303833007812, 0.2882728576660156, 1.6505279541015625, 4.829032897949219, 2.5759124755859375, 3.854400634765625, 5.009391784667969, 0.6690692901611328, 4.361053466796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000042.npy"} +{"epoch": 0.06167400881057269, "step": 43, "batch_size": 64, "mean": 2.442809581756592, "std": 1.9496551752090454, "min": -1.4351043701171875, "p10": 0.26261138916015625, "median": 2.2545957565307617, "p90": 5.022203826904297, "max": 10.052215576171875, "pos_frac": 0.953125, "sample": [0.2620964050292969, 0.5307502746582031, -0.07110214233398438, 3.4310989379882812, 3.6869430541992188, 0.99639892578125, 3.8132858276367188, 2.6451568603515625, 2.6368579864501953, 6.619773864746094, 0.17012786865234375, 2.149648666381836, 0.9883899688720703, 1.6595954895019531, 3.3522415161132812, 1.1884346008300781, 3.429168701171875, 5.036918640136719, 1.852447509765625, 0.38503074645996094, 5.931884765625, 2.4200439453125, 5.0406036376953125, 1.2578964233398438, 2.9281234741210938, 2.493377685546875, 0.2638130187988281, 2.2650928497314453, 0.10572433471679688, 4.9878692626953125, 1.5696563720703125, 2.5766754150390625, 3.9751815795898438, 2.60211181640625, 2.674896240234375, 3.495441436767578, 1.069915771484375, 2.244098663330078, 2.0417003631591797, 0.6188583374023438, 1.5271682739257812, 2.1547470092773438, 6.7868804931640625, 3.7316131591796875, 0.8685302734375, -1.4351043701171875, 1.224996566772461, 2.5656890869140625, 1.8525543212890625, 3.843902587890625, 3.073314666748047, 3.5510787963867188, 2.1605186462402344, 0.0019321441650390625, 2.0753021240234375, 10.052215576171875, 3.286956787109375, 1.8565521240234375, -0.3141822814941406, 0.372528076171875, 3.0262603759765625, 5.210735321044922, 3.271820068359375, 0.267578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000043.npy"} +{"epoch": 0.0631424375917768, "step": 44, "batch_size": 64, "mean": 2.79276704788208, "std": 2.1667473316192627, "min": -0.8519744873046875, "p10": 0.5859107971191406, "median": 2.3977203369140625, "p90": 5.579143524169924, "max": 10.09588623046875, "pos_frac": 0.953125, "sample": [3.9048614501953125, 2.5586929321289062, 1.5786056518554688, 0.6625423431396484, 6.211345672607422, 2.419677734375, 3.01202392578125, 1.5550765991210938, 2.3518600463867188, 5.229576110839844, 6.401313781738281, 0.4815101623535156, 4.779060363769531, 4.9478302001953125, 4.939849853515625, 3.2212066650390625, 0.9130439758300781, 2.375762939453125, 2.053281784057617, 5.7289581298828125, 0.5825176239013672, 2.8672637939453125, 2.6705875396728516, 7.9504547119140625, 2.486175537109375, 3.1745986938476562, 0.7161483764648438, 1.330780029296875, 5.001617431640625, 1.7666893005371094, 2.936065673828125, -0.028429031372070312, 1.7243595123291016, 1.3251304626464844, 1.974740982055664, 3.1199569702148438, 0.7998123168945312, 1.9022369384765625, 4.379852294921875, 2.341766357421875, 1.3996124267578125, 1.860076904296875, 3.902130126953125, 3.7816848754882812, 2.0951385498046875, 2.7619705200195312, 2.647491455078125, 3.02325439453125, 0.5938282012939453, 0.4058799743652344, 6.7511138916015625, 4.43280029296875, 1.67724609375, 3.1543731689453125, 1.011739730834961, 10.09588623046875, 0.7560081481933594, -0.8519744873046875, 3.7899017333984375, 1.69183349609375, -0.7720203399658203, 0.20571517944335938, 1.501251220703125, 8.473731994628906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000044.npy"} +{"epoch": 0.06461086637298091, "step": 45, "batch_size": 64, "mean": 2.1753129959106445, "std": 2.469676971435547, "min": -1.6701431274414062, "p10": -0.21452560424804684, "median": 1.4464025497436523, "p90": 5.598724365234376, "max": 11.6182861328125, "pos_frac": 0.84375, "sample": [3.3045120239257812, 0.9381866455078125, 0.34355926513671875, 3.1077613830566406, 0.2890281677246094, 1.4173564910888672, 3.2668495178222656, 6.3368682861328125, 5.444915771484375, 2.4598388671875, 0.9235763549804688, 7.416648864746094, 3.5307750701904297, 0.3104286193847656, 0.007289886474609375, 1.4754486083984375, -0.24982452392578125, 1.8218498229980469, 0.042755126953125, 4.9788970947265625, -0.19359588623046875, 0.3784523010253906, 2.5172348022460938, 0.7924461364746094, 0.7106494903564453, 5.121337890625, 5.664642333984375, 4.1016998291015625, 0.09058952331542969, 0.6059055328369141, 5.077239990234375, 2.67333984375, 5.812164306640625, -0.14371109008789062, -0.5189971923828125, 1.368377685546875, 5.7960052490234375, 3.6108551025390625, -0.2234954833984375, 1.3991851806640625, 1.6059932708740234, 0.30661773681640625, -1.6701431274414062, -1.0106277465820312, 1.379302978515625, 11.6182861328125, 1.2691802978515625, 7.169853210449219, 3.1139602661132812, -0.7489700317382812, 2.6755619049072266, 1.502960205078125, 2.0133628845214844, 0.24318885803222656, 1.3504505157470703, 5.2136993408203125, 0.8504638671875, 2.5717010498046875, 2.083181381225586, 1.5474014282226562, -0.10901069641113281, 4.389549255371094, 0.7398681640625, -0.6928482055664062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000045.npy"} +{"epoch": 0.06607929515418502, "step": 46, "batch_size": 64, "mean": 2.8499715328216553, "std": 3.068298816680908, "min": -2.4073867797851562, "p10": -0.0946937561035156, "median": 2.2493247985839844, "p90": 6.662253189086915, "max": 14.347259521484375, "pos_frac": 0.859375, "sample": [0.46659088134765625, -0.079254150390625, 5.189666748046875, 7.146759033203125, 3.92388916015625, -2.4073867797851562, 2.2213363647460938, 1.60968017578125, 6.70745849609375, 3.5691967010498047, 6.556774139404297, 1.7341346740722656, 0.24813079833984375, 2.3890380859375, 4.103515625, 0.16102218627929688, -0.3400688171386719, 0.6284027099609375, 4.5629730224609375, 10.94451904296875, -0.43232154846191406, 0.0337982177734375, -0.20252227783203125, 3.4799652099609375, 0.11851882934570312, 4.3006134033203125, 3.1732635498046875, 1.2138137817382812, 3.337963104248047, 2.63226318359375, 4.112579345703125, 0.6205615997314453, -2.2833251953125, 0.7545547485351562, 3.4360504150390625, 5.366893768310547, 5.497707366943359, -0.10131072998046875, 8.180580139160156, -0.0145111083984375, 0.21286964416503906, 4.0003509521484375, 5.449974060058594, 0.47910308837890625, 14.347259521484375, 0.3905792236328125, 6.1465301513671875, 5.931434631347656, 0.7168655395507812, 2.48065185546875, 1.5321197509765625, 8.055633544921875, 3.038541793823242, 0.6772499084472656, 1.8967475891113281, 7.608253479003906, 1.5148239135742188, 2.277313232421875, 3.6109466552734375, 5.390716552734375, 2.0089263916015625, 1.0011310577392578, 1.3736438751220703, -0.30500030517578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000046.npy"} +{"epoch": 0.06754772393538913, "step": 47, "batch_size": 64, "mean": 2.840768337249756, "std": 2.613548994064331, "min": -2.39349365234375, "p10": -0.11950988769531223, "median": 2.103339195251465, "p90": 6.352000427246094, "max": 9.89508056640625, "pos_frac": 0.890625, "sample": [1.8192901611328125, 3.2873382568359375, 0.67010498046875, 1.2982654571533203, 5.090778350830078, 1.158670425415039, 0.5711212158203125, 1.706207275390625, 6.72772216796875, 2.8251590728759766, 0.21440887451171875, 5.719280242919922, 6.2896270751953125, 1.2484207153320312, 3.6047286987304688, 2.1323318481445312, -0.23333740234375, 1.9200820922851562, 1.9044189453125, 5.512481689453125, 6.146308898925781, 1.5094375610351562, 0.7093124389648438, 3.282459259033203, 3.38958740234375, 1.360586166381836, -0.7545928955078125, 6.3598785400390625, 7.338287353515625, 2.665416717529297, -0.4126300811767578, 3.327117919921875, 6.499042510986328, 3.561431884765625, -2.39349365234375, 0.146087646484375, 0.33386993408203125, 1.1248512268066406, 1.4536666870117188, 9.89508056640625, 6.089508056640625, 2.313413619995117, 1.2343769073486328, 2.91131591796875, 8.7781982421875, 6.3336181640625, -0.2670440673828125, 3.2668724060058594, 9.780853271484375, -0.5697250366210938, 1.5237655639648438, 1.790771484375, -1.0720672607421875, 1.4345283508300781, 2.7680816650390625, 1.3749351501464844, 5.156166076660156, 1.9610824584960938, 3.840911865234375, 2.018054962158203, 4.465179443359375, 3.409271240234375, 2.183961868286133, 2.0743465423583984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000047.npy"} +{"epoch": 0.06901615271659324, "step": 48, "batch_size": 64, "mean": 2.877974510192871, "std": 2.9358506202697754, "min": -6.9232177734375, "p10": -0.038709449768066354, "median": 2.2776565551757812, "p90": 6.89225311279297, "max": 11.475784301757812, "pos_frac": 0.890625, "sample": [3.65875244140625, 0.9060592651367188, 7.0247344970703125, 8.037147521972656, 6.2869110107421875, 3.1755447387695312, -1.073486328125, 2.2208404541015625, 4.703033447265625, 3.7669219970703125, 1.819549560546875, 6.5831298828125, 1.83135986328125, -0.386871337890625, 0.00933837890625, 1.8355941772460938, 5.870330810546875, -0.05930137634277344, 1.221588134765625, -0.3409309387207031, 1.0049705505371094, 8.375740051269531, -0.44937896728515625, 1.9721603393554688, 5.637306213378906, 1.5529556274414062, 3.5509185791015625, 3.3101367950439453, 0.15979766845703125, 3.0058841705322266, 1.6121635437011719, 2.0368785858154297, 2.7106857299804688, 2.157258987426758, 8.436599731445312, 4.172441482543945, 4.9091796875, 9.086235046386719, 3.238372802734375, 1.0983657836914062, 1.3006172180175781, 1.2100391387939453, 1.6550064086914062, 2.7113494873046875, 1.1465606689453125, 3.699502944946289, 2.694284439086914, 1.841827392578125, -1.661376953125, 8.3824462890625, 2.7319164276123047, 2.930929183959961, 2.33447265625, 11.475784301757812, 3.9599456787109375, -6.9232177734375, 5.934528350830078, 2.034900665283203, 0.7655754089355469, 0.5584716796875, 4.787628173828125, 1.4807548522949219, 2.4851417541503906, 1.9843673706054688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000048.npy"} +{"epoch": 0.07048458149779736, "step": 49, "batch_size": 64, "mean": 3.5184361934661865, "std": 3.6862456798553467, "min": -3.5025405883789062, "p10": -0.08754997253417954, "median": 2.9876184463500977, "p90": 7.70450439453125, "max": 14.711380004882812, "pos_frac": 0.890625, "sample": [3.6302032470703125, 0.5436477661132812, 0.4235668182373047, 3.4044876098632812, 3.019479751586914, 5.706729888916016, 0.31093597412109375, 2.311725616455078, -1.00579833984375, 0.8381500244140625, 0.06939506530761719, 2.1940536499023438, 2.9557571411132812, 7.756080627441406, 6.64105224609375, 2.3030624389648438, 4.201168060302734, 1.807546615600586, 3.5951309204101562, 0.73046875, 1.7814922332763672, -3.148923873901367, 3.3990936279296875, -0.14714622497558594, 14.057464599609375, 10.052444458007812, 1.049041748046875, 5.904804229736328, 4.169921875, 0.6349048614501953, 7.584159851074219, 14.711380004882812, 4.6027679443359375, 0.6024169921875, -0.675567626953125, -3.5025405883789062, 3.385589599609375, 3.532041549682617, 4.468963623046875, 3.8557586669921875, 2.680356979370117, 0.05150794982910156, -0.57159423828125, 4.868694305419922, 5.211448669433594, 14.18927001953125, 2.365325927734375, 2.0281143188476562, 7.850090026855469, 5.980010986328125, 6.480079650878906, 2.897960662841797, 4.590782165527344, 3.97662353515625, 11.988616943359375, 0.8440532684326172, -0.1999664306640625, 2.7769012451171875, 1.7648506164550781, 4.6578826904296875, 5.1352081298828125, 1.7454872131347656, 4.939907073974609, 1.1733856201171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000049.npy"} +{"epoch": 0.07195301027900147, "step": 50, "batch_size": 64, "mean": 3.8532233238220215, "std": 3.6484310626983643, "min": -2.952472686767578, "p10": -0.5097091674804687, "median": 3.1749134063720703, "p90": 8.713336181640626, "max": 14.080612182617188, "pos_frac": 0.859375, "sample": [6.015045166015625, -0.30696868896484375, -1.638397216796875, 9.750770568847656, 1.2790374755859375, 3.0621414184570312, 7.4101715087890625, 1.9560279846191406, 4.035160064697266, 1.7683038711547852, -0.4518280029296875, 6.5385894775390625, 3.361663818359375, 14.0477294921875, 0.698333740234375, 2.183074951171875, 6.8311004638671875, 3.020967483520508, 8.899032592773438, 3.878204345703125, 0.9607810974121094, 5.0369110107421875, 9.009696960449219, 2.3343048095703125, 2.131488800048828, 4.568454742431641, 0.2797355651855469, -0.6319313049316406, 3.2876853942871094, 8.280044555664062, 6.435859680175781, 5.352210998535156, 9.22445297241211, 14.080612182617188, -2.952472686767578, 0.8235569000244141, 1.538543701171875, 2.0575332641601562, 0.23209381103515625, 4.112571716308594, 2.556243896484375, 1.7233257293701172, 6.143455505371094, 4.458793640136719, 7.086414337158203, 6.585746765136719, -0.534515380859375, 1.9978141784667969, 5.6413116455078125, -0.6705551147460938, 6.3460235595703125, 2.6407318115234375, 4.405689239501953, 5.542961120605469, 2.5075759887695312, 5.5762786865234375, 6.916351318359375, 2.1731719970703125, 11.439788818359375, -0.7605361938476562, 6.6921844482421875, 1.6062545776367188, -2.0253143310546875, 0.0568084716796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000050.npy"} +{"epoch": 0.07342143906020558, "step": 51, "batch_size": 64, "mean": 3.7630162239074707, "std": 4.703429698944092, "min": -5.166229248046875, "p10": -0.41556549072265614, "median": 2.526630401611328, "p90": 9.440888977050783, "max": 18.767913818359375, "pos_frac": 0.828125, "sample": [9.215057373046875, 4.0721588134765625, 0.8317527770996094, -0.5561485290527344, 0.5831527709960938, 0.25457763671875, 8.388870239257812, -0.7456722259521484, 2.1972885131835938, -0.2749176025390625, 10.6676025390625, 7.185367584228516, 2.0888805389404297, 4.586181640625, 2.1671714782714844, 4.273223876953125, 3.6186141967773438, -0.9068984985351562, 9.537673950195312, 5.864185333251953, 4.116769790649414, -0.451202392578125, -0.03157615661621094, 3.8654251098632812, 1.6356964111328125, 7.602939605712891, 0.7411518096923828, 5.819000244140625, 0.9196319580078125, 4.769340515136719, 18.767913818359375, 3.1243667602539062, 0.5917701721191406, 5.9184722900390625, 3.0823631286621094, 14.094009399414062, 1.0983238220214844, 14.516128540039062, 5.601112365722656, 1.9652576446533203, 2.1726150512695312, 0.8728599548339844, 3.8297576904296875, 8.844024658203125, 1.3051948547363281, 0.11056900024414062, 5.011528015136719, -4.1429595947265625, -0.31855010986328125, 17.345687866210938, 12.862747192382812, 6.658042907714844, 7.170555114746094, -0.3324127197265625, 1.6259326934814453, 5.575187683105469, 0.07121658325195312, 0.26104736328125, 2.63189697265625, -5.166229248046875, 2.5373916625976562, -1.484771728515625, 2.515869140625, 0.08082199096679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000051.npy"} +{"epoch": 0.07488986784140969, "step": 52, "batch_size": 64, "mean": 5.8035993576049805, "std": 4.671248912811279, "min": -2.263275146484375, "p10": 0.8415302276611332, "median": 4.752958297729492, "p90": 11.897981262207031, "max": 19.9918212890625, "pos_frac": 0.9375, "sample": [1.7556610107421875, 11.919052124023438, 4.215843200683594, 11.757377624511719, 7.0641937255859375, 9.878311157226562, 3.346660614013672, 6.8171844482421875, 9.1195068359375, 13.230865478515625, 0.0894927978515625, 12.631404876708984, 5.3062896728515625, 5.473968505859375, 8.705045700073242, 8.152509689331055, 3.1050186157226562, 4.842931747436523, 16.39698028564453, 9.47406005859375, 3.239412307739258, 1.2596893310546875, 11.714401245117188, 2.956930160522461, 0.05569267272949219, 7.7087249755859375, 2.5213394165039062, 2.273649215698242, 8.478780746459961, 19.9918212890625, 9.068328857421875, 1.8976116180419922, 9.565040588378906, 4.763103485107422, 3.97637939453125, 13.368209838867188, 3.863800048828125, -0.42706298828125, 4.333076477050781, -2.263275146484375, -2.2609786987304688, 2.5997238159179688, 6.184345245361328, 6.038612365722656, 1.4183616638183594, 9.258750915527344, 4.1589813232421875, 11.84881591796875, 7.000087738037109, 2.028097152709961, 0.6623191833496094, 3.2360687255859375, 3.1067771911621094, 4.7428131103515625, 3.7026901245117188, 9.698600769042969, 5.681186676025391, 1.3438873291015625, 16.558494567871094, -0.8646888732910156, 5.809478759765625, 3.7965736389160156, 2.5375919342041016, 1.5157890319824219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000052.npy"} +{"epoch": 0.0763582966226138, "step": 53, "batch_size": 64, "mean": 5.858806610107422, "std": 6.35233736038208, "min": -3.1304855346679688, "p10": 0.4056007385253907, "median": 3.9252796173095703, "p90": 14.756396865844726, "max": 28.422332763671875, "pos_frac": 0.921875, "sample": [-1.8813934326171875, 3.6830215454101562, 3.3217315673828125, 3.115997314453125, 1.29541015625, 3.6913928985595703, 13.428291320800781, 0.250579833984375, 7.8429718017578125, -1.36865234375, 2.4702529907226562, 27.430328369140625, -2.705322265625, 1.0210342407226562, 6.494873046875, 7.216503143310547, 0.973663330078125, 10.53271484375, 2.6043167114257812, 11.796211242675781, 4.0410919189453125, 6.144317626953125, 11.2779541015625, 7.136871337890625, 14.645854949951172, 1.48614501953125, 4.985252380371094, 0.6903839111328125, 5.381538391113281, 1.622610092163086, 17.159576416015625, 8.510501861572266, 7.305377960205078, 8.176849365234375, 3.809467315673828, 0.4496498107910156, 0.3867225646972656, 4.203441619873047, 1.0984668731689453, 4.139215469360352, 3.066650390625, 7.326005935668945, 1.610891342163086, 13.750099182128906, -3.1304855346679688, 17.16851806640625, 18.06982421875, 3.4354934692382812, 5.1846923828125, 3.2023563385009766, 2.1983184814453125, 4.161773681640625, 2.6891326904296875, 1.8874664306640625, 3.1546096801757812, 4.336627960205078, 2.9751510620117188, 28.422332763671875, 2.5500526428222656, 14.80377197265625, 4.518444061279297, 4.751708984375, 15.416847229003906, -0.45187950134277344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000053.npy"} +{"epoch": 0.07782672540381791, "step": 54, "batch_size": 64, "mean": 5.140138626098633, "std": 4.829864501953125, "min": -0.9620780944824219, "p10": 0.32008800506591817, "median": 4.017575263977051, "p90": 11.596824645996096, "max": 21.24468994140625, "pos_frac": 0.9375, "sample": [7.621147155761719, 4.414459228515625, 8.652446746826172, 1.1361351013183594, 0.5490531921386719, 3.175018310546875, 0.9749488830566406, 2.4886722564697266, 5.302879333496094, 7.856266021728516, 1.013010025024414, 5.9959259033203125, 11.75848388671875, 2.078256607055664, 4.490947723388672, 10.457962036132812, -0.046230316162109375, 4.820980072021484, 5.111654281616211, 5.55145263671875, 0.014324188232421875, 1.3942184448242188, 16.44689178466797, 0.2421417236328125, 7.308082580566406, 0.5242767333984375, 4.648773193359375, 5.3415679931640625, 5.847175598144531, 5.922248840332031, 1.8288326263427734, -0.20121002197265625, 7.8827056884765625, -0.017307281494140625, 1.1291313171386719, 3.6520137786865234, 12.126335144042969, 3.0260772705078125, 6.513275146484375, 12.639846801757812, 1.7328681945800781, 2.323619842529297, 21.24468994140625, 10.881187438964844, -0.9620780944824219, 3.027841567993164, 0.09111595153808594, 13.53460693359375, 0.5019626617431641, 3.2708892822265625, 1.0141372680664062, 4.383136749267578, 10.6690673828125, 1.6959228515625, 2.13507080078125, 18.361114501953125, 1.3718624114990234, 4.727449417114258, 3.5568008422851562, 11.219619750976562, 2.4278430938720703, 10.282501220703125, 1.1940174102783203, 10.610748291015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000054.npy"} +{"epoch": 0.07929515418502203, "step": 55, "batch_size": 64, "mean": 6.065757751464844, "std": 6.9302568435668945, "min": -9.974044799804688, "p10": -1.4487709045410153, "median": 5.085441589355469, "p90": 15.090407562255862, "max": 26.80706787109375, "pos_frac": 0.8125, "sample": [8.024360656738281, 9.885421752929688, 5.123321533203125, -3.736480712890625, 23.239410400390625, -1.5897140502929688, 7.198402404785156, 4.482112884521484, 16.632537841796875, -0.8401870727539062, 6.244182586669922, 5.5690460205078125, 2.2930450439453125, 11.554946899414062, 18.629638671875, 9.412750244140625, 4.592010498046875, 8.66217041015625, 26.80706787109375, 3.6478271484375, 11.172866821289062, -1.58026123046875, 4.092460632324219, 8.440834045410156, 22.857940673828125, 0.35133934020996094, 15.361419677734375, 0.4069976806640625, 0.3116321563720703, 8.238872528076172, 4.938163757324219, -2.5279998779296875, 5.842643737792969, 0.0079345703125, 11.943038940429688, 4.3851165771484375, -1.6070690155029297, -0.12729644775390625, 0.09222221374511719, -5.101463317871094, 9.090404510498047, 3.1860218048095703, 1.4642410278320312, 14.458045959472656, 6.776679992675781, -1.1419601440429688, 0.8346118927001953, 10.406982421875, 5.0475616455078125, -9.974044799804688, 2.818695068359375, 14.439193725585938, 6.8582611083984375, 4.157207489013672, 7.096149444580078, 5.387956619262695, -0.8287944793701172, 8.062065124511719, 7.663368225097656, 12.98004150390625, 4.962226867675781, -0.20335769653320312, 4.5840606689453125, 16.751571655273438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000055.npy"} +{"epoch": 0.08076358296622614, "step": 56, "batch_size": 64, "mean": 5.75810432434082, "std": 6.839889049530029, "min": -7.942081451416016, "p10": -1.4366912841796873, "median": 5.166349411010742, "p90": 14.554774475097663, "max": 25.543807983398438, "pos_frac": 0.8125, "sample": [-1.5157470703125, 6.733245849609375, -3.1208953857421875, 8.423038482666016, -0.8471221923828125, -7.942081451416016, 9.786949157714844, -1.1985912322998047, 3.358367919921875, -3.5463638305664062, 3.446014404296875, 1.8240852355957031, 5.302101135253906, 15.23748779296875, 2.243429183959961, -0.04460906982421875, -1.1580009460449219, 10.883071899414062, 9.561084747314453, 7.479316711425781, -2.1591339111328125, 10.281326293945312, 2.64300537109375, 21.681121826171875, 7.536834716796875, 15.727020263671875, 1.1421546936035156, 0.14380836486816406, 9.265785217285156, 2.7631778717041016, 7.484588623046875, 7.120475769042969, 8.745986938476562, 1.5236434936523438, 10.352890014648438, 12.10903549194336, 0.5202903747558594, 19.61529541015625, 7.659202575683594, 6.535789489746094, 5.656238555908203, 5.030597686767578, 3.0087203979492188, 20.209625244140625, 6.9645538330078125, 12.4315185546875, 6.061712265014648, 10.748062133789062, 0.30513763427734375, 25.543807983398438, 1.497171401977539, 0.0480499267578125, 4.1146240234375, 12.961776733398438, -7.021575927734375, 2.968679428100586, -1.252227783203125, 21.046241760253906, 4.931232452392578, -3.0307464599609375, 3.3074569702148438, 2.172943115234375, 5.918853759765625, 9.299118041992188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000056.npy"} +{"epoch": 0.08223201174743025, "step": 57, "batch_size": 64, "mean": 6.474340438842773, "std": 6.0126261711120605, "min": -9.234222412109375, "p10": 0.21091995239257827, "median": 6.256856918334961, "p90": 14.428468322753908, "max": 21.887466430664062, "pos_frac": 0.921875, "sample": [7.941349029541016, 3.626401901245117, 0.5705413818359375, -2.128997802734375, 10.112533569335938, -9.234222412109375, 11.219345092773438, 0.35858154296875, 13.146728515625, 6.241542816162109, 16.040176391601562, 3.717439651489258, 21.887466430664062, 3.57379150390625, 6.705718994140625, 6.541412353515625, -1.1481704711914062, 3.4126663208007812, 6.421882629394531, 20.492218017578125, 10.417755126953125, 16.257369995117188, 11.017974853515625, 4.017475128173828, 6.462642669677734, 1.2900161743164062, 14.570388793945312, 5.379384994506836, 4.221502304077148, 3.187164306640625, 1.4355850219726562, 4.02598762512207, 3.3044052124023438, 0.12552833557128906, 7.229583740234375, 6.7625732421875, 0.14763641357421875, 1.7306022644042969, 2.3721237182617188, 8.730552673339844, 11.8719482421875, 1.898977279663086, 9.943336486816406, 5.869342803955078, 8.857986450195312, -2.3757667541503906, 8.765941619873047, 6.2721710205078125, 9.0574951171875, 3.2350692749023438, 2.4360084533691406, 17.045654296875, 5.657585144042969, 3.397693634033203, 7.5004425048828125, 4.068511962890625, 0.6409549713134766, 12.750114440917969, 11.651779174804688, -5.817878723144531, 14.097320556640625, 19.877243041992188, 7.253576278686523, 8.217613220214844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000057.npy"} +{"epoch": 0.08370044052863436, "step": 58, "batch_size": 64, "mean": 7.2314958572387695, "std": 7.587307929992676, "min": -8.209304809570312, "p10": -0.09409408569335903, "median": 5.074516296386719, "p90": 17.332496643066413, "max": 27.43353271484375, "pos_frac": 0.890625, "sample": [15.403488159179688, -1.986886978149414, 0.5264472961425781, 4.802181243896484, 3.9004440307617188, 4.443574905395508, 19.800621032714844, 1.8984603881835938, 14.758003234863281, 2.761087417602539, 5.346851348876953, 7.550689697265625, 7.2381591796875, 3.971466064453125, 2.3765411376953125, 12.970939636230469, 10.616981506347656, 9.054763793945312, 4.4847869873046875, 2.8446903228759766, 7.840087890625, 14.838478088378906, 14.253616333007812, -1.5484161376953125, 11.717880249023438, 2.0100250244140625, 9.591751098632812, 6.593893051147461, 22.153396606445312, 4.464899063110352, 27.43353271484375, 8.252315521240234, 6.7392578125, -0.23357009887695312, 14.957572937011719, 1.0489559173583984, 13.329757690429688, 3.9067153930664062, 2.2831039428710938, -2.905975341796875, 2.132274627685547, 2.620800018310547, 23.684722900390625, -1.6701278686523438, 10.299652099609375, 0.23134994506835938, 0.9682388305664062, 7.3302001953125, -4.764892578125, 24.25726318359375, 1.9542407989501953, 6.192768096923828, 1.042104721069336, 14.994373321533203, 11.020126342773438, -8.209304809570312, 23.140701293945312, 1.7686767578125, 2.8341598510742188, 7.795013427734375, 15.808830261230469, 1.6522274017333984, 0.2562828063964844, 17.985496520996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000058.npy"} +{"epoch": 0.08516886930983847, "step": 59, "batch_size": 64, "mean": 7.762610912322998, "std": 8.03657054901123, "min": -6.96087646484375, "p10": -1.4758525848388664, "median": 6.508495330810547, "p90": 20.11944274902344, "max": 33.51806640625, "pos_frac": 0.875, "sample": [8.478900909423828, 8.534904479980469, 16.38604736328125, 20.54559326171875, 16.526947021484375, 4.1981048583984375, 33.51806640625, 21.073760986328125, 7.405208587646484, 16.700210571289062, 0.6394138336181641, 7.848262786865234, -3.3642425537109375, 2.5791149139404297, 21.727737426757812, -0.7172050476074219, 2.49359130859375, 13.741157531738281, 6.1178741455078125, 21.023040771484375, 1.1713085174560547, 2.6273269653320312, 1.0985641479492188, 1.6935997009277344, 3.8902740478515625, 25.58697509765625, 2.512533187866211, 1.8366031646728516, 7.972637176513672, -6.96087646484375, 9.195137023925781, -2.0471115112304688, 8.143821716308594, 7.87860107421875, 8.888481140136719, 18.101219177246094, 3.03106689453125, 12.531158447265625, 7.6291961669921875, 3.8337440490722656, -5.681915283203125, -3.3923263549804688, 5.768293380737305, -1.8009872436523438, 4.255100250244141, 10.59274673461914, 7.392694473266602, 2.7421875, 8.158012390136719, 2.9860897064208984, 1.9731292724609375, 6.045402526855469, 3.3436756134033203, 9.493602752685547, 5.710531234741211, 15.159233093261719, 9.88150405883789, 4.192283630371094, 3.4220733642578125, -2.6815185546875, 18.467784881591797, 19.125091552734375, 6.899116516113281, 20.684539794921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000059.npy"} +{"epoch": 0.08663729809104258, "step": 60, "batch_size": 64, "mean": 6.3241682052612305, "std": 7.149575233459473, "min": -15.154327392578125, "p10": -1.5509704589843745, "median": 6.941120147705078, "p90": 12.870755767822265, "max": 26.09717559814453, "pos_frac": 0.8125, "sample": [7.772762298583984, -4.3381500244140625, 1.8671436309814453, 12.586555480957031, 1.129241943359375, 3.3859424591064453, 11.504657745361328, 8.588874816894531, 1.412109375, -15.154327392578125, 7.273105621337891, -1.7334671020507812, 2.0217418670654297, 4.2630615234375, 11.348381042480469, 7.134002685546875, 5.3531341552734375, 11.232101440429688, 4.285865783691406, 10.66119384765625, 13.655441284179688, 12.890396118164062, 20.25445556640625, 8.939361572265625, 8.660385131835938, 7.838737487792969, -0.7427101135253906, 6.748237609863281, 11.085479736328125, -0.10377311706542969, 9.291255950927734, 19.504470825195312, 4.252315521240234, 5.725822448730469, 7.868167877197266, -1.9703311920166016, 11.325515747070312, 9.035263061523438, 2.09368896484375, -5.320152282714844, 12.011856079101562, 3.3490562438964844, 5.6312255859375, 9.035873413085938, 4.272705078125, -0.6251983642578125, -11.229999542236328, 17.821121215820312, 20.603271484375, 11.759429931640625, 12.824928283691406, 2.9991798400878906, -2.8698081970214844, -1.1251449584960938, 11.506567001342773, 6.611139297485352, 26.09717559814453, 9.854515075683594, 1.081207275390625, 9.641241073608398, -0.6053009033203125, 8.437675476074219, 4.663089752197266, 1.3750057220458984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000060.npy"} +{"epoch": 0.0881057268722467, "step": 61, "batch_size": 64, "mean": 7.751380920410156, "std": 10.563068389892578, "min": -10.777801513671875, "p10": -1.4835662841796875, "median": 5.669303894042969, "p90": 19.042723846435546, "max": 58.73504638671875, "pos_frac": 0.8125, "sample": [3.0402069091796875, 1.2606353759765625, -1.7594680786132812, 1.5120468139648438, 15.111587524414062, 0.030153274536132812, 0.0592193603515625, 7.609966278076172, -0.6391677856445312, 27.598342895507812, 23.62494659423828, 20.295761108398438, 27.49463653564453, 14.265281677246094, 7.931144714355469, 9.417579650878906, -1.7299118041992188, 1.7934322357177734, 12.408622741699219, 13.048362731933594, 3.08721923828125, 5.166210174560547, 16.784469604492188, 8.55141830444336, 5.736328125, 6.793544769287109, 5.8477325439453125, 6.2378997802734375, 9.520050048828125, 8.87933349609375, -0.17832183837890625, -1.422149658203125, 12.647041320800781, 17.28009033203125, 16.798919677734375, 23.911865234375, 18.999801635742188, 1.9909439086914062, -0.72308349609375, 14.630386352539062, 3.1425323486328125, 0.2190723419189453, 13.570526123046875, 3.2499542236328125, 0.7549114227294922, 19.061119079589844, 0.8130722045898438, 3.6526756286621094, 58.73504638671875, 13.192985534667969, 13.33392333984375, -1.5098876953125, -10.777801513671875, 12.292640686035156, 2.447418212890625, -5.313610076904297, 5.6022796630859375, 4.028598785400391, -7.842437744140625, 2.7764434814453125, 7.544677734375, -6.125556945800781, -1.2789649963378906, 1.6056632995605469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000061.npy"} +{"epoch": 0.08957415565345081, "step": 62, "batch_size": 64, "mean": 5.783695697784424, "std": 7.510776519775391, "min": -9.392425537109375, "p10": -3.406118583679199, "median": 5.499258995056152, "p90": 15.117897033691408, "max": 31.001075744628906, "pos_frac": 0.84375, "sample": [9.33590316772461, 1.5871353149414062, 0.6188144683837891, 11.317878723144531, 5.623346328735352, 12.476631164550781, 7.754613876342773, 0.9094123840332031, 10.067855834960938, -5.69683837890625, 14.690170288085938, 6.484733581542969, 1.9097270965576172, -0.11255073547363281, 9.219108581542969, 3.484363555908203, 10.504638671875, 5.85009765625, 0.3032722473144531, 5.162376403808594, 7.276885986328125, 7.830120086669922, 1.6441192626953125, 5.47035026550293, -4.6964111328125, 8.684799194335938, 4.252328872680664, 8.301094055175781, -0.2748908996582031, 31.001075744628906, 18.07978057861328, 7.836051940917969, 5.830085754394531, 2.613616943359375, 1.0655975341796875, 5.868461608886719, 1.0614700317382812, 18.877700805664062, -9.392425537109375, -4.0079498291015625, -6.7314605712890625, 0.7966537475585938, 1.5615882873535156, 0.7004947662353516, 1.4035110473632812, 9.390972137451172, 16.421993255615234, 9.849361419677734, -3.4855594635009766, 15.30120849609375, 8.874542236328125, 5.528167724609375, -6.399139404296875, 4.271322250366211, 13.005325317382812, 3.6473846435546875, 3.378307342529297, 3.8187942504882812, 27.011550903320312, 20.956222534179688, 7.319921493530273, 1.86920166015625, -3.2207565307617188, 6.074371337890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000062.npy"} +{"epoch": 0.09104258443465492, "step": 63, "batch_size": 64, "mean": 7.922541618347168, "std": 8.270774841308594, "min": -13.621047973632812, "p10": -0.013949012756347468, "median": 7.200447082519531, "p90": 17.0286003112793, "max": 33.79638671875, "pos_frac": 0.890625, "sample": [25.904129028320312, 8.461700439453125, 4.610185623168945, 9.295780181884766, 3.290771484375, 1.4310073852539062, 4.796714782714844, -7.470481872558594, 6.767372131347656, 6.9789886474609375, -13.621047973632812, 16.461585998535156, 9.118064880371094, 7.421905517578125, 2.741636276245117, -1.076131820678711, 17.2716064453125, 4.901975631713867, 13.123703002929688, 21.518646240234375, 12.538787841796875, 14.44427490234375, 6.5146331787109375, 0.3166675567626953, -0.0915679931640625, 11.925065994262695, -4.099815368652344, 0.1671619415283203, 5.369293212890625, 3.385061264038086, 12.994232177734375, 8.539833068847656, 7.827781677246094, 10.0250244140625, 11.308357238769531, 12.519325256347656, 14.6500244140625, 14.387825012207031, 5.392181396484375, 2.797718048095703, 0.3379058837890625, 15.164077758789062, 2.6877593994140625, 5.468147277832031, -10.292533874511719, 13.984853744506836, 7.7433319091796875, 1.7837066650390625, 2.6796417236328125, 6.886566162109375, 3.392242431640625, 14.700668334960938, 18.237655639648438, 8.21563720703125, 18.381553649902344, 0.7491226196289062, 8.821523666381836, 33.79638671875, -2.9268722534179688, 15.851566314697266, 4.8056182861328125, 26.886985778808594, 5.055915832519531, 11.791221618652344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000063.npy"} +{"epoch": 0.09251101321585903, "step": 64, "batch_size": 64, "mean": 9.295536041259766, "std": 10.532648086547852, "min": -18.506881713867188, "p10": -1.0077222824096679, "median": 7.777100563049316, "p90": 22.24172973632813, "max": 50.224151611328125, "pos_frac": 0.8125, "sample": [0.8104629516601562, 18.650978088378906, 27.682357788085938, 8.288200378417969, 5.0225067138671875, 12.538597106933594, 3.166471481323242, 16.624523162841797, -0.5787754058837891, 7.178434371948242, 2.2548904418945312, 11.149711608886719, 1.2436065673828125, 5.267768859863281, 12.522956848144531, 31.527664184570312, 4.8275604248046875, -1.6861343383789062, 18.63751220703125, 7.792116165161133, 12.578693389892578, -2.7594223022460938, 29.71771240234375, 8.758060455322266, -0.26900291442871094, 6.156181335449219, -0.9936618804931641, 7.512638092041016, -18.506881713867188, -0.25019073486328125, 7.7620849609375, -1.05120849609375, 50.224151611328125, 6.014289855957031, 1.7952136993408203, 10.829349517822266, 17.954566955566406, 21.195404052734375, 10.77020263671875, 6.005870819091797, 5.941476821899414, 23.639968872070312, 12.954597473144531, 10.148536682128906, -1.0137481689453125, -9.401832580566406, 25.897689819335938, 4.161491394042969, 22.690155029296875, 0.1589202880859375, 15.664352416992188, 13.997421264648438, 13.136711120605469, -1.4681930541992188, 13.387649536132812, -0.4260826110839844, 5.3662109375, 7.967075347900391, 14.219245910644531, 13.590560913085938, 1.7354469299316406, 11.910369873046875, 18.56627655029297, 5.7245635986328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000064.npy"} +{"epoch": 0.09397944199706314, "step": 65, "batch_size": 64, "mean": 9.323431015014648, "std": 10.29684066772461, "min": -23.44318389892578, "p10": -0.7886083602905267, "median": 7.614355087280273, "p90": 23.8426498413086, "max": 31.716644287109375, "pos_frac": 0.84375, "sample": [31.716644287109375, 26.129592895507812, 17.722030639648438, 12.208755493164062, 6.243871688842773, 9.238227844238281, 0.7609634399414062, 13.67471694946289, 3.2313098907470703, 22.110450744628906, 17.06208038330078, 1.4043216705322266, -0.2063426971435547, 24.46185302734375, 28.618881225585938, 1.1579399108886719, 10.641578674316406, 2.5816593170166016, -4.25921630859375, 5.149528503417969, 1.3611392974853516, 19.076583862304688, 6.2925262451171875, -23.44318389892578, 17.346771240234375, 9.48529052734375, -4.116912841796875, 8.233596801757812, 14.236831665039062, 5.400997161865234, 6.128936767578125, 5.626010894775391, -2.2170066833496094, 5.095388412475586, 30.206100463867188, 7.230365753173828, 22.397842407226562, 16.082191467285156, 26.15880584716797, 7.918689727783203, -0.1398162841796875, -12.199317932128906, 10.519882202148438, 2.104175567626953, 7.113800048828125, 19.436981201171875, 25.12151336669922, -2.055938720703125, 6.223052978515625, 20.8599853515625, 12.287353515625, 4.893585205078125, 21.190109252929688, -1.0381507873535156, 13.614883422851562, 8.343795776367188, 6.436775207519531, 2.0091094970703125, 17.66339111328125, 10.73321533203125, -0.14093399047851562, 8.198896408081055, 0.06344223022460938, 7.310020446777344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000065.npy"} +{"epoch": 0.09544787077826726, "step": 66, "batch_size": 64, "mean": 8.572092056274414, "std": 10.30179214477539, "min": -9.97607421875, "p10": -0.7767261505126952, "median": 7.301836013793945, "p90": 19.487243270874025, "max": 48.81031799316406, "pos_frac": 0.84375, "sample": [7.851531982421875, 48.81031799316406, 0.27147674560546875, 15.803855895996094, 14.166618347167969, 13.490684509277344, 2.465360641479492, -0.3621673583984375, 10.792242050170898, 7.770965576171875, -9.066802978515625, 2.353099822998047, 6.889766693115234, -0.7033920288085938, 12.37460708618164, 3.975627899169922, 4.883661270141602, 0.2111797332763672, 2.540424346923828, 19.490341186523438, 8.464057922363281, 10.63027572631836, 0.7244663238525391, 10.842552185058594, 18.075031280517578, 2.5709686279296875, 4.759681701660156, -2.6942672729492188, -4.298465728759766, -9.97607421875, 10.805419921875, 17.67467498779297, 19.48001480102539, 8.305343627929688, 3.7365684509277344, 11.8487548828125, 6.739387512207031, -0.20126724243164062, 2.529296875, 14.763641357421875, 10.887619018554688, 4.170494079589844, -0.8081550598144531, 7.713905334472656, 6.059289932250977, -8.44927978515625, 10.338615417480469, 1.7090187072753906, -3.9325332641601562, 14.258186340332031, 4.470970153808594, 20.113967895507812, 27.224884033203125, 22.58477783203125, 4.835868835449219, 17.148727416992188, 2.1025619506835938, 8.548274993896484, 15.217384338378906, 2.7339229583740234, 34.59141540527344, 1.235565185546875, 33.182525634765625, 13.886428833007812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000066.npy"} +{"epoch": 0.09691629955947137, "step": 67, "batch_size": 64, "mean": 8.888347625732422, "std": 10.270591735839844, "min": -12.6123046875, "p10": -1.0431358337402339, "median": 6.335858345031738, "p90": 21.72581481933594, "max": 48.348846435546875, "pos_frac": 0.84375, "sample": [22.4154052734375, 4.302768707275391, 22.040023803710938, 0.7889404296875, 2.4396133422851562, -0.6254653930664062, 16.986412048339844, 5.361808776855469, -5.942237854003906, 2.378538131713867, 17.658584594726562, 10.546676635742188, 4.485160827636719, 16.557945251464844, -0.5509567260742188, -4.401222229003906, 9.645751953125, 10.269386291503906, 1.7987060546875, -12.6123046875, 20.992660522460938, 8.94561767578125, -0.2916984558105469, 0.0156402587890625, 6.1206512451171875, 2.8389739990234375, 22.381973266601562, 6.821538925170898, -1.222137451171875, 6.100379943847656, 2.084747314453125, 15.357494354248047, 7.468132019042969, 3.2556934356689453, 2.6862335205078125, 10.1065673828125, 15.717103958129883, 6.21049690246582, 1.3534317016601562, -5.283050537109375, 5.807960510253906, 18.341819763183594, 4.8824310302734375, 4.094364166259766, 26.87652587890625, 14.934883117675781, 6.542476654052734, 18.9532470703125, 14.452957153320312, 15.534774780273438, 13.357841491699219, 5.678361892700195, 12.18730354309082, 34.50895690917969, 6.461219787597656, 1.0819034576416016, 48.348846435546875, 3.299257278442383, -2.0787124633789062, 27.081008911132812, 20.613868713378906, 7.480228424072266, -3.379730224609375, 8.588441848754883], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000067.npy"} +{"epoch": 0.09838472834067548, "step": 68, "batch_size": 64, "mean": 8.439209938049316, "std": 8.632508277893066, "min": -7.87646484375, "p10": 0.3489673614501954, "median": 5.976025581359863, "p90": 21.232487487792973, "max": 38.28321838378906, "pos_frac": 0.90625, "sample": [-5.196723937988281, 3.0496063232421875, 25.102508544921875, 8.484786987304688, 5.968709945678711, -6.6017608642578125, 8.618804931640625, 23.409584045410156, 1.3277130126953125, 0.4426383972167969, 13.613525390625, 15.928787231445312, 18.386123657226562, 12.370918273925781, 10.342475891113281, 0.3088226318359375, 7.000789642333984, 13.202770233154297, 4.068840026855469, 8.458213806152344, -0.9243392944335938, 4.846967697143555, 5.163341522216797, 3.0960540771484375, 7.897197723388672, 12.818130493164062, 19.32879638671875, 7.9530029296875, 9.582782745361328, 20.35498809814453, 21.608558654785156, 23.694263458251953, 1.51654052734375, 5.969367980957031, 10.539703369140625, 1.744943618774414, -0.42322731018066406, 5.982683181762695, 2.307220458984375, 5.214256286621094, 17.666240692138672, 2.947673797607422, 11.032407760620117, -7.87646484375, 6.9226226806640625, 5.769981384277344, 4.0675201416015625, 22.396087646484375, 1.8720855712890625, 1.572500228881836, 2.3526363372802734, 38.28321838378906, 25.97748565673828, 17.298843383789062, 13.755882263183594, 0.5924301147460938, 2.5528030395507812, 5.287498474121094, 11.365959167480469, -2.59228515625, 5.5172882080078125, 2.7041397094726562, 5.349601745605469, 8.734962463378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000068.npy"} +{"epoch": 0.09985315712187959, "step": 69, "batch_size": 64, "mean": 10.210320472717285, "std": 10.07213020324707, "min": -4.614227294921875, "p10": -0.2778038024902343, "median": 7.255146026611328, "p90": 21.784290313720707, "max": 47.442535400390625, "pos_frac": 0.875, "sample": [5.225372314453125, 6.988487243652344, 31.728424072265625, 9.722320556640625, 13.2677001953125, 16.66606903076172, -4.387451171875, -1.38519287109375, -0.16693878173828125, 4.862602233886719, 5.6932373046875, 9.318378448486328, -0.3253173828125, 9.506423950195312, 7.5218048095703125, 15.63107681274414, 27.774688720703125, 7.6299285888671875, 18.05620574951172, 15.939804077148438, -0.5814285278320312, 25.506301879882812, 13.102310180664062, 10.248645782470703, 19.25043487548828, 14.28802490234375, 4.647808074951172, 17.103797912597656, 13.265281677246094, 6.047941207885742, 1.003683090209961, 0.2750816345214844, 20.223121643066406, 47.442535400390625, 4.877902984619141, 20.850303649902344, 5.99371337890625, 15.393829345703125, 17.737838745117188, 0.888580322265625, 1.7088623046875, 16.855453491210938, 10.956623077392578, 2.3012619018554688, -0.8553905487060547, 15.043807983398438, 6.222434997558594, 37.88945770263672, 4.844207763671875, 7.996728897094727, 6.418699264526367, -4.614227294921875, 6.38916015625, 3.3197021484375, 1.6865272521972656, 6.95819091796875, 22.1845703125, 23.04693603515625, 0.118499755859375, 20.165786743164062, 1.9933891296386719, 1.348398208618164, 5.345766067504883, -0.697662353515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000069.npy"} +{"epoch": 0.1013215859030837, "step": 70, "batch_size": 64, "mean": 11.062643051147461, "std": 11.917571067810059, "min": -10.566650390625, "p10": -1.5904834747314451, "median": 10.60970687866211, "p90": 27.611377716064467, "max": 43.939453125, "pos_frac": 0.828125, "sample": [6.481409072875977, 1.5330047607421875, 11.825740814208984, 20.22284698486328, 24.313865661621094, 13.374734878540039, -8.865280151367188, -1.6339263916015625, 14.831916809082031, 3.727611541748047, -2.2104644775390625, 13.981391906738281, -2.599609375, 9.2703857421875, 40.57231140136719, 1.7953929901123047, 10.442169189453125, 29.02459716796875, 11.655538558959961, 43.939453125, 12.527694702148438, 10.96624755859375, 2.1287155151367188, 30.851409912109375, -0.10277175903320312, 18.05353546142578, 19.9752197265625, 13.49555778503418, -2.0486621856689453, 7.459211349487305, 6.261474609375, 29.058250427246094, 1.95562744140625, 0.6033477783203125, 1.3369827270507812, -1.4891166687011719, 14.75082015991211, 3.2827835083007812, 12.890251159667969, -1.8209667205810547, 3.3582191467285156, 3.9806041717529297, -0.49604034423828125, 1.7791900634765625, 11.306352615356445, 1.32366943359375, 20.222084045410156, 24.163177490234375, -10.566650390625, 5.396938323974609, 18.502777099609375, 3.3631210327148438, 1.6779327392578125, 35.889556884765625, 14.141765594482422, -0.8359031677246094, 37.099456787109375, 10.777244567871094, 22.908294677734375, 17.96690559387207, 3.2650089263916016, 24.032943725585938, 13.770694732666016, 23.163116455078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000070.npy"} +{"epoch": 0.1027900146842878, "step": 71, "batch_size": 64, "mean": 12.967889785766602, "std": 13.040888786315918, "min": -9.671022415161133, "p10": 0.6459039688110366, "median": 10.924520492553711, "p90": 24.64378356933594, "max": 54.0628662109375, "pos_frac": 0.90625, "sample": [-6.254905700683594, 45.488372802734375, 6.412208557128906, 2.1165237426757812, 6.444156646728516, 14.086128234863281, 6.473358154296875, 12.960357666015625, 19.192527770996094, 5.960945129394531, 23.7900390625, 7.596223831176758, 19.037017822265625, 4.907894134521484, 3.3328628540039062, 7.302177429199219, 11.586833953857422, 16.745361328125, -1.1845626831054688, 2.055360794067383, 4.040550231933594, 47.970977783203125, 8.138633728027344, 7.235958099365234, 17.872222900390625, 17.383056640625, 6.538330078125, 13.730186462402344, 13.430805206298828, -9.145393371582031, 4.3208465576171875, 13.377487182617188, 12.544784545898438, 0.041851043701171875, 5.310789108276367, 18.85700225830078, 20.518234252929688, -0.01605224609375, 22.010879516601562, 17.654502868652344, 4.9993743896484375, 14.251388549804688, 6.5654296875, 17.72760009765625, 17.213043212890625, 19.429420471191406, 4.285266876220703, 38.97434997558594, 51.3590087890625, 54.0628662109375, 2.5043869018554688, 25.009674072265625, -9.671022415161133, -1.1567001342773438, 15.573104858398438, 7.407703399658203, 8.282093048095703, 11.976860046386719, 20.552658081054688, 17.416114807128906, 34.136566162109375, 2.7325592041015625, 6.186466217041016, 10.26220703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000071.npy"} +{"epoch": 0.10425844346549193, "step": 72, "batch_size": 64, "mean": 13.816070556640625, "std": 14.3781099319458, "min": -16.06719970703125, "p10": -2.5835136413574187, "median": 11.693943977355957, "p90": 34.9550491333008, "max": 60.88427734375, "pos_frac": 0.890625, "sample": [19.25402069091797, 16.019384384155273, 6.416450500488281, 14.029388427734375, 6.2328338623046875, 4.14979362487793, 9.550338745117188, 28.28662109375, -16.06719970703125, 27.83795166015625, 16.720638275146484, 15.382070541381836, 8.083620071411133, 0.7740478515625, 18.391132354736328, 10.45346450805664, 14.235748291015625, 29.031814575195312, 16.53284454345703, 36.64581298828125, 42.227447509765625, -4.227685928344727, 7.540809631347656, 11.36882209777832, 18.18042755126953, 3.4405593872070312, 20.103973388671875, 10.957527160644531, 12.019065856933594, 18.52965545654297, 3.3009109497070312, 5.7960205078125, 12.49102783203125, -15.069808959960938, 37.88615417480469, 31.009933471679688, 21.256057739257812, 7.658658981323242, 23.578125, -4.108863830566406, 9.895801544189453, 44.70814514160156, 60.88427734375, -7.480140686035156, 10.516691207885742, 25.807327270507812, 8.080062866210938, 8.633068084716797, 1.1737384796142578, 38.50825500488281, -12.3619384765625, 4.251438140869141, 2.5411224365234375, 17.95770263671875, 15.271907806396484, 20.340850830078125, 10.816150665283203, 16.872390747070312, 40.09521484375, 12.559358596801758, 6.894725799560547, 4.282623291015625, -4.022468566894531, 2.1026268005371094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000072.npy"} +{"epoch": 0.10572687224669604, "step": 73, "batch_size": 64, "mean": 13.781963348388672, "std": 20.36886215209961, "min": -28.773956298828125, "p10": -10.054238128662108, "median": 10.669946670532227, "p90": 36.3125358581543, "max": 72.32838439941406, "pos_frac": 0.78125, "sample": [32.891021728515625, -16.087936401367188, 8.15865707397461, 16.390457153320312, 7.589086532592773, 39.04303741455078, -7.740203857421875, 26.679183959960938, 7.8488311767578125, -0.553619384765625, 31.03955078125, 35.91743469238281, 27.023818969726562, -1.3108062744140625, 0.164794921875, -17.75445556640625, 62.028533935546875, -2.1314926147460938, 11.707555770874023, 20.888458251953125, 17.82680892944336, 8.054458618164062, 10.799690246582031, 1.4093818664550781, 2.422008514404297, 10.540203094482422, -19.824695587158203, 11.242080688476562, 33.697357177734375, 16.86540985107422, 14.137619018554688, 3.744943618774414, 62.453216552734375, -12.566787719726562, 4.9406890869140625, 33.84361267089844, 32.51075744628906, -10.423866271972656, 5.147859573364258, 2.2577877044677734, -3.0493621826171875, -6.61297607421875, -11.238006591796875, 7.496612548828125, 17.808853149414062, 17.692901611328125, 2.912261962890625, 5.899587631225586, 22.97028350830078, 46.34474182128906, 16.550994873046875, 6.728275299072266, 23.89788818359375, 6.242961883544922, 16.012176513671875, -9.1917724609375, 15.27899169921875, 62.66172790527344, 30.223648071289062, 72.32838439941406, 3.4063987731933594, 36.48186492919922, 19.102767944335938, -28.773956298828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000073.npy"} +{"epoch": 0.10719530102790015, "step": 74, "batch_size": 64, "mean": 15.132284164428711, "std": 22.839929580688477, "min": -42.27965545654297, "p10": -3.5770503997802727, "median": 11.42477798461914, "p90": 34.982641601562506, "max": 136.45916748046875, "pos_frac": 0.796875, "sample": [28.31018829345703, 9.908195495605469, 14.735977172851562, 0.003437042236328125, 26.01892852783203, 8.08060073852539, 11.364250183105469, 60.376441955566406, 2.403644561767578, 18.723350524902344, 17.024879455566406, 10.749216079711914, 21.426509857177734, 2.126129150390625, 17.95672607421875, 136.45916748046875, 9.2706298828125, 33.23310089111328, 16.439876556396484, 3.619171142578125, 1.7146186828613281, 55.90876770019531, 4.1660003662109375, 35.732444763183594, 28.005393981933594, 11.025238037109375, -3.9658164978027344, -0.7055206298828125, -0.5548057556152344, -20.271026611328125, 15.782886505126953, -2.0768814086914062, 0.0077056884765625, 10.327667236328125, -42.27965545654297, 22.280357360839844, -2.6699295043945312, 28.582698822021484, 5.329959869384766, -4.757488250732422, -12.024948120117188, 11.485305786132812, 20.418731689453125, 24.689987182617188, -0.2878875732421875, 50.184234619140625, 15.618255615234375, 2.89111328125, 10.612411499023438, 9.330867767333984, 11.795063018798828, 39.304290771484375, 20.16973114013672, 25.871795654296875, 41.357154846191406, 16.986618041992188, 5.10713005065918, 19.24950408935547, 30.171966552734375, 29.142333984375, -0.5223846435546875, -4.995094299316406, -4.017608642578125, 16.11459732055664], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000074.npy"} +{"epoch": 0.10866372980910426, "step": 75, "batch_size": 64, "mean": 21.468948364257812, "std": 21.70078468322754, "min": -13.97723388671875, "p10": -3.0400632858276344, "median": 17.066402435302734, "p90": 52.788874816894534, "max": 78.84217834472656, "pos_frac": 0.859375, "sample": [-0.7579669952392578, 27.134475708007812, 18.273757934570312, 15.658042907714844, 19.10326385498047, 13.096332550048828, 11.179420471191406, 7.683586120605469, 50.19306945800781, 73.18084716796875, 33.08892059326172, 16.349878311157227, -8.318443298339844, 29.865859985351562, 78.84217834472656, 16.765949249267578, 44.225860595703125, 66.81253051757812, 10.366172790527344, -0.6760120391845703, 46.767791748046875, 26.613601684570312, 17.15314483642578, 10.923103332519531, 15.064620971679688, 52.07664489746094, 53.0941162109375, 3.612110137939453, 9.261268615722656, 33.31418228149414, 19.262710571289062, 18.70615005493164, 4.1413726806640625, 70.39305114746094, -6.079265594482422, -4.018104553222656, 27.261367797851562, -13.97723388671875, 22.13128662109375, 21.87777328491211, 7.601047515869141, 13.610748291015625, -9.576698303222656, 9.186859130859375, -7.960796356201172, 31.179763793945312, 3.2708072662353516, 13.457260131835938, 42.618377685546875, 28.4334716796875, 4.223934173583984, -7.154998779296875, 9.388154983520508, 7.230358123779297, 17.437335968017578, 5.8655853271484375, 7.2853851318359375, 25.832305908203125, 16.979660034179688, 18.764617919921875, 40.051918029785156, 63.242462158203125, 66.06982421875, 17.32782745361328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000075.npy"} +{"epoch": 0.11013215859030837, "step": 76, "batch_size": 64, "mean": 13.827641487121582, "std": 16.98805046081543, "min": -47.10325622558594, "p10": -2.9663013458251943, "median": 13.02697467803955, "p90": 32.70941276550293, "max": 72.02825927734375, "pos_frac": 0.84375, "sample": [14.402002334594727, 6.631404876708984, 2.7507076263427734, 25.86962890625, 4.132087707519531, 17.821197509765625, 9.426788330078125, 29.423492431640625, 32.50593948364258, 15.101051330566406, 17.31999969482422, 22.9202880859375, 10.264041900634766, 34.26305389404297, 4.245750427246094, 5.731576919555664, 9.360721588134766, 72.02825927734375, 36.901329040527344, 6.186164855957031, -7.727622985839844, 13.142671585083008, -0.17510414123535156, 13.4754638671875, -16.392623901367188, 7.074518203735352, 6.120288848876953, 16.854644775390625, 6.578453063964844, 7.831806182861328, 32.31599426269531, 18.22161102294922, 23.543800354003906, 9.682598114013672, 29.662643432617188, 4.480556488037109, -3.4132652282714844, 28.477264404296875, 3.9246978759765625, -11.896217346191406, 7.714160919189453, 2.674741744995117, -6.805604934692383, -8.8187255859375, 17.792076110839844, 32.79661560058594, 52.5496826171875, 30.779022216796875, 9.233997344970703, 6.03448486328125, 13.456306457519531, -1.9233856201171875, 16.921920776367188, 19.277206420898438, 17.22779083251953, 12.707252502441406, -0.6357612609863281, 15.2730712890625, 38.14142608642578, 25.27729034423828, 24.809005737304688, 35.61077880859375, 12.911277770996094, -47.10325622558594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000076.npy"} +{"epoch": 0.11160058737151249, "step": 77, "batch_size": 64, "mean": 22.32929039001465, "std": 23.605266571044922, "min": -44.575531005859375, "p10": -0.009384536743162641, "median": 22.88666534423828, "p90": 59.33101196289063, "max": 81.01947021484375, "pos_frac": 0.890625, "sample": [18.13298797607422, 45.66388702392578, 32.752750396728516, 44.03402328491211, 44.95167541503906, -0.6109237670898438, 63.406280517578125, 23.188640594482422, 9.332284927368164, 10.851520538330078, 6.806142807006836, 37.338409423828125, 41.15558624267578, 6.380466461181641, 4.2620086669921875, 28.37572479248047, -44.575531005859375, 23.718482971191406, 38.38549041748047, 33.368019104003906, 14.692405700683594, -2.961223602294922, 10.796504974365234, 1.3942070007324219, 4.485618591308594, 59.4029541015625, 3.487842559814453, 29.17028045654297, 12.362350463867188, -5.814794540405273, 33.68381118774414, 30.148414611816406, 13.793848037719727, 69.47102355957031, 34.897003173828125, 10.347427368164062, 62.120452880859375, 81.01947021484375, 22.074989318847656, 14.3050537109375, -35.20143127441406, 8.466323852539062, -17.05352783203125, 3.228893280029297, 13.114439010620117, 27.029571533203125, 32.89910125732422, 26.02972984313965, 27.749046325683594, 27.39019012451172, 8.250839233398438, -7.718109130859375, 5.085981369018555, 27.229759216308594, 29.592124938964844, 23.369155883789062, 4.567596435546875, 60.40936279296875, 71.66456604003906, 12.969547271728516, 25.24825668334961, 59.16314697265625, 22.58469009399414, 7.209747314453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000077.npy"} +{"epoch": 0.1130690161527166, "step": 78, "batch_size": 64, "mean": 17.180259704589844, "std": 20.03946876525879, "min": -23.506576538085938, "p10": -3.5817176818847654, "median": 16.84520721435547, "p90": 44.3201026916504, "max": 90.25717163085938, "pos_frac": 0.796875, "sample": [0.7560653686523438, 2.0338134765625, 43.148406982421875, 18.364959716796875, -3.006500244140625, 48.69624328613281, 17.636594772338867, 90.25717163085938, 27.566162109375, 23.42013168334961, 21.720401763916016, 8.661457061767578, -5.03076171875, 15.09321403503418, 22.541088104248047, 21.042041778564453, -0.3341827392578125, 9.525323867797852, -5.297096252441406, 3.5680999755859375, 7.036409378051758, 18.671907424926758, 27.56399917602539, 25.639930725097656, 20.608577728271484, 15.389995574951172, 18.840717315673828, 18.40875244140625, 26.158096313476562, 6.249654769897461, 44.82225799560547, 16.887697219848633, 6.560981750488281, -11.479549407958984, 27.671165466308594, -11.230575561523438, 57.26739501953125, 22.213531494140625, 46.16477966308594, -1.932464599609375, 39.08626937866211, 4.102210998535156, 51.975616455078125, 34.288883209228516, 31.41309356689453, -1.2860374450683594, 11.881744384765625, 3.9230728149414062, 13.597698211669922, 8.07745361328125, 6.01078987121582, -1.232156753540039, 16.802717208862305, -3.3173751831054688, 32.79240417480469, -23.506576538085938, 54.251007080078125, 4.426837921142578, -19.302085876464844, 27.147323608398438, -3.69500732421875, 38.776031494140625, 2.7525634765625, 28.694259643554688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000078.npy"} +{"epoch": 0.1145374449339207, "step": 79, "batch_size": 64, "mean": 18.81760025024414, "std": 24.413606643676758, "min": -24.213485717773438, "p10": -7.344851303100585, "median": 13.980016708374023, "p90": 53.02166137695314, "max": 82.12332153320312, "pos_frac": 0.765625, "sample": [-7.9947052001953125, 56.17363739013672, 6.477750778198242, 73.21742248535156, 4.394950866699219, 0.3094635009765625, -2.391073226928711, -11.799942016601562, 45.09690856933594, 38.816131591796875, 19.276168823242188, -24.213485717773438, 25.462841033935547, 74.54214477539062, 9.391304016113281, 27.44219970703125, 77.51028442382812, 55.00885009765625, 17.801660537719727, -11.2138671875, 82.12332153320312, 17.372920989990234, -4.2426300048828125, -12.58941650390625, 30.441238403320312, 1.408651351928711, 0.0367431640625, 17.34609603881836, 14.617610931396484, 65.95507049560547, 2.5322952270507812, 9.008466720581055, 13.342422485351562, 28.287113189697266, -12.069488525390625, 10.659782409667969, -0.27660369873046875, 7.800773620605469, 45.22723388671875, 44.29402160644531, 21.470882415771484, 34.27330017089844, 35.9044189453125, 20.573909759521484, 28.32660675048828, -5.562044143676758, 2.929229736328125, 33.80241394042969, 39.764366149902344, 11.480634689331055, 16.119417190551758, -5.828525543212891, -1.3932151794433594, 41.10694885253906, -0.993865966796875, 23.4757080078125, 5.399051666259766, 6.779638290405273, 0.02741241455078125, -10.012939453125, 7.669780731201172, 48.3848876953125, -4.309787750244141, 20.35382080078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000079.npy"} +{"epoch": 0.11600587371512482, "step": 80, "batch_size": 64, "mean": 17.45241928100586, "std": 25.401348114013672, "min": -26.21485137939453, "p10": -8.462450408935545, "median": 12.678586959838867, "p90": 46.489024353027354, "max": 103.24301147460938, "pos_frac": 0.78125, "sample": [26.528656005859375, 9.154792785644531, -6.3976593017578125, 25.261329650878906, 6.798360824584961, 3.8915328979492188, -3.4553260803222656, -10.232154846191406, 34.849456787109375, 0.78094482421875, 4.393182754516602, 17.270401000976562, -17.037979125976562, -21.705596923828125, -8.761024475097656, 24.21019744873047, 103.24301147460938, 40.240501403808594, 88.2821044921875, 48.11095428466797, 26.328765869140625, 7.3935699462890625, 43.870582580566406, 47.61121368408203, 36.30479431152344, 16.466564178466797, -1.3803176879882812, 35.65776062011719, 15.38949203491211, 9.230819702148438, 3.7678680419921875, -18.169761657714844, 4.673778533935547, -7.765777587890625, 26.914962768554688, -26.21485137939453, 61.96168518066406, 32.46380615234375, -9.115848541259766, 94.2130126953125, 10.677335739135742, 3.7645721435546875, 3.142498016357422, 2.2445831298828125, 48.13788604736328, 13.786396026611328, 24.828285217285156, 32.83238220214844, 17.361499786376953, 11.570777893066406, -3.909130096435547, -3.029794692993164, 1.0920467376708984, 7.502922058105469, 24.03880500793457, 23.2154541015625, 6.778236389160156, 3.4544830322265625, 33.899566650390625, 26.728561401367188, 13.918853759765625, 40.521881103515625, -5.448339462280273, 14.817245483398438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000080.npy"} +{"epoch": 0.11747430249632893, "step": 81, "batch_size": 64, "mean": 24.351259231567383, "std": 29.816463470458984, "min": -32.436187744140625, "p10": -9.43324737548828, "median": 20.669248580932617, "p90": 61.09842529296875, "max": 126.4140625, "pos_frac": 0.84375, "sample": [46.223548889160156, 36.08807373046875, -32.436187744140625, 41.802818298339844, 26.641773223876953, 63.50971984863281, 21.595989227294922, -13.18533706665039, 7.567846298217773, 34.20857620239258, 33.333160400390625, -23.722747802734375, 3.1323699951171875, 26.72747039794922, 94.0321044921875, 11.434906005859375, 25.875869750976562, 39.36775207519531, 61.285797119140625, -17.254859924316406, 1.3282527923583984, 10.331466674804688, 21.762470245361328, 75.95185852050781, 24.939125061035156, 12.250293731689453, 17.574066162109375, 8.100479125976562, 7.668081283569336, -8.694923400878906, 27.30707550048828, 8.041412353515625, 5.4796905517578125, 13.467658996582031, 30.291748046875, 126.4140625, 44.479007720947266, 19.742507934570312, 2.71868896484375, 60.661224365234375, 44.38433837890625, 51.86328125, 16.686599731445312, -1.20220947265625, 4.331798553466797, -2.854015350341797, -20.37094497680664, 15.576652526855469, 13.958770751953125, 31.880996704101562, 16.11886978149414, 13.54484748840332, 21.986679077148438, -16.44585418701172, 40.84547424316406, 48.65666198730469, 113.32733154296875, 63.27455139160156, 10.20257568359375, 38.52271270751953, 2.6712493896484375, 29.846527099609375, -9.749671936035156, 35.38047790527344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000081.npy"} +{"epoch": 0.11894273127753303, "step": 82, "batch_size": 64, "mean": 19.267547607421875, "std": 31.920446395874023, "min": -64.02735900878906, "p10": -17.288195800781246, "median": 16.9479923248291, "p90": 53.92407150268555, "max": 127.28945922851562, "pos_frac": 0.796875, "sample": [-9.750221252441406, 34.86468505859375, -21.040687561035156, 19.172801971435547, 22.066165924072266, 19.714120864868164, -27.763145446777344, 16.26761245727539, 26.744354248046875, 59.89954376220703, 73.34524536132812, 13.848930358886719, -14.49169921875, 9.711151123046875, 16.535654067993164, 112.74763488769531, 13.544136047363281, -55.79216003417969, -9.959213256835938, 36.383941650390625, 2.5497360229492188, 33.75566101074219, 53.585121154785156, 4.551229476928711, -18.4866943359375, -7.573822021484375, 4.159309387207031, 25.557235717773438, 21.825531005859375, 13.055648803710938, 21.50812530517578, 38.49835205078125, 39.860870361328125, -0.9152736663818359, 78.16015625, 20.556594848632812, 54.0693359375, 16.830135345458984, 10.015348434448242, 6.9850921630859375, -2.1306686401367188, 31.530426025390625, 64.56402587890625, -20.581314086914062, 30.68169403076172, 22.99687957763672, 7.172384262084961, 48.87710189819336, 37.881996154785156, -24.232864379882812, 10.322921752929688, 7.777008056640625, -64.02735900878906, 9.081695556640625, 34.8350830078125, 22.734176635742188, 5.299980163574219, 27.832130432128906, 17.06584930419922, 127.28945922851562, 16.405723571777344, 18.402265548706055, 0.058349609375, 48.68950653076172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000082.npy"} +{"epoch": 0.12041116005873716, "step": 83, "batch_size": 64, "mean": 22.84069061279297, "std": 28.214187622070312, "min": -27.526336669921875, "p10": -10.007940673828124, "median": 18.271611213684082, "p90": 67.90889434814456, "max": 91.37033081054688, "pos_frac": 0.8125, "sample": [22.333282470703125, 26.330059051513672, 17.455020904541016, 72.20626831054688, 49.002281188964844, 13.231971740722656, -7.9163360595703125, 75.40338134765625, 62.321380615234375, 4.619537353515625, 13.132110595703125, 18.077974319458008, 1.5877838134765625, -5.7126617431640625, 19.31597900390625, 74.33956909179688, -15.591552734375, 91.37033081054688, 11.65838623046875, 77.77008819580078, 42.41387939453125, 12.887504577636719, 52.80116271972656, -21.51507568359375, -27.526336669921875, -10.748725891113281, 58.084014892578125, -22.61536407470703, 17.96187400817871, 70.30354309082031, 10.622169494628906, 16.0955810546875, 5.57771110534668, 0.0003204345703125, 27.27342987060547, 12.845588684082031, 27.434894561767578, -0.09157943725585938, 43.12648010253906, 14.959842681884766, 22.369232177734375, 21.16701889038086, 47.3565673828125, 76.751953125, 20.11516571044922, 51.59405517578125, 5.4237823486328125, -20.443328857421875, -2.5941925048828125, 2.0493545532226562, -25.534255981445312, 55.684417724609375, 9.561225891113281, 44.997039794921875, 26.904945373535156, 26.02558135986328, -8.279441833496094, 7.803638458251953, 19.70560073852539, 34.63159942626953, 44.42131805419922, 18.465248107910156, 29.65215301513672, 3.149749755859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000083.npy"} +{"epoch": 0.12187958883994127, "step": 84, "batch_size": 64, "mean": 19.657663345336914, "std": 21.597183227539062, "min": -46.52813720703125, "p10": -1.4343183517456053, "median": 17.270617485046387, "p90": 45.266647338867195, "max": 88.32102966308594, "pos_frac": 0.84375, "sample": [7.3150634765625, 17.35345458984375, 3.456817626953125, 5.25990104675293, 49.4776611328125, 5.989471435546875, 6.3529815673828125, 21.61016845703125, 17.187780380249023, 9.458877563476562, 31.579181671142578, 37.98241424560547, -21.702041625976562, 13.824951171875, 22.481311798095703, 31.630950927734375, 10.827678680419922, 88.32102966308594, 32.15102005004883, 3.470062255859375, 6.765953063964844, 30.260513305664062, 6.448844909667969, 6.622707366943359, -1.1233863830566406, 44.0552978515625, 27.15888214111328, 31.627243041992188, -46.52813720703125, -1.4943294525146484, 55.43086242675781, 2.9230422973632812, 38.166358947753906, 16.317481994628906, 19.867996215820312, 9.576240539550781, 50.010719299316406, 45.785797119140625, 32.82057189941406, 21.511825561523438, 2.9722061157226562, 33.17200469970703, -4.782358169555664, 17.101032257080078, 29.828369140625, 30.85129165649414, -1.2942924499511719, -4.347932815551758, 35.17631530761719, 34.41864013671875, -2.6319808959960938, 43.586883544921875, 58.22688293457031, 20.476905822753906, 63.732696533203125, 15.91595458984375, 32.71379852294922, 8.672439575195312, -18.935287475585938, 32.2376708984375, 13.108604431152344, 20.118820190429688, 7.695903778076172, -0.15729141235351562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000084.npy"} +{"epoch": 0.12334801762114538, "step": 85, "batch_size": 64, "mean": 20.310611724853516, "std": 25.771699905395508, "min": -32.311676025390625, "p10": -6.0521911621093745, "median": 18.26822853088379, "p90": 60.832456207275406, "max": 73.691650390625, "pos_frac": 0.71875, "sample": [44.640159606933594, 14.318824768066406, 30.83112335205078, 3.819368362426758, -2.4739990234375, 23.315975189208984, 29.686416625976562, 1.9911003112792969, 62.63123321533203, 9.733085632324219, -2.3899784088134766, 5.160430908203125, 67.89334106445312, 27.45557403564453, 25.613080978393555, 22.29895782470703, -4.218505859375, -18.86394500732422, 19.963943481445312, 0.39847564697265625, -32.311676025390625, 73.691650390625, 20.47577667236328, -2.642742156982422, -5.6969146728515625, 46.918067932128906, 7.203330993652344, -9.088134765625, 18.97191619873047, 11.148590087890625, 49.73353576660156, -6.2044525146484375, -17.88946533203125, -1.2958412170410156, 14.6390380859375, 5.267644882202148, 66.65660095214844, 17.56454086303711, 41.2265625, -0.18906593322753906, -4.484651565551758, 72.0872802734375, -1.8493518829345703, 47.993560791015625, 62.162811279296875, 15.769405364990234, 57.728294372558594, 70.73646545410156, 11.340835571289062, 5.790159225463867, 34.66230010986328, 52.147804260253906, -1.0893096923828125, -8.62994384765625, 19.995193481445312, 19.413286209106445, 54.81419372558594, 22.97698211669922, 30.70764923095703, 37.97896957397461, -23.105560302734375, 19.76754379272461, -1.069427490234375, 44.051048278808594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000085.npy"} +{"epoch": 0.12481644640234948, "step": 86, "batch_size": 64, "mean": 21.457523345947266, "std": 39.57339859008789, "min": -60.85264587402344, "p10": -16.118485641479488, "median": 15.523366928100586, "p90": 68.46669921875002, "max": 151.60061645507812, "pos_frac": 0.734375, "sample": [-13.710289001464844, 19.785301208496094, 70.87008666992188, -21.591629028320312, 36.259246826171875, 31.779708862304688, 28.334482192993164, -2.2919235229492188, 109.68243408203125, 11.942821502685547, 6.7793121337890625, -6.630435943603516, 0.820037841796875, 22.421707153320312, -8.932693481445312, 20.503433227539062, 50.417083740234375, 20.148727416992188, -3.5851478576660156, -55.731842041015625, 63.553497314453125, 23.358959197998047, 32.70528793334961, 11.543903350830078, 10.122398376464844, -17.150569915771484, -4.604576110839844, 70.57235717773438, -5.0691680908203125, 8.628934860229492, 39.839359283447266, 19.103912353515625, 8.118936538696289, -60.85264587402344, 48.24183654785156, 56.45330810546875, 29.00225067138672, -43.103416442871094, 79.13706970214844, 3.2117977142333984, 151.60061645507812, 19.48990249633789, 26.513458251953125, 11.822681427001953, -5.7925872802734375, 133.8702392578125, 6.935733795166016, 127.60360717773438, 49.69744873046875, -7.35205078125, -33.10346221923828, 40.42194366455078, 9.831554412841797, 34.9095458984375, 30.786758422851562, 35.91362380981445, 38.25819396972656, 6.416904449462891, 1.6532058715820312, 4.115016937255859, -5.393854141235352, -19.171485900878906, 3.2431182861328125, 20.927444458007812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000086.npy"} +{"epoch": 0.1262848751835536, "step": 87, "batch_size": 64, "mean": 25.20366859436035, "std": 34.9445915222168, "min": -81.3453369140625, "p10": -8.596290588378904, "median": 21.08606719970703, "p90": 69.55859222412111, "max": 133.6138153076172, "pos_frac": 0.8125, "sample": [23.76860237121582, 38.12870788574219, 0.1604442596435547, 10.338556289672852, 38.769134521484375, -9.482131958007812, 31.9761962890625, -0.6967716217041016, 0.8503036499023438, -5.82171630859375, -26.191650390625, 48.44287109375, -6.045326232910156, 71.85343933105469, 32.759727478027344, 61.79052734375, 56.087974548339844, 11.327280044555664, 92.81832885742188, 62.531402587890625, 21.57555389404297, 21.169044494628906, 7.770589828491211, 44.470733642578125, 41.641441345214844, 2.082895278930664, 40.790992736816406, 133.6138153076172, 13.925209045410156, 2.5697174072265625, 13.906850814819336, 76.80270385742188, 61.60200500488281, 10.85195541381836, 59.68342590332031, 27.188629150390625, 15.110147476196289, 41.55826187133789, 21.003089904785156, 79.55210876464844, 0.9561557769775391, 84.01286315917969, 12.587150573730469, 20.735511779785156, -81.3453369140625, 12.055929183959961, 42.52861785888672, -35.806907653808594, 39.74170684814453, 74.32247924804688, -17.52032470703125, -18.044536590576172, 9.105770111083984, 20.633407592773438, -6.529327392578125, 64.20394897460938, 32.494598388671875, 35.92259216308594, -39.380950927734375, 26.030792236328125, 15.876548767089844, 1.1436138153076172, 53.92070007324219, -4.845256805419922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000087.npy"} +{"epoch": 0.1277533039647577, "step": 88, "batch_size": 64, "mean": 19.053417205810547, "std": 33.52738952636719, "min": -62.710205078125, "p10": -9.824423217773438, "median": 11.746100425720215, "p90": 62.039691162109406, "max": 135.19012451171875, "pos_frac": 0.765625, "sample": [10.342918395996094, 11.17022705078125, 66.85635375976562, 67.69534301757812, 35.35472869873047, 50.69169616699219, 4.008270263671875, 65.58047485351562, -9.805282592773438, 13.817499160766602, 35.71278381347656, 7.093955993652344, 2.0032196044921875, 50.04109191894531, -22.619003295898438, 32.75816345214844, -20.333282470703125, 16.21070098876953, -50.222198486328125, -62.710205078125, -5.289272308349609, 78.38034057617188, 40.450775146484375, -4.159416198730469, 46.077781677246094, 5.799715042114258, 29.752670288085938, -9.832626342773438, 23.698747634887695, 21.679275512695312, 42.083160400390625, 8.198188781738281, -1.6961784362792969, 27.062393188476562, 0.8258132934570312, 98.0433349609375, 9.512916564941406, 53.777862548828125, 5.247150421142578, 135.19012451171875, 5.642585754394531, 47.31306457519531, 3.9202117919921875, 14.181211471557617, 11.17437744140625, 26.196678161621094, 0.2919769287109375, -3.21661376953125, 36.19493103027344, 83.11990356445312, 36.15660858154297, 16.051071166992188, -0.376373291015625, 10.409568786621094, -54.9566650390625, -5.614715576171875, 11.071725845336914, -1.5190620422363281, 12.31782341003418, 14.731361389160156, 19.034208297729492, -21.342063903808594, 7.3952789306640625, 42.79138946533203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000088.npy"} +{"epoch": 0.12922173274596183, "step": 89, "batch_size": 64, "mean": 21.37381362915039, "std": 28.52877426147461, "min": -67.52581787109375, "p10": -2.294877815246582, "median": 17.34477424621582, "p90": 50.94893493652345, "max": 126.81071472167969, "pos_frac": 0.859375, "sample": [5.2503509521484375, 1.5705909729003906, 5.693029403686523, 21.293169021606445, -10.389225006103516, 52.45512390136719, 43.10863494873047, 16.583412170410156, -8.728340148925781, 60.62822723388672, 60.333091735839844, 23.88985824584961, 22.58483123779297, 47.43449401855469, 37.320579528808594, 30.92409896850586, 13.735458374023438, 10.272750854492188, -2.186067581176758, 19.889572143554688, 15.522834777832031, 3.382547378540039, 16.512523651123047, 11.934364318847656, -10.400405883789062, 1.150247573852539, 1.6400222778320312, 10.326303482055664, 78.32260131835938, 20.6448974609375, 30.19757843017578, 91.16220092773438, 17.451129913330078, 45.81641387939453, 22.294315338134766, 2.2387847900390625, 13.034698486328125, 126.81071472167969, 5.008335113525391, -2.6902942657470703, 29.914073944091797, -0.990509033203125, 23.199655532836914, 31.55556869506836, 15.969499588012695, 5.724504470825195, 10.281135559082031, 5.188442230224609, 4.05279541015625, 36.97407531738281, 22.28466796875, 41.579620361328125, -2.341510772705078, 17.238418579101562, 44.769309997558594, -67.52581787109375, -26.8101806640625, 28.74205780029297, 18.980792999267578, 24.068355560302734, 25.653961181640625, 24.893646240234375, 95.13606262207031, 7.361942291259766], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000089.npy"} +{"epoch": 0.13069016152716592, "step": 90, "batch_size": 64, "mean": 29.19457244873047, "std": 43.23085403442383, "min": -54.22804260253906, "p10": -11.322014999389642, "median": 19.021102905273438, "p90": 94.59552001953126, "max": 153.40838623046875, "pos_frac": 0.8125, "sample": [69.64274597167969, -45.275062561035156, 30.534423828125, 96.49432373046875, 22.938953399658203, 37.20252990722656, 0.6296348571777344, -4.312568664550781, 60.677520751953125, -1.6740150451660156, 46.80181121826172, 18.55615997314453, 0.059604644775390625, 13.069839477539062, 39.829994201660156, 88.09457397460938, -5.582393646240234, 153.40838623046875, 55.02418518066406, 2.3401260375976562, 2.661092758178711, 10.793472290039062, 20.411277770996094, -13.781852722167969, 120.3668212890625, 17.42682647705078, 25.847793579101562, 126.70895385742188, 5.111869812011719, 24.673564910888672, 143.72174072265625, 23.422439575195312, -28.180908203125, 12.06500244140625, -5.411956787109375, -54.22804260253906, 28.607440948486328, 8.837440490722656, 65.24382019042969, 6.917366027832031, 15.483261108398438, 13.392860412597656, -22.642471313476562, -4.902599334716797, 23.007843017578125, 90.16497802734375, 102.59771728515625, -37.63182830810547, -32.795989990234375, 12.430564880371094, 38.738914489746094, 1.4065475463867188, 19.486045837402344, 38.056480407714844, 69.13296508789062, 33.91454315185547, 11.009637832641602, 11.104005813598633, 64.1973876953125, 17.466594696044922, 24.596900939941406, 17.826379776000977, 100.75027465820312, 41.986610412597656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000090.npy"} +{"epoch": 0.13215859030837004, "step": 91, "batch_size": 64, "mean": 23.98028564453125, "std": 35.40739822387695, "min": -34.80876159667969, "p10": -20.299038696289063, "median": 22.010177612304688, "p90": 71.69305725097661, "max": 125.91748046875, "pos_frac": 0.75, "sample": [19.22840118408203, 54.05583190917969, -34.80876159667969, 8.11587905883789, 76.00567626953125, 0.6335220336914062, -19.288330078125, 45.20963668823242, 3.4870471954345703, 21.358619689941406, 43.294158935546875, 41.26301574707031, 43.17798614501953, 14.540840148925781, -34.351539611816406, 113.35618591308594, -12.01153564453125, 2.2467193603515625, 9.270065307617188, 87.01615905761719, 10.103515625, 34.06309127807617, 125.91748046875, -18.109352111816406, 29.559547424316406, 56.77101135253906, -20.20153045654297, 3.563629150390625, 23.407127380371094, -5.2826690673828125, -20.873214721679688, 22.77654457092285, 61.630279541015625, -20.781051635742188, 18.333648681640625, 22.66173553466797, 58.838043212890625, -14.312156677246094, 21.211875915527344, -5.007902145385742, 9.657478332519531, 24.832107543945312, 60.981712341308594, 2.5627098083496094, -20.34082794189453, 39.060638427734375, 24.193374633789062, 82.08660888671875, 26.364479064941406, 43.66693115234375, 10.2630615234375, -26.07025909423828, 0.8597812652587891, 29.8421630859375, -2.103363037109375, 42.992576599121094, -0.5650672912597656, 87.4637451171875, 46.776893615722656, 24.73352813720703, 54.37721252441406, -27.04216766357422, 49.03242492675781, 85.04322052001953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000091.npy"} +{"epoch": 0.13362701908957417, "step": 92, "batch_size": 64, "mean": 24.30576515197754, "std": 34.64317321777344, "min": -49.610939025878906, "p10": -12.08062515258789, "median": 17.625999450683594, "p90": 59.92264404296875, "max": 130.342041015625, "pos_frac": 0.75, "sample": [-4.736431121826172, 6.051261901855469, 7.736442565917969, 50.00389862060547, -8.646575927734375, 6.230628967285156, -49.610939025878906, -16.57663345336914, 49.57014465332031, 25.22313690185547, 67.96310424804688, 0.39200592041015625, 45.790618896484375, 3.3288230895996094, -5.879783630371094, 114.86453247070312, 55.552024841308594, 130.342041015625, -42.61590576171875, -5.63311767578125, 26.557697296142578, -11.767768859863281, 28.946670532226562, -8.178218841552734, -19.34198760986328, 59.98042297363281, -12.214706420898438, 74.89144897460938, 31.744651794433594, 4.816656112670898, 75.17992401123047, -2.796192169189453, 35.96417999267578, 0.7165622711181641, 52.53321075439453, 8.771347045898438, 33.57599639892578, 109.60401916503906, 10.252046585083008, 59.224639892578125, 44.88542175292969, 59.78782653808594, -1.7420883178710938, -5.5070037841796875, 8.913005828857422, 26.021034240722656, 5.1680755615234375, 42.221649169921875, 36.0218505859375, 11.730024337768555, 0.4027843475341797, -13.000364303588867, 53.29384994506836, 16.520530700683594, 18.731468200683594, 13.741249084472656, 36.971466064453125, 53.53205871582031, 44.29435729980469, 25.1405029296875, 11.142030715942383, -13.35190200805664, 57.39692687988281, 35.44432830810547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000092.npy"} +{"epoch": 0.13509544787077826, "step": 93, "batch_size": 64, "mean": 24.499828338623047, "std": 30.791046142578125, "min": -37.346946716308594, "p10": -3.8429769515991197, "median": 16.755695343017578, "p90": 59.61588363647461, "max": 155.72198486328125, "pos_frac": 0.828125, "sample": [19.267318725585938, 9.343378067016602, 27.359664916992188, 64.92682647705078, 59.60235595703125, 56.848541259765625, 4.8021087646484375, 23.55547332763672, 47.383399963378906, -14.975082397460938, 77.50340270996094, 21.88225555419922, 52.88262176513672, 52.631858825683594, 155.72198486328125, 13.55496597290039, -2.2406692504882812, 3.769195556640625, 2.3040313720703125, 91.3476791381836, -8.642738342285156, 13.84383773803711, 22.866928100585938, -0.27208709716796875, 34.95494079589844, 10.569305419921875, 33.24121856689453, 5.764991760253906, -1.0840682983398438, 11.627677917480469, -0.7038326263427734, 14.899665832519531, 9.750799179077148, 19.4051513671875, 34.16194152832031, 54.17938232421875, 66.6478042602539, -24.3245849609375, -4.529680252075195, 52.783721923828125, 47.352210998535156, 35.57536697387695, 39.22203826904297, 6.460422515869141, 10.387775421142578, 8.871467590332031, 30.24164581298828, 2.76507568359375, 59.621681213378906, 1.9394264221191406, -7.2735595703125, 62.10602569580078, 6.006202697753906, 17.746734619140625, 5.982931137084961, 5.278331756591797, -18.725914001464844, 32.13774871826172, 47.12609100341797, 35.285179138183594, 48.76128387451172, 15.764656066894531, -37.346946716308594, 0.091461181640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000093.npy"} +{"epoch": 0.13656387665198239, "step": 94, "batch_size": 64, "mean": 28.207868576049805, "std": 32.23759078979492, "min": -23.186920166015625, "p10": -7.498330497741699, "median": 24.410072326660156, "p90": 70.15250167846679, "max": 116.23855590820312, "pos_frac": 0.78125, "sample": [26.42154312133789, -18.4141845703125, 45.847373962402344, 20.281700134277344, 5.890106201171875, -15.121150970458984, 10.536937713623047, 61.4686164855957, 5.80534553527832, -21.208648681640625, 24.948434829711914, 12.205291748046875, 26.05998992919922, 7.308433532714844, 12.275482177734375, 0.4843330383300781, 60.46543884277344, 12.561534881591797, -7.194423675537109, 115.54400634765625, 5.444906234741211, 31.313213348388672, 32.932273864746094, 55.52000427246094, 82.42337036132812, 30.356918334960938, 14.561820983886719, 30.488792419433594, 22.334293365478516, -3.1695327758789062, -4.769720077514648, 17.49265480041504, 37.53179931640625, 63.3040771484375, 43.70991134643555, 20.14773941040039, 36.934967041015625, 70.2790298461914, 33.96900939941406, 23.8717098236084, 5.72172737121582, 46.11748504638672, -3.7518463134765625, -18.46282958984375, 69.85726928710938, 77.32168579101562, -7.628576278686523, 61.618408203125, 116.23855590820312, -6.863624572753906, -23.186920166015625, 90.13291931152344, 33.06597137451172, 59.92033386230469, -3.225341796875, 19.629119873046875, -14.918716430664062, -5.571502685546875, 37.35472869873047, 16.768051147460938, 36.8573112487793, 58.42106628417969, 74.3199462890625, 54.72483825683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000094.npy"} +{"epoch": 0.13803230543318648, "step": 95, "batch_size": 64, "mean": 22.786226272583008, "std": 30.680328369140625, "min": -75.20936584472656, "p10": -7.656877136230468, "median": 17.04152011871338, "p90": 66.72337646484377, "max": 95.77642822265625, "pos_frac": 0.8125, "sample": [16.6263427734375, 44.517295837402344, 5.301033020019531, 23.973899841308594, 20.31962776184082, 70.55743408203125, 9.307880401611328, 6.511499404907227, -8.06484603881836, 16.264808654785156, 19.942068099975586, 4.556800842285156, 35.495174407958984, 85.30485534667969, 52.06074523925781, -6.867668151855469, 38.834434509277344, 2.751798629760742, 15.804828643798828, 48.58637237548828, 23.416824340820312, 41.55912780761719, 20.03270149230957, 15.718050003051758, 10.124765396118164, -12.390968322753906, 10.888206481933594, 55.73796844482422, 21.32822036743164, 30.450164794921875, -10.497352600097656, -7.995109558105469, 55.70228576660156, 95.77642822265625, 79.52926635742188, 60.9071044921875, 37.32630157470703, 1.5162239074707031, -0.20384979248046875, 69.216064453125, 46.97641372680664, 84.18026733398438, 73.57237243652344, 4.9846954345703125, 14.944408416748047, 9.065452575683594, -1.4698982238769531, 32.67198944091797, 31.32799530029297, -75.20936584472656, 2.500734329223633, 2.181406021118164, -31.2464599609375, 47.260223388671875, 44.310150146484375, 13.504413604736328, 42.7747802734375, 11.700593948364258, 17.456697463989258, 17.99138641357422, -5.72093391418457, -3.1235828399658203, 15.97286605834961, -38.21495819091797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000095.npy"} +{"epoch": 0.1395007342143906, "step": 96, "batch_size": 64, "mean": 30.88555145263672, "std": 37.0584602355957, "min": -57.33744812011719, "p10": -0.38352718353271414, "median": 19.872764587402344, "p90": 79.21403121948244, "max": 161.4724884033203, "pos_frac": 0.890625, "sample": [65.58268737792969, 17.4354248046875, 38.51045227050781, 32.180118560791016, 19.894363403320312, 66.86395263671875, 6.2799835205078125, 2.816495895385742, 0.32781219482421875, 25.446533203125, 115.16505432128906, 17.14563751220703, 9.794715881347656, -4.072948455810547, 2.9446067810058594, 21.44650650024414, 30.03711700439453, 45.391868591308594, 87.72692108154297, 93.24031066894531, 9.388618469238281, 45.42852783203125, 38.24474334716797, 161.4724884033203, 76.07219696044922, 13.293720245361328, 19.851165771484375, 9.393081665039062, 16.80120849609375, 16.17113494873047, 9.422004699707031, 138.50711059570312, 33.634315490722656, 29.522010803222656, 6.4542999267578125, 90.88711547851562, 59.1224365234375, 6.800403594970703, 38.09928894042969, 1.6746749877929688, 49.293052673339844, 27.573148727416992, 23.6749267578125, -10.116340637207031, 15.499696731567383, 0.8283672332763672, -2.5232696533203125, 42.095359802246094, -1.4127349853515625, 26.07379913330078, 12.1845703125, 61.097503662109375, -14.810054779052734, 7.7621612548828125, 69.89892578125, 11.582015991210938, 18.14607048034668, 68.06767272949219, 80.56053161621094, 1.5726165771484375, -0.6883869171142578, 32.20427703857422, -57.33744812011719, 1.0505828857421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000096.npy"} +{"epoch": 0.14096916299559473, "step": 97, "batch_size": 64, "mean": 23.93172836303711, "std": 28.983230590820312, "min": -63.712669372558594, "p10": -8.451221656799314, "median": 23.027634620666504, "p90": 58.85814285278321, "max": 111.60845947265625, "pos_frac": 0.78125, "sample": [18.468971252441406, 51.926239013671875, 1.095245361328125, 1.4192733764648438, -4.739727020263672, -3.709003448486328, 17.471576690673828, 29.22523307800293, 2.364124298095703, 49.01054382324219, 24.31058120727539, 43.505950927734375, -0.8289318084716797, 18.963773727416992, 81.15130615234375, 63.41191101074219, -22.314979553222656, 35.60444641113281, 0.399932861328125, 13.969320297241211, 27.674041748046875, 7.001682281494141, 16.45315933227539, -22.33294677734375, 72.9034423828125, 20.45200538635254, 26.092056274414062, 31.462417602539062, 42.12318420410156, 15.038990020751953, 27.769187927246094, -5.655145645141602, 17.399642944335938, -11.86161994934082, 51.580894470214844, 43.79095458984375, 51.711509704589844, 30.741222381591797, -63.712669372558594, -9.781364440917969, 50.35163116455078, 32.65144348144531, -11.070205688476562, 8.32802963256836, 57.00322723388672, 36.361263275146484, 1.6541461944580078, 22.499248504638672, 23.556020736694336, 17.88970947265625, -4.928670883178711, 59.653106689453125, -9.649539947509766, 52.55946731567383, -2.122020721435547, 67.31004333496094, -1.431793212890625, 44.68359375, 49.32331848144531, 64.54582214355469, 33.751346588134766, 10.030143737792969, 111.60845947265625, 27.51641845703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000097.npy"} +{"epoch": 0.14243759177679882, "step": 98, "batch_size": 64, "mean": 27.260852813720703, "std": 38.40777587890625, "min": -108.06094360351562, "p10": -13.337170410156247, "median": 29.30670166015625, "p90": 69.95939636230469, "max": 133.81820678710938, "pos_frac": 0.796875, "sample": [42.320709228515625, 5.840675354003906, 66.4588623046875, 44.99365997314453, 6.21092414855957, 21.40167808532715, 24.938629150390625, 28.056930541992188, 32.12144470214844, 7.642425537109375, 30.990755081176758, -9.695709228515625, 21.17330551147461, 24.69207000732422, 68.37516784667969, 52.37786102294922, -14.897796630859375, 1.818756103515625, -0.5528030395507812, -23.180931091308594, 41.79063034057617, 36.29889678955078, -31.058181762695312, -40.01727294921875, 56.481773376464844, 53.75618362426758, 32.287109375, 71.27775573730469, 70.63835144042969, 26.78922462463379, 41.42106628417969, -3.811767578125, 63.73918914794922, 133.81820678710938, -108.06094360351562, 84.93840026855469, 62.843231201171875, 28.230438232421875, 48.982391357421875, 19.016250610351562, -9.104839324951172, 40.75794982910156, -60.33821105957031, 26.70922088623047, 38.13836669921875, 3.7357139587402344, 40.613014221191406, 85.36416625976562, 62.31758117675781, 38.58755874633789, 16.365577697753906, 11.004493713378906, 76.86769104003906, 107.51681518554688, 36.13323211669922, 33.37163162231445, 30.382965087890625, 15.279672622680664, 3.9234466552734375, -8.99703598022461, 54.58131408691406, -3.7014617919921875, 3.807649612426758, -19.06938934326172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000098.npy"} +{"epoch": 0.14390602055800295, "step": 99, "batch_size": 64, "mean": 33.529537200927734, "std": 42.39036560058594, "min": -33.647193908691406, "p10": -8.1749885559082, "median": 22.441463470458984, "p90": 85.10894775390625, "max": 183.78843688964844, "pos_frac": 0.828125, "sample": [-0.7361507415771484, -9.336524963378906, 6.081796646118164, 21.696735382080078, 2.1112289428710938, 42.45964813232422, 27.878799438476562, 31.303390502929688, 111.08151245117188, 20.92119026184082, 25.467124938964844, 126.1180419921875, 6.838275909423828, -30.394546508789062, 85.21052551269531, -5.039817810058594, 22.337356567382812, 37.05744171142578, -2.1434059143066406, 18.35388946533203, 76.46780395507812, 16.19298553466797, 12.009923934936523, 161.922119140625, 10.174631118774414, 60.55049133300781, 2.5272750854492188, -25.611000061035156, 11.066804885864258, -33.647193908691406, -10.64788818359375, 55.81605529785156, 39.15570068359375, 27.937305450439453, 68.06257629394531, 183.78843688964844, 76.37918090820312, 38.342899322509766, 18.2242431640625, 21.171295166015625, 5.594169616699219, 30.261844635009766, 39.56413269042969, 84.87193298339844, 103.97652435302734, 7.57794189453125, 12.7412109375, 69.67399597167969, -5.4647369384765625, 0.6630535125732422, 33.84217834472656, 91.18692016601562, 31.66128158569336, 10.534000396728516, 58.47578430175781, -9.542388916015625, 43.02797317504883, 14.909395217895508, 70.01617431640625, 52.305015563964844, 22.545570373535156, 48.47614288330078, 4.080982208251953, -22.238868713378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000099.npy"} +{"epoch": 0.14537444933920704, "step": 100, "batch_size": 64, "mean": 19.546470642089844, "std": 36.68450164794922, "min": -77.53128051757812, "p10": -19.88119106292724, "median": 20.06147003173828, "p90": 63.9695899963379, "max": 147.4073486328125, "pos_frac": 0.765625, "sample": [56.73828125, 30.950937271118164, -6.6725006103515625, 15.752799987792969, 23.68604278564453, -9.184127807617188, 33.86321258544922, 26.436988830566406, 8.020082473754883, 43.439849853515625, 11.66412353515625, 1.1168441772460938, 38.964874267578125, 25.528518676757812, 4.158088684082031, 79.95195007324219, 20.038436889648438, -43.874603271484375, -33.675994873046875, -54.05145263671875, -14.558443069458008, -9.735023498535156, 0.5418834686279297, -64.6724853515625, 56.773929595947266, 20.084503173828125, 50.495338439941406, 24.165863037109375, 1.4858932495117188, 5.542970657348633, 44.459747314453125, -3.093109130859375, 45.781349182128906, 6.036243438720703, 68.66903686523438, 32.42768096923828, 8.875772476196289, 12.404953002929688, 61.729576110839844, 26.555145263671875, 41.20512008666992, 16.159523010253906, 8.271881103515625, 0.9214763641357422, 31.901947021484375, 35.99229431152344, -9.676773071289062, -27.049423217773438, -7.6540985107421875, 68.50006866455078, 37.083900451660156, 70.95934295654297, 24.97989845275879, -77.53128051757812, 147.4073486328125, -11.639547348022461, 16.561279296875, 82.20352172851562, -22.162368774414062, 64.92959594726562, 46.58087158203125, 7.666435241699219, 37.75226593017578, 20.78765869140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000100.npy"} +{"epoch": 0.14684287812041116, "step": 101, "batch_size": 64, "mean": 23.726482391357422, "std": 41.204681396484375, "min": -54.50536346435547, "p10": -20.304966354370116, "median": 18.834228515625, "p90": 86.542172241211, "max": 134.91500854492188, "pos_frac": 0.671875, "sample": [98.61103820800781, 134.91500854492188, 30.93022918701172, 30.677452087402344, -14.547348022460938, 104.59864044189453, 108.92880249023438, 10.652336120605469, 94.075927734375, 111.01719665527344, -36.232421875, 27.746078491210938, -40.486427307128906, 48.834716796875, 14.127130508422852, -7.944816589355469, -21.565673828125, 15.418733596801758, 63.51422882080078, 28.400665283203125, -15.980926513671875, 26.850753784179688, 54.85304260253906, 63.965232849121094, 18.890701293945312, 18.777755737304688, 9.55440902709961, 65.0494613647461, 120.429443359375, 25.8677978515625, -8.510055541992188, -6.437633514404297, -14.11674690246582, 52.1164436340332, 18.1091365814209, 38.13713073730469, 68.96340942382812, 45.69136047363281, -5.95135498046875, 6.439830780029297, -2.735198974609375, -17.36331558227539, 20.105693817138672, -6.905570983886719, -0.3747100830078125, -15.595651626586914, 35.80188751220703, 32.460201263427734, 8.871944427490234, 26.102495193481445, -54.50536346435547, 35.64099884033203, 2.0473175048828125, -31.343463897705078, 25.935955047607422, 18.07001495361328, -15.631938934326172, 39.50271987915039, 7.665290832519531, -1.7652912139892578, 52.04615783691406, -29.158382415771484, 40.483680725097656, -35.23127746582031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000101.npy"} +{"epoch": 0.14831130690161526, "step": 102, "batch_size": 64, "mean": 17.46571159362793, "std": 39.091835021972656, "min": -79.61849975585938, "p10": -22.37333831787109, "median": 10.995735168457031, "p90": 57.55325317382813, "max": 140.27505493164062, "pos_frac": 0.65625, "sample": [3.2738609313964844, 40.716041564941406, 58.453514099121094, 11.78662109375, 3.626983642578125, 5.870094299316406, -16.68017578125, -45.423919677734375, 22.14699363708496, -76.70208740234375, 28.827957153320312, 88.55770874023438, -11.785076141357422, 38.6180419921875, 28.861949920654297, -9.279325485229492, 10.204849243164062, 35.03912353515625, 50.96240997314453, 2.0311851501464844, -16.659896850585938, -4.55653190612793, 48.65464782714844, -4.967267990112305, 59.88226318359375, 22.698711395263672, 38.592933654785156, 140.27505493164062, -3.9162826538085938, -0.6503753662109375, 113.39310455322266, -0.37514495849609375, 37.7563591003418, 42.639007568359375, 4.012275695800781, -17.196510314941406, 32.55870056152344, -79.61849975585938, 1.638214111328125, -28.701766967773438, 0.9338226318359375, -44.50489807128906, -23.266357421875, -4.44244384765625, 55.45264434814453, 30.9725341796875, 42.624229431152344, 83.16188049316406, 43.99525451660156, 6.451629638671875, 70.58580780029297, -27.291915893554688, -6.812553405761719, 5.957637786865234, 53.14813232421875, -20.289627075195312, 39.88365936279297, 34.81810760498047, 24.917531967163086, 37.20911407470703, -13.141555786132812, -16.28406524658203, 41.69102478027344, 47.4702033996582], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000102.npy"} +{"epoch": 0.14977973568281938, "step": 103, "batch_size": 64, "mean": 35.02385330200195, "std": 42.719722747802734, "min": -38.652313232421875, "p10": -11.033242034912108, "median": 27.810401916503906, "p90": 82.2153060913086, "max": 182.66400146484375, "pos_frac": 0.796875, "sample": [-6.147697448730469, 7.9886474609375, 14.065181732177734, 34.273826599121094, -21.068283081054688, -25.463546752929688, 34.36381149291992, 39.30308532714844, 14.213783264160156, 19.908702850341797, 28.339385986328125, 45.07232666015625, 16.861572265625, 19.392189025878906, -0.9474334716796875, 66.92534637451172, 111.82949829101562, 35.437400817871094, 49.75343322753906, 61.86951446533203, -35.87870788574219, -11.131072998046875, 48.923095703125, 26.3994140625, -38.652313232421875, 75.5114517211914, 36.43000793457031, -17.410751342773438, 14.959815979003906, 19.20660400390625, 39.24109649658203, 168.00119018554688, 26.509368896484375, 20.432453155517578, 71.70558166503906, 67.5885238647461, -6.100372314453125, -10.804969787597656, 21.656587600708008, 17.91209602355957, -5.0517425537109375, 40.53813934326172, 64.20623016357422, -28.53826904296875, 23.84051513671875, 100.65840911865234, 80.49169921875, 27.281417846679688, 182.66400146484375, 101.21188354492188, 105.32128143310547, 82.95399475097656, 12.912353515625, 41.224586486816406, 68.19145202636719, -1.5090904235839844, 32.377418518066406, 4.310441970825195, 3.953197479248047, 50.46277618408203, 79.1241683959961, 16.499736785888672, 32.412384033203125, 45.51972198486328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000103.npy"} +{"epoch": 0.1512481644640235, "step": 104, "batch_size": 64, "mean": 33.20488739013672, "std": 48.7789192199707, "min": -65.19480895996094, "p10": -12.749076271057127, "median": 19.275466918945312, "p90": 92.76042938232422, "max": 191.7330322265625, "pos_frac": 0.75, "sample": [92.72117614746094, -7.2371978759765625, 14.162010192871094, 14.951263427734375, 62.29191589355469, -5.756782531738281, -29.009628295898438, 3.1871490478515625, -3.4878082275390625, 39.379364013671875, -13.266847610473633, 35.201358795166016, 152.31777954101562, 52.413429260253906, 86.41350555419922, -2.823333740234375, 92.77725219726562, 33.602813720703125, -17.78491973876953, 117.25363159179688, 39.27598571777344, 5.0888824462890625, 19.391422271728516, 7.319236755371094, 15.520940780639648, 2.170867919921875, 191.7330322265625, -47.68230438232422, 58.152320861816406, 136.41116333007812, 138.77210998535156, 30.235610961914062, 13.237398147583008, 66.31442260742188, 12.620485305786133, 14.730087280273438, -11.540943145751953, -22.675308227539062, 115.57476806640625, 16.44647216796875, 51.12366485595703, -7.728057861328125, 69.73475646972656, -3.927398681640625, 85.81184387207031, 38.44879150390625, 31.520103454589844, 52.162899017333984, 37.821014404296875, 30.613571166992188, 3.990762710571289, 91.16230773925781, 7.709266662597656, 28.5931396484375, 61.39970397949219, 67.65904998779297, -7.461236953735352, 28.19187355041504, -0.7981300354003906, 19.15951156616211, -23.702537536621094, 1.5627326965332031, -65.19480895996094, 8.857025146484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000104.npy"} +{"epoch": 0.1527165932452276, "step": 105, "batch_size": 64, "mean": 47.245811462402344, "std": 42.50857925415039, "min": -69.57260131835938, "p10": 2.6169235229492207, "median": 45.290687561035156, "p90": 102.3254058837891, "max": 178.41539001464844, "pos_frac": 0.90625, "sample": [49.417755126953125, 32.08836364746094, 46.201026916503906, 29.724197387695312, 42.776092529296875, 45.43657684326172, 73.86967468261719, 111.6141128540039, 10.081838607788086, -69.57260131835938, -6.601127624511719, 83.31201171875, 83.32687377929688, 13.71388053894043, 35.422515869140625, 48.94510269165039, -32.931453704833984, 1.8159332275390625, -6.0894775390625, 109.63358306884766, 32.863250732421875, 109.13957214355469, 71.09291076660156, 20.33587646484375, 11.845321655273438, 82.1181411743164, 63.12397766113281, 45.144798278808594, 9.533708572387695, 37.91443634033203, 68.181396484375, 168.99819946289062, 20.29419708251953, 51.57464599609375, 17.48809051513672, 19.328880310058594, 71.32295227050781, 92.38270568847656, 48.253936767578125, 17.30838394165039, 51.20341491699219, 92.8331298828125, 35.869964599609375, 37.992828369140625, 44.68278503417969, 53.06544494628906, -0.9669647216796875, 66.00285339355469, 68.88980102539062, 106.39352416992188, 37.047210693359375, 22.834434509277344, -19.694480895996094, 9.487434387207031, 66.06385040283203, 60.079627990722656, 56.63511657714844, 114.0509033203125, 61.36347961425781, 178.41539001464844, 25.939285278320312, 4.48590087890625, 13.008779525756836, 77.61781311035156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000105.npy"} +{"epoch": 0.15418502202643172, "step": 106, "batch_size": 64, "mean": 29.639097213745117, "std": 36.756900787353516, "min": -54.69886016845703, "p10": -13.173191070556634, "median": 29.612028121948242, "p90": 78.07882080078126, "max": 136.35772705078125, "pos_frac": 0.84375, "sample": [27.903884887695312, 5.232063293457031, 8.707311630249023, -4.180507659912109, 18.09636688232422, 2.150789260864258, 50.27300262451172, -6.449485778808594, 31.504005432128906, 68.74638366699219, 36.879722595214844, 37.100093841552734, 2.6589736938476562, 75.73753356933594, 27.378074645996094, 94.89424133300781, 7.313371658325195, 12.267568588256836, 79.08222961425781, 1.2037200927734375, 64.79762268066406, 42.234249114990234, 0.9738998413085938, 86.65374755859375, 23.92411994934082, 44.829872131347656, 7.688194274902344, -16.054779052734375, 55.21961212158203, -37.806121826171875, 48.45410919189453, 15.997779846191406, 41.87967300415039, 40.423866271972656, -2.640787124633789, 5.474822998046875, 24.293861389160156, 1.7227706909179688, 92.80143737792969, 64.19329071044922, 64.2239990234375, 25.408344268798828, -54.69886016845703, 3.9319610595703125, 136.35772705078125, 38.62109375, 28.208316802978516, 34.202552795410156, 32.523223876953125, 9.250991821289062, 0.6617813110351562, 59.15704345703125, 106.91826629638672, 35.095062255859375, 55.541595458984375, 58.09477233886719, -23.58470916748047, 33.75492858886719, -23.286544799804688, 31.01573944091797, 82.98283386230469, -42.02271270751953, -24.16693878173828, 47.151268005371094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000106.npy"} +{"epoch": 0.15565345080763582, "step": 107, "batch_size": 64, "mean": 33.398658752441406, "std": 43.218994140625, "min": -91.23826599121094, "p10": -12.554787826538076, "median": 22.860742568969727, "p90": 86.00971984863281, "max": 142.18540954589844, "pos_frac": 0.84375, "sample": [85.90151977539062, 112.58456420898438, 47.992034912109375, 69.022705078125, 24.93872833251953, 31.901214599609375, 52.47154235839844, -3.284832000732422, -91.23826599121094, 9.427431106567383, 16.40401840209961, 77.58819580078125, -38.39448547363281, 28.39364242553711, 10.09295654296875, 7.750175476074219, 15.428308486938477, 86.05609130859375, 1.9009552001953125, -27.561420440673828, 108.97586822509766, 67.74944305419922, 80.32427978515625, 6.186182022094727, -16.6263427734375, 3.2761611938476562, -0.907623291015625, 4.664154052734375, 142.18540954589844, 75.58936309814453, 18.41766357421875, 18.339698791503906, 99.00785064697266, 45.13252258300781, 65.757080078125, 89.00601959228516, 16.834117889404297, 29.737594604492188, 16.83355140686035, -16.527626037597656, 62.90782165527344, -47.22997283935547, 66.37883758544922, 76.13499450683594, 11.07135009765625, 83.03240966796875, 0.084075927734375, 81.11123657226562, 49.29231262207031, 53.95372009277344, 24.18799591064453, 22.107263565063477, 4.2209014892578125, -28.48076629638672, -0.47658538818359375, 5.815216064453125, 13.275920867919922, 73.53740692138672, 11.0986328125, 23.614221572875977, 68.13519287109375, 99.4366455078125, 2.4420623779296875, 10.532913208007812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000107.npy"} +{"epoch": 0.15712187958883994, "step": 108, "batch_size": 64, "mean": 40.09585952758789, "std": 58.3842887878418, "min": -62.987884521484375, "p10": -23.431006813049315, "median": 28.162052154541016, "p90": 124.41716613769535, "max": 215.70904541015625, "pos_frac": 0.765625, "sample": [36.898895263671875, 136.04039001464844, 40.02467346191406, -45.573699951171875, -62.987884521484375, 109.36380004882812, 31.11400032043457, 8.993698120117188, 215.70904541015625, -0.5573883056640625, -10.747039794921875, 128.13247680664062, 8.562385559082031, 6.37091064453125, -5.377531051635742, 72.88861846923828, -7.158758163452148, 97.02919006347656, 164.6047821044922, 107.8348617553711, 115.74810791015625, 83.43663024902344, 1.8639698028564453, -23.35981559753418, 48.44830322265625, 10.324003219604492, 27.531112670898438, 16.767879486083984, 76.37478637695312, 5.243770599365234, 25.811111450195312, 0.7705039978027344, 71.47579193115234, -23.461517333984375, 175.04165649414062, -40.10527038574219, 81.89620971679688, -29.691097259521484, -15.564811706542969, 4.579496383666992, 26.49681854248047, 7.335693359375, 50.641448974609375, 156.2396240234375, 33.969383239746094, 10.835916519165039, 90.30509185791016, 74.69284057617188, 31.034936904907227, -53.732513427734375, 14.696914672851562, 51.665992736816406, 128.13833618164062, 64.08744812011719, -1.5820808410644531, -1.7738590240478516, 38.786895751953125, 16.418376922607422, 73.85591125488281, -49.560665130615234, 28.792991638183594, 40.921504974365234, 66.28192138671875, 23.289710998535156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000108.npy"} +{"epoch": 0.15859030837004406, "step": 109, "batch_size": 64, "mean": 42.91639709472656, "std": 50.9198112487793, "min": -83.68093872070312, "p10": -11.263848876953123, "median": 36.97645950317383, "p90": 108.12711639404297, "max": 187.08523559570312, "pos_frac": 0.828125, "sample": [50.252960205078125, 8.75705337524414, 136.8688201904297, 86.84163665771484, 30.861976623535156, 54.967777252197266, -59.04893493652344, 12.761398315429688, 18.040359497070312, 49.19566345214844, 132.66436767578125, 42.68723678588867, 73.77677917480469, 54.83634948730469, 54.1439208984375, 5.846540451049805, 22.29671859741211, 37.73014831542969, -12.966793060302734, -5.094636917114258, -5.4224090576171875, 187.08523559570312, 0.4308052062988281, -9.953292846679688, 26.266477584838867, 4.3270263671875, 127.49838256835938, 30.758272171020508, 33.253692626953125, -39.08600997924805, 62.90638732910156, 70.23760986328125, 9.356151580810547, 70.93142700195312, 56.66131591796875, 39.02293395996094, 100.62156677246094, 72.00210571289062, 109.04541778564453, 105.50257110595703, -0.38330841064453125, 13.019844055175781, -83.68093872070312, 29.025508880615234, 1.27862548828125, 73.99420928955078, 35.590850830078125, 34.92375564575195, 19.27830696105957, 82.8170166015625, -49.715576171875, 34.34466552734375, 37.5084114074707, 112.73335266113281, 127.86878967285156, 37.098045349121094, 86.75780487060547, 36.85487365722656, 91.17231750488281, 105.98441314697266, -11.825515747070312, -23.118606567382812, 3.110414505004883, 105.1469955444336], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000109.npy"} +{"epoch": 0.16005873715124816, "step": 110, "batch_size": 64, "mean": 38.542945861816406, "std": 54.42041778564453, "min": -101.18891906738281, "p10": -15.089390182495116, "median": 33.35336971282959, "p90": 112.93467559814455, "max": 182.9139404296875, "pos_frac": 0.8125, "sample": [123.07748413085938, 55.786834716796875, 23.039813995361328, 54.17628479003906, 59.18804168701172, 36.903297424316406, 49.5981330871582, -1.1092529296875, -101.18891906738281, 45.31352996826172, 66.94741821289062, 16.174514770507812, -64.92230224609375, 19.09193992614746, 12.172557830810547, 1.4801654815673828, 30.91798973083496, 169.1707763671875, 107.51722717285156, 27.692344665527344, 4.956296920776367, -8.460025787353516, 182.9139404296875, 136.1932373046875, 101.81736755371094, 78.96009826660156, 17.51611328125, -18.615737915039062, -14.361141204833984, 61.39453125, -43.722381591796875, -15.401496887207031, 9.555641174316406, 19.436969757080078, 6.668481826782227, 159.321044921875, 52.9775390625, 85.43914794921875, 115.25643920898438, 4.854375839233398, 77.50228881835938, -8.206826210021973, 39.43132019042969, 35.78874969482422, 15.02853012084961, 11.052520751953125, 82.4472885131836, -58.80242919921875, 76.34036254882812, -26.2750244140625, 40.31207275390625, 1.4745540618896484, 55.89708709716797, 35.85636520385742, 51.55923080444336, 44.714576721191406, 19.705596923828125, 20.779586791992188, 18.87799644470215, 44.680335998535156, 80.11251831054688, 2.3550987243652344, -5.660900115966797, 144.04931640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000110.npy"} +{"epoch": 0.16152716593245228, "step": 111, "batch_size": 64, "mean": 43.80938720703125, "std": 60.30035400390625, "min": -85.91824340820312, "p10": -33.51629180908203, "median": 34.01167297363281, "p90": 121.39797515869142, "max": 194.4635009765625, "pos_frac": 0.765625, "sample": [194.4635009765625, -52.857666015625, -85.64044189453125, 123.38192749023438, -7.270023345947266, 54.99259948730469, 14.695182800292969, 35.074256896972656, 28.599605560302734, 147.38905334472656, 31.687057495117188, 79.03985595703125, 42.39684295654297, 56.982269287109375, 28.091766357421875, 112.08370971679688, 113.68082427978516, -36.211212158203125, 100.28974914550781, 8.234342575073242, 79.56238555908203, -27.429306030273438, -22.793987274169922, -10.008148193359375, 66.92192077636719, 116.76875305175781, -7.4169158935546875, 113.16658020019531, -36.125, 11.748825073242188, -23.085968017578125, 32.94908905029297, 137.28067016601562, 23.134567260742188, 151.30706787109375, 55.4927978515625, 32.20097351074219, 55.57332992553711, 68.17211151123047, 132.19241333007812, 24.627410888671875, -44.63017272949219, 19.54791259765625, 29.880931854248047, 138.088134765625, 70.32637023925781, 29.897361755371094, 62.117431640625, 116.41166687011719, 2.90740966796875, 113.48086547851562, 38.06035614013672, 95.97882843017578, 72.55374145507812, 51.93638610839844, 13.630882263183594, -85.91824340820312, 32.230308532714844, 28.783235549926758, 41.96919250488281, -5.694438934326172, 93.80319213867188, -64.19985961914062, -10.703384399414062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000111.npy"} +{"epoch": 0.16299559471365638, "step": 112, "batch_size": 64, "mean": 27.892642974853516, "std": 41.354331970214844, "min": -59.28949737548828, "p10": -24.98142318725586, "median": 26.104644775390625, "p90": 92.57630157470705, "max": 121.44387817382812, "pos_frac": 0.734375, "sample": [45.4084358215332, 121.44387817382812, 87.82441711425781, -6.266130447387695, -44.050411224365234, 34.2715950012207, 37.31488037109375, 31.51214599609375, 28.735305786132812, -5.275043487548828, 101.95770263671875, 79.27175903320312, -0.7927379608154297, -2.40325927734375, 94.61282348632812, 61.53165817260742, 39.75729751586914, 56.340614318847656, 39.013607025146484, 37.346832275390625, 43.68511962890625, 30.701065063476562, 72.04522705078125, -15.856460571289062, 25.4208984375, 23.26413917541504, 32.47991180419922, 33.072086334228516, -24.735687255859375, 17.503604888916016, 3.412607192993164, -34.851749420166016, 30.915939331054688, 110.1450424194336, 114.00847625732422, -32.673553466796875, 41.00769805908203, 14.798107147216797, 71.18470764160156, -0.03988838195800781, -59.28949737548828, 97.11986541748047, 25.289840698242188, 12.867111206054688, -13.024856567382812, 22.147777557373047, 62.609710693359375, 0.2622261047363281, 16.45862579345703, 101.27033996582031, 26.78839111328125, 8.823291778564453, 45.58216094970703, -42.606597900390625, 12.626001358032227, -25.08673858642578, -27.528457641601562, -11.28094482421875, 76.0531234741211, 1.1150016784667969, 24.94618034362793, 5.335460662841797, -8.478500366210938, 40.08686065673828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000112.npy"} +{"epoch": 0.1644640234948605, "step": 113, "batch_size": 64, "mean": 30.034034729003906, "std": 39.00908660888672, "min": -41.707366943359375, "p10": -16.230777740478512, "median": 31.603471755981445, "p90": 83.51098861694337, "max": 120.21651458740234, "pos_frac": 0.734375, "sample": [-33.230003356933594, 76.31378173828125, -17.62386131286621, 89.03143310546875, 9.434814453125, 38.918067932128906, 14.222757339477539, 36.973297119140625, 3.363546371459961, -10.260009765625, 10.441902160644531, 86.71684265136719, 55.13788604736328, 112.12588500976562, 38.64588928222656, -19.565359115600586, -7.254753112792969, -8.957450866699219, 7.7239532470703125, 65.89720153808594, -21.026535034179688, 48.87291717529297, 82.45619201660156, -21.000396728515625, 55.31645965576172, -6.94512939453125, 2.755369186401367, 120.21651458740234, 48.96247100830078, -41.707366943359375, 19.503799438476562, 75.4683837890625, 36.27580642700195, -10.307586669921875, 38.28834533691406, 53.31769561767578, 12.410285949707031, 83.76190948486328, 20.204200744628906, -12.769439697265625, 52.755828857421875, 71.0283203125, -12.980249404907227, 2.502511978149414, -24.709426879882812, 45.88615036010742, 36.81488800048828, 41.08943176269531, -9.74945068359375, 15.376745223999023, 115.518310546875, -2.6969871520996094, 34.08042907714844, 32.932586669921875, 100.79485321044922, 82.92550659179688, 34.49665069580078, 63.55286407470703, -6.500543594360352, 10.537155151367188, 12.370147705078125, 16.45499038696289, 30.274356842041016, 47.31343078613281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000113.npy"} +{"epoch": 0.16593245227606462, "step": 114, "batch_size": 64, "mean": 47.85365295410156, "std": 65.90361785888672, "min": -42.71179962158203, "p10": -27.82858657836914, "median": 42.72231101989746, "p90": 115.18463134765629, "max": 355.17401123046875, "pos_frac": 0.765625, "sample": [183.2822265625, 90.87767791748047, 53.962059020996094, 50.813316345214844, 28.5003662109375, 119.48541259765625, 19.74507713317871, 48.70451354980469, -37.874359130859375, 68.07124328613281, -33.10297393798828, 77.4588623046875, 44.53905487060547, 22.53491973876953, -38.769386291503906, 87.18252563476562, 14.734928131103516, -34.888572692871094, 34.702117919921875, 124.00785827636719, 12.816949844360352, 56.22858428955078, -5.171661376953125, 119.99337005615234, 41.653804779052734, 60.17579650878906, 35.34033966064453, 25.082324981689453, 38.0047607421875, 60.4914436340332, 64.70337677001953, -4.931953430175781, -9.23828125, 56.31929016113281, -26.52850341796875, 16.029333114624023, 51.29545593261719, 69.58523559570312, 41.56060791015625, 105.14947509765625, 60.46816635131836, 87.93800354003906, 6.023403167724609, 50.872032165527344, 40.89762496948242, -14.555229187011719, 236.89491271972656, 6.239631652832031, 43.79081726074219, 145.12222290039062, -10.667455673217773, 74.15347290039062, -42.71179962158203, -28.385765075683594, 355.17401123046875, 56.97761154174805, -3.847871780395508, 88.43257141113281, 36.62495422363281, -32.79924392700195, 26.897323608398438, 88.71153259277344, -18.982593536376953, 76.8388671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000114.npy"} +{"epoch": 0.16740088105726872, "step": 115, "batch_size": 64, "mean": 32.80569076538086, "std": 66.77465057373047, "min": -154.61972045898438, "p10": -29.25564651489258, "median": 18.657734870910645, "p90": 128.55659713745123, "max": 240.00863647460938, "pos_frac": 0.734375, "sample": [-0.7601432800292969, -2.6755237579345703, 135.2223663330078, 59.688568115234375, -53.96971130371094, -1.2582473754882812, -46.7086181640625, 10.139310836791992, 12.890682220458984, 17.318443298339844, 25.313629150390625, -6.7218170166015625, 44.89437484741211, 57.030181884765625, 9.603960037231445, 169.90187072753906, 22.488893508911133, -8.04510498046875, 240.00863647460938, -11.998830795288086, 77.45281219482422, 6.5669708251953125, 141.56326293945312, 73.64765167236328, 173.1439208984375, -29.661712646484375, 21.887527465820312, 25.99264907836914, 8.09518814086914, 97.37138366699219, 176.3772430419922, 101.15408325195312, 1.197113037109375, -123.91739654541016, 8.89401626586914, 35.54155731201172, 59.76957702636719, 15.776535034179688, 9.662071228027344, 44.69298553466797, 27.552154541015625, 51.38441467285156, 26.754173278808594, 30.925308227539062, 145.06643676757812, -29.43603515625, 22.31170654296875, 17.276840209960938, -7.69342041015625, 12.164299011230469, 19.997026443481445, 80.37666320800781, 4.012504577636719, 12.483806610107422, -21.282939910888672, 113.00313568115234, 67.10203552246094, -16.764829635620117, 65.27091979980469, -154.61972045898438, 109.6143798828125, -49.141990661621094, 4.471822738647461, -28.834739685058594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000115.npy"} +{"epoch": 0.16886930983847284, "step": 116, "batch_size": 64, "mean": 36.67411422729492, "std": 47.031639099121094, "min": -36.47880554199219, "p10": -17.71062602996826, "median": 31.761539459228516, "p90": 91.36781463623048, "max": 218.24913024902344, "pos_frac": 0.78125, "sample": [7.2645721435546875, 4.288516998291016, -32.812015533447266, -29.943153381347656, 25.090394973754883, 45.74934387207031, 65.17117309570312, 23.140907287597656, 49.28599548339844, 59.40991973876953, 31.22748565673828, -10.285207748413086, 44.06676483154297, 27.040687561035156, 9.301563262939453, -6.692771911621094, 55.916343688964844, -36.47880554199219, 86.44837951660156, 89.60987854003906, 39.684234619140625, -18.91876220703125, 23.635757446289062, 17.80217742919922, 50.4635009765625, 25.884185791015625, 77.85992431640625, 107.52108764648438, 32.771636962890625, -30.91851806640625, 124.96075439453125, 35.55176544189453, 23.398914337158203, 44.88047790527344, -14.891641616821289, -28.15515899658203, 7.041584014892578, 7.928581237792969, -4.424478530883789, 102.80963134765625, 4.717960357666016, 135.42503356933594, 32.29559326171875, 88.35145568847656, 82.26826477050781, 35.419315338134766, 56.41392517089844, -32.825439453125, -8.770814895629883, 83.53401947021484, 23.58156967163086, -6.4043426513671875, 218.24913024902344, 68.06648254394531, 12.97943115234375, 51.802734375, 68.55400085449219, 64.60450744628906, -11.000020980834961, 15.33099365234375, 0.9154205322265625, 40.753570556640625, 99.07383728027344, 92.1212158203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000116.npy"} +{"epoch": 0.17033773861967694, "step": 117, "batch_size": 64, "mean": 29.90760612487793, "std": 51.708351135253906, "min": -101.38301086425781, "p10": -25.567924880981437, "median": 24.133713722229004, "p90": 88.79919891357424, "max": 188.38104248046875, "pos_frac": 0.765625, "sample": [13.443355560302734, 53.05120086669922, 50.83949279785156, -17.324016571044922, 22.637537002563477, -2.9664535522460938, 19.619768142700195, 39.465492248535156, 21.907196044921875, 40.94786071777344, 44.525882720947266, 68.64865112304688, 134.91290283203125, 114.41522216796875, 18.83342742919922, 46.54035186767578, -60.30613708496094, 91.77955627441406, 4.423828125, -95.92092895507812, -29.101028442382812, 8.845687866210938, 16.394153594970703, 65.45166015625, 45.00901412963867, -12.859710693359375, -35.558616638183594, 3.886484146118164, 25.62989044189453, -15.096710205078125, 39.87992858886719, 122.04266357421875, -3.5242538452148438, 2.456939697265625, 152.14154052734375, 13.908332824707031, 188.38104248046875, 16.409820556640625, 30.76520538330078, 67.03890991210938, 14.679927825927734, 9.317577362060547, 8.282245635986328, -9.279541015625, -101.38301086425781, 30.47394561767578, 81.84503173828125, 33.830718994140625, 92.10930633544922, 40.03273010253906, -46.769622802734375, -9.755910873413086, 7.818271636962891, 54.48391342163086, 81.31777954101562, 80.66860961914062, 6.396942138671875, 49.16773986816406, 73.6393051147461, 29.196212768554688, 44.254127502441406, -2.6672210693359375, -36.747222900390625, 71.59969329833984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000117.npy"} +{"epoch": 0.17180616740088106, "step": 118, "batch_size": 64, "mean": 36.418636322021484, "std": 47.34600830078125, "min": -55.367218017578125, "p10": -13.181039047241207, "median": 24.210975646972656, "p90": 111.10452117919925, "max": 159.87820434570312, "pos_frac": 0.765625, "sample": [-7.303394317626953, 22.016704559326172, 0.3593921661376953, -26.72845458984375, 19.83367919921875, 16.779613494873047, 61.37601089477539, 44.12724304199219, 48.527488708496094, 114.83573913574219, 87.79495239257812, 23.1553955078125, 127.13302612304688, 30.32538604736328, -7.989307403564453, 55.13813018798828, -16.370716094970703, -15.121337890625, 32.37045669555664, 159.87820434570312, 121.05738830566406, -8.653675079345703, -55.367218017578125, 2.8754425048828125, -39.90943908691406, 21.01310157775879, 2.402872085571289, 33.517967224121094, 14.235980987548828, 14.213180541992188, 10.679412841796875, 76.33991241455078, 3.5087127685546875, 76.83246612548828, 25.233612060546875, 49.810150146484375, 22.266889572143555, 42.60003662109375, 151.7835693359375, -2.562450408935547, 31.0313720703125, -18.32373809814453, -2.6290130615234375, 41.343231201171875, 146.21041870117188, 87.21880340576172, 117.42613220214844, 43.61451721191406, 80.42098236083984, 13.297395706176758, -0.35356903076171875, 69.24150848388672, 102.39834594726562, 71.35924530029297, 51.8612060546875, 23.675155639648438, 36.14942169189453, 17.79030990600586, -0.8593177795410156, 24.746795654296875, -21.458084106445312, 85.01007843017578, 4.614898681640625, -5.009574890136719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000118.npy"} +{"epoch": 0.17327459618208516, "step": 119, "batch_size": 64, "mean": 35.89599609375, "std": 57.14711380004883, "min": -69.7904052734375, "p10": -30.321307373046874, "median": 37.3317985534668, "p90": 97.57997741699221, "max": 211.30941772460938, "pos_frac": 0.6875, "sample": [13.909841537475586, 59.56272888183594, 38.44801330566406, 149.19363403320312, 39.3428955078125, -34.15357971191406, -7.9835052490234375, 51.131011962890625, -5.260486602783203, 51.67803192138672, -10.113731384277344, -12.446662902832031, 211.30941772460938, 48.96196746826172, 19.435836791992188, 122.60592651367188, 22.407390594482422, 12.076522827148438, 184.1217803955078, 49.10856628417969, 93.31806945800781, 75.84992980957031, 77.43982696533203, -7.738079071044922, 31.297531127929688, 37.62848663330078, -34.847618103027344, 17.709487915039062, 55.9488525390625, 57.40216064453125, 0.972381591796875, -3.1845016479492188, 99.40650939941406, 37.61137771606445, 174.73809814453125, -3.8936004638671875, 65.87670135498047, 81.79545593261719, -2.3208789825439453, 27.938858032226562, -56.26591491699219, 44.57013702392578, -59.599945068359375, 130.28497314453125, 50.6749267578125, 67.27041625976562, -25.720748901367188, -31.105209350585938, 86.9720687866211, 37.05221939086914, -28.492202758789062, -69.7904052734375, -0.623260498046875, 88.4783935546875, 15.477142333984375, 58.2187614440918, 51.085060119628906, 10.402101516723633, 66.58283996582031, -7.331760406494141, -42.66530990600586, 3.703765869140625, 42.044189453125, -20.16339874267578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000119.npy"} +{"epoch": 0.17474302496328928, "step": 120, "batch_size": 64, "mean": 49.90165328979492, "std": 60.92795181274414, "min": -85.1566162109375, "p10": -19.805996322631827, "median": 40.57330322265625, "p90": 137.2021438598633, "max": 195.42529296875, "pos_frac": 0.84375, "sample": [154.5394744873047, 89.409912109375, 32.052398681640625, 111.28125, 36.581153869628906, 9.48872184753418, -61.51133728027344, 102.87380981445312, 20.922590255737305, 72.81734466552734, 53.99250030517578, 166.542724609375, 82.23482513427734, 7.2748260498046875, 18.160192489624023, 24.0743408203125, 139.36260986328125, 60.79853820800781, 69.15618133544922, -39.017372131347656, -40.844879150390625, 132.1610565185547, 42.26402282714844, 31.613388061523438, 44.04875946044922, 38.10643005371094, 49.45269775390625, 7.845489501953125, 168.13076782226562, -85.1566162109375, -23.61602783203125, 41.97831726074219, 27.533164978027344, 3.8558197021484375, 107.1156005859375, 10.243711471557617, 195.42529296875, 54.93964767456055, 72.64335632324219, 4.051198959350586, 11.50583267211914, -8.053237915039062, 55.75421905517578, 188.746337890625, 34.37815856933594, 42.738189697265625, 131.90301513671875, 176.39865112304688, 39.16828918457031, 47.270599365234375, 0.3577117919921875, 90.86168670654297, -42.694549560546875, 18.51519012451172, 64.29674530029297, 36.74609375, -2.4556617736816406, 115.95074462890625, 78.39971923828125, -13.343517303466797, 77.76309204101562, -22.57563018798828, 36.7359619140625, 2.5121917724609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000120.npy"} +{"epoch": 0.1762114537444934, "step": 121, "batch_size": 64, "mean": 54.51830291748047, "std": 70.97077178955078, "min": -150.0506591796875, "p10": -9.695475769042966, "median": 49.638938903808594, "p90": 151.12088470458986, "max": 219.7302703857422, "pos_frac": 0.765625, "sample": [8.873899459838867, -7.183631896972656, -2.4401702880859375, 219.7302703857422, -6.114606857299805, 10.488059997558594, 97.05757141113281, -16.62500762939453, 79.22713470458984, 76.24776458740234, -2.848907470703125, 64.12995147705078, 139.9593048095703, 109.38724517822266, 82.5220947265625, -1.6766738891601562, 89.54806518554688, 67.12173461914062, 54.06590270996094, -41.49725341796875, 38.006500244140625, 113.88722229003906, -59.308677673339844, 150.7321014404297, 171.9572296142578, 65.78280639648438, 35.657859802246094, 16.324237823486328, 112.19673156738281, 3.67669677734375, 77.53450012207031, -21.40644073486328, 49.293365478515625, 86.47810363769531, 2.1629161834716797, 201.76986694335938, 4.048614501953125, 182.30166625976562, 44.8675537109375, 208.90499877929688, 8.925804138183594, 128.76785278320312, -10.771980285644531, 16.74261474609375, -24.915374755859375, 11.102943420410156, 77.03428649902344, 24.45508575439453, -6.4144439697265625, 54.381622314453125, 5.976921081542969, 19.078765869140625, 62.17393493652344, 132.76522827148438, 151.28750610351562, 72.97723388671875, -150.0506591796875, 56.5897216796875, -5.4134368896484375, 79.50196838378906, 213.29147338867188, 19.53594398498535, -2.6767616271972656, 49.98451232910156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000121.npy"} +{"epoch": 0.1776798825256975, "step": 122, "batch_size": 64, "mean": 68.18690490722656, "std": 67.45944213867188, "min": -91.22843933105469, "p10": 0.6514129638671886, "median": 52.04137420654297, "p90": 163.76800384521485, "max": 235.412353515625, "pos_frac": 0.90625, "sample": [154.61512756347656, 21.482276916503906, 77.2289810180664, 83.73464965820312, 19.816804885864258, 26.367919921875, 51.01652526855469, 101.48186492919922, 22.762527465820312, 117.16461181640625, 15.06005859375, 131.55792236328125, 126.41223907470703, 235.412353515625, 106.33892059326172, 35.70105743408203, 16.518226623535156, 55.264381408691406, 163.5077362060547, 50.039276123046875, 20.66376495361328, 142.4174346923828, 35.77676010131836, -8.278564453125, 85.50431823730469, 11.024368286132812, 64.55586242675781, 140.27352905273438, 73.78472137451172, 67.33543395996094, 77.01203155517578, -26.939666748046875, 172.0748291015625, -7.48039436340332, 53.06622314453125, 164.24853515625, 63.12432098388672, 23.92078399658203, 183.26150512695312, 159.97291564941406, 8.893026351928711, 49.750701904296875, -33.62603759765625, 234.92393493652344, 17.992286682128906, 39.47880172729492, 32.87866973876953, 43.02925491333008, 23.602500915527344, -91.22843933105469, -7.7478179931640625, 29.283309936523438, 33.97517395019531, 66.5549087524414, 207.90570068359375, 1.6806221008300781, 117.905517578125, 54.62462615966797, 0.21032333374023438, 13.960426330566406, 163.87954711914062, 76.01884460449219, 19.607139587402344, 153.60696411132812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000122.npy"} +{"epoch": 0.17914831130690162, "step": 123, "batch_size": 64, "mean": 44.62263488769531, "std": 74.36831665039062, "min": -150.82763671875, "p10": -27.755376625061032, "median": 22.759469985961914, "p90": 145.96360626220712, "max": 238.867919921875, "pos_frac": 0.65625, "sample": [81.90826416015625, 82.44892883300781, -6.955223083496094, 8.967288970947266, 89.35844421386719, 79.47175598144531, 125.93978881835938, 115.33859252929688, 80.90746307373047, 49.069496154785156, -0.3360424041748047, -19.870803833007812, 8.449634552001953, -40.72650909423828, 154.5452423095703, 88.52861022949219, 16.359569549560547, -12.711227416992188, -3.353057861328125, 67.89859008789062, -7.0127716064453125, 125.72650146484375, 67.40907287597656, -76.33677673339844, 22.640914916992188, 96.58291625976562, 215.51263427734375, 22.87802505493164, 154.69622802734375, 164.91586303710938, -25.04292869567871, 64.06024169921875, 20.065174102783203, 8.816978454589844, 85.28563690185547, 123.0103759765625, 20.37811279296875, 49.155479431152344, 178.78440856933594, -20.47734832763672, 92.20367431640625, 85.03816223144531, -44.170352935791016, -69.98319244384766, 78.01274108886719, -8.652191162109375, -41.5565299987793, -5.139482498168945, -16.54627227783203, 238.867919921875, -28.91785430908203, 66.70249938964844, 52.307899475097656, 208.7911376953125, -10.677520751953125, 21.44505500793457, 122.70240783691406, 34.75746154785156, -150.82763671875, -18.041290283203125, -0.378204345703125, -9.337787628173828, 0.4953422546386719, 2.465250015258789], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000123.npy"} +{"epoch": 0.18061674008810572, "step": 124, "batch_size": 64, "mean": 65.97357177734375, "std": 96.20331573486328, "min": -121.34342956542969, "p10": -26.351386260986327, "median": 43.79602813720703, "p90": 179.0110565185547, "max": 379.7846984863281, "pos_frac": 0.78125, "sample": [152.47731018066406, -24.907363891601562, 41.98591613769531, 34.45823669433594, 28.111408233642578, 151.16871643066406, -99.10636901855469, -9.177104949951172, 165.7275390625, 190.12911987304688, -4.109230041503906, 29.16973876953125, 4.732141494750977, 70.98075103759766, 79.04280853271484, 233.62518310546875, 159.77590942382812, 52.35368728637695, -19.512664794921875, 52.63665008544922, -121.34342956542969, -41.286109924316406, 142.28897094726562, -1.1579399108886719, 206.349609375, 130.12692260742188, 78.9552001953125, -3.8353347778320312, 48.41490173339844, 121.35792541503906, -26.970252990722656, 33.27128982543945, 89.50144958496094, 13.732988357543945, 10.346466064453125, 349.92315673828125, 103.85391235351562, 180.5086669921875, 30.482948303222656, 175.51663208007812, 106.8206558227539, -53.30622100830078, 18.857166290283203, -18.763519287109375, 39.88665771484375, 126.13034057617188, -100.98159790039062, 103.9158935546875, 20.828855514526367, 45.60614013671875, 39.49203872680664, 81.21533966064453, -67.15815734863281, 258.1259460449219, 22.904251098632812, 379.7846984863281, 49.847076416015625, 169.12020874023438, 1.1062946319580078, 33.002952575683594, 50.9111328125, 80.00568389892578, 14.685325622558594, 10.6707763671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000124.npy"} +{"epoch": 0.18208516886930984, "step": 125, "batch_size": 64, "mean": 58.60149383544922, "std": 79.16519927978516, "min": -91.34652709960938, "p10": -28.99659042358398, "median": 33.299259185791016, "p90": 167.7408294677735, "max": 254.54681396484375, "pos_frac": 0.78125, "sample": [-3.767576217651367, 149.3168487548828, 91.87120056152344, 23.039291381835938, -14.42984390258789, 9.361820220947266, 115.76310729980469, -27.839271545410156, 142.28976440429688, 17.0423641204834, 11.09981918334961, -29.492584228515625, 23.570863723754883, 0.6484298706054688, 102.86168670654297, 112.76519775390625, 89.64076232910156, 54.94977569580078, 18.59876823425293, 4.148273468017578, 108.75489807128906, 19.232711791992188, -55.41642761230469, -10.9505615234375, 92.33829498291016, 254.54681396484375, -35.91820526123047, 31.41789436340332, 108.89198303222656, 36.55158233642578, -5.626533508300781, -82.5436019897461, 35.08833312988281, 15.78713607788086, 31.51018524169922, 212.2133331298828, 7.430339813232422, 212.91513061523438, 122.50491333007812, -23.667869567871094, 11.840763092041016, 104.347900390625, 191.49493408203125, 93.44037628173828, 237.6697998046875, 92.87699890136719, 57.279685974121094, 45.790550231933594, 172.34487915039062, 51.37572479248047, 14.33251953125, 131.90994262695312, 42.04253387451172, -18.205106735229492, 113.10263061523438, 28.163349151611328, -42.400726318359375, 154.70509338378906, 31.054533004760742, -91.34652709960938, 211.16383361816406, 156.998046875, -34.53448486328125, 28.549341201782227], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000125.npy"} +{"epoch": 0.18355359765051396, "step": 126, "batch_size": 64, "mean": 54.387474060058594, "std": 72.72525024414062, "min": -77.66942596435547, "p10": -22.23567352294922, "median": 38.14095115661621, "p90": 158.50478973388672, "max": 253.807373046875, "pos_frac": 0.828125, "sample": [120.682373046875, 64.40339660644531, 11.623357772827148, 90.70087432861328, 13.423210144042969, 53.056114196777344, -38.90839385986328, 2.3035411834716797, 62.32862854003906, 154.60171508789062, -52.9302978515625, 0.6986808776855469, 253.807373046875, 19.233726501464844, 86.08181762695312, 1.5380401611328125, 133.60186767578125, 43.88842010498047, 130.38182067871094, 67.97773742675781, 45.205528259277344, 52.24240493774414, -7.971767425537109, 38.30398178100586, 66.5689926147461, 187.49484252929688, -22.34319305419922, 160.1775360107422, 32.782108306884766, 47.76130676269531, 19.942684173583984, 25.51595687866211, 74.02896118164062, 14.660987854003906, 183.46295166015625, 37.97792053222656, 57.29698181152344, -75.04756164550781, 149.63397216796875, -17.053958892822266, -62.64604187011719, -35.36515808105469, 240.14138793945312, -4.678186416625977, 27.569303512573242, 10.640033721923828, 2.464038848876953, 20.985286712646484, 126.82861328125, 118.93130493164062, -77.66942596435547, 192.3697052001953, 86.35562133789062, 23.870620727539062, 35.098175048828125, 97.76052856445312, 32.00923156738281, 63.54607391357422, 17.20873260498047, -21.98479461669922, 13.472343444824219, 92.818359375, 12.719245910644531, 179.21865844726562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000126.npy"} +{"epoch": 0.18502202643171806, "step": 127, "batch_size": 64, "mean": 39.8004264831543, "std": 63.418663024902344, "min": -126.01162719726562, "p10": -35.92738723754883, "median": 43.01145553588867, "p90": 117.56906204223634, "max": 183.78099060058594, "pos_frac": 0.78125, "sample": [55.745033264160156, 80.63714599609375, -10.027362823486328, 40.87238311767578, 2.533355712890625, -96.06739807128906, 152.11573791503906, 8.951431274414062, 96.59795379638672, -126.01162719726562, 8.208992004394531, 62.920066833496094, 75.04118347167969, -34.81016540527344, 18.53407096862793, 36.88175964355469, 95.09819793701172, 10.998231887817383, -122.55271911621094, 76.00249481201172, 25.50774383544922, 72.6611328125, 128.99119567871094, 20.911865234375, 68.82792663574219, 173.54440307617188, 21.927753448486328, 85.873046875, -24.26811981201172, 16.155948638916016, 121.64974212646484, 77.74373626708984, 110.6220703125, 25.096817016601562, 51.99986267089844, 150.39645385742188, -2.2198009490966797, -18.98064422607422, 19.543243408203125, -2.4312820434570312, 17.239261627197266, 113.1712417602539, 9.25700569152832, 77.07500457763672, 45.15052795410156, 64.10781860351562, 46.24805450439453, 119.45384216308594, 110.22852325439453, 53.50395202636719, -46.36797332763672, 47.73143005371094, 18.986133575439453, 183.78099060058594, -61.08580780029297, 62.01298522949219, 48.09070587158203, 93.54360961914062, 46.910125732421875, 12.95356559753418, 31.902320861816406, -25.942893981933594, -36.40619659423828, -39.53868103027344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000127.npy"} +{"epoch": 0.18649045521292218, "step": 128, "batch_size": 64, "mean": 42.07066345214844, "std": 74.8244400024414, "min": -172.208984375, "p10": -47.180390930175776, "median": 36.74879455566406, "p90": 147.1278259277344, "max": 255.3750762939453, "pos_frac": 0.6875, "sample": [48.085636138916016, 44.246070861816406, 136.12960815429688, -13.653480529785156, 91.7744140625, -61.456729888916016, 102.789794921875, 145.99075317382812, 35.90592956542969, 43.510536193847656, 169.25714111328125, 52.247398376464844, 49.85890197753906, 35.59208679199219, -89.03960418701172, 100.94447326660156, 154.69369506835938, 4.7227020263671875, -0.4044208526611328, 75.49761962890625, -73.37199401855469, -3.901378631591797, 255.3750762939453, -172.208984375, 148.99649047851562, 49.265220642089844, 160.45947265625, 147.61514282226562, 71.09644317626953, 48.20101547241211, -11.10055923461914, -59.03953552246094, 105.3876724243164, -5.241086959838867, -18.85101318359375, 21.808128356933594, -79.00047302246094, -3.803089141845703, 34.23906707763672, 46.2161750793457, 88.76590728759766, -39.85832214355469, 54.94384765625, -24.608470916748047, -10.558023452758789, 150.3783416748047, 33.04079055786133, 37.59165954589844, 22.389362335205078, 52.26076126098633, 22.24742889404297, -9.572202682495117, 128.492919921875, -1.4572620391845703, 12.116052627563477, 3.442291259765625, -13.206001281738281, 55.410545349121094, 103.75776672363281, 12.04507827758789, -50.31842041015625, 129.85687255859375, 145.35446166992188, 1.172780990600586], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000128.npy"} +{"epoch": 0.18795888399412627, "step": 129, "batch_size": 64, "mean": 44.24713134765625, "std": 84.6582260131836, "min": -143.8267822265625, "p10": -43.857168197631836, "median": 31.81517219543457, "p90": 145.1571304321289, "max": 366.42816162109375, "pos_frac": 0.671875, "sample": [2.589620590209961, 32.9114875793457, -44.71953201293945, 110.64881896972656, -48.42308807373047, 122.4066162109375, 9.238349914550781, -41.84498596191406, 169.72616577148438, 26.78441619873047, -12.775238037109375, 106.68450927734375, 26.91397476196289, 45.748321533203125, 58.54249954223633, 73.32524108886719, 145.38967895507812, -38.961822509765625, -29.877593994140625, 53.5382080078125, 220.2698974609375, 87.44638061523438, 220.69454956054688, -97.3742446899414, 87.05934143066406, -88.80789184570312, -45.08143615722656, 40.26701736450195, 366.42816162109375, 141.29217529296875, 28.87376594543457, 19.559967041015625, 1.259979248046875, 77.61170196533203, -11.50799560546875, -16.980701446533203, 30.718856811523438, 122.10250854492188, 29.252967834472656, -16.478317260742188, -74.69718170166016, -7.231512069702148, 49.450927734375, 80.85285949707031, 144.61451721191406, -143.8267822265625, 17.742700576782227, 20.64092254638672, -8.987546920776367, -6.997249603271484, 138.00115966796875, 173.8178253173828, 39.646766662597656, 47.36297607421875, 70.86164855957031, -15.701858520507812, 43.975311279296875, 55.334503173828125, -38.43450164794922, 86.65399932861328, 148.14944458007812, -29.575599670410156, 76.2066421508789, -0.4958019256591797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000129.npy"} +{"epoch": 0.1894273127753304, "step": 130, "batch_size": 64, "mean": 66.64889526367188, "std": 84.98664855957031, "min": -110.17389678955078, "p10": -20.95257949829101, "median": 46.01510238647461, "p90": 195.54732666015627, "max": 327.4662170410156, "pos_frac": 0.78125, "sample": [74.14311218261719, 39.257896423339844, 30.2291259765625, 186.57537841796875, -15.702445983886719, 11.830398559570312, 200.85836791992188, 80.97039031982422, -53.400245666503906, -7.779638290405273, 15.922788619995117, 59.825923919677734, 34.323360443115234, 188.35092163085938, 205.03903198242188, 23.00115203857422, 93.99772644042969, -8.081140518188477, 36.74986267089844, 72.73160552978516, -11.98135757446289, 32.19758605957031, 7.982582092285156, 87.54350280761719, 92.0772705078125, -4.841499328613281, 33.314666748046875, 91.67399597167969, 57.685874938964844, 179.07733154296875, 142.3121337890625, 215.24908447265625, -33.440467834472656, 27.855697631835938, 148.2082977294922, -104.83761596679688, 33.94114685058594, 29.514862060546875, 34.90743637084961, -23.20263671875, 61.18055725097656, 52.43659591674805, 74.68010711669922, 241.1837615966797, 155.51934814453125, -7.762763977050781, 327.4662170410156, -40.239051818847656, -3.8189563751220703, 87.3604965209961, 33.25653839111328, 39.59360885620117, 198.63150024414062, -32.17736053466797, 146.16029357910156, -110.17389678955078, 131.8668212890625, 1.8132801055908203, 123.68524169921875, 114.1156005859375, 214.11312866210938, 57.90028381347656, 32.964820861816406, 61.69132995605469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000130.npy"} +{"epoch": 0.19089574155653452, "step": 131, "batch_size": 64, "mean": 66.88613891601562, "std": 73.83077239990234, "min": -150.745849609375, "p10": -0.9118213653564453, "median": 50.76441192626953, "p90": 161.0794677734375, "max": 268.0539855957031, "pos_frac": 0.875, "sample": [50.671485900878906, 45.73102569580078, 50.857337951660156, 29.065448760986328, 268.0539855957031, -150.745849609375, 57.494937896728516, 40.072731018066406, 31.326438903808594, 104.98413848876953, 108.09930419921875, 20.369873046875, 61.28289794921875, 13.323423385620117, 99.79534912109375, 35.81904602050781, 41.81603240966797, -1.9133739471435547, 139.78372192382812, 75.35658264160156, 42.923118591308594, 158.13424682617188, -12.669319152832031, -12.629457473754883, 50.888519287109375, 42.59715270996094, 84.66783142089844, 7.496063232421875, 202.76934814453125, 39.6044807434082, 41.13534164428711, -84.38619995117188, 162.34170532226562, 9.72296142578125, 226.15029907226562, 4.211210250854492, 100.43233489990234, 29.64214324951172, 139.25987243652344, 47.246315002441406, 127.70156860351562, 31.271665573120117, 207.32418823242188, 27.02365493774414, -92.03451538085938, 125.60549926757812, 6.8065643310546875, 105.51616668701172, -0.9300575256347656, 25.11556625366211, -0.8692703247070312, 137.2013397216797, 9.182395935058594, 175.9803924560547, 153.99789428710938, 165.8990478515625, 89.39884948730469, 66.43673706054688, 29.70473861694336, 79.99937438964844, 129.05245971679688, 95.18366241455078, 99.82186889648438, 85.54093933105469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000131.npy"} +{"epoch": 0.19236417033773862, "step": 132, "batch_size": 64, "mean": 54.898109436035156, "std": 87.75269317626953, "min": -157.57757568359375, "p10": -17.226745414733884, "median": 42.74197959899902, "p90": 146.60068664550784, "max": 374.057861328125, "pos_frac": 0.796875, "sample": [13.698753356933594, 10.271419525146484, 148.92132568359375, -61.33917999267578, 68.39337158203125, 37.07658386230469, 64.85995483398438, 289.3802185058594, 315.1575927734375, 7.772727966308594, 71.37637329101562, 108.69886016845703, 21.25434684753418, 76.93858337402344, -157.57757568359375, 5.1299285888671875, 220.1453857421875, -36.29956817626953, 65.80107879638672, -14.948793411254883, 1.8960094451904297, -12.194190979003906, 129.56314086914062, 67.58480834960938, 20.60918426513672, -18.20301055908203, -0.030374526977539062, 114.23509979248047, 8.790939331054688, 4.8127593994140625, -13.867523193359375, -12.71380615234375, 19.1331729888916, 93.39441680908203, 80.8216552734375, 147.8988037109375, 51.44226837158203, 68.12718200683594, 78.26217651367188, 50.03322982788086, 55.20643615722656, 33.97700119018555, 29.93265151977539, 143.57174682617188, -106.64505004882812, 48.40737533569336, 121.1141128540039, -67.46979522705078, 13.47250747680664, 93.89939880371094, -19.36277961730957, 58.321807861328125, 105.29974365234375, 18.53327178955078, 56.205963134765625, -14.727630615234375, 32.519981384277344, 103.51771545410156, 27.03717041015625, 12.337200164794922, 178.98016357421875, 27.666297912597656, 53.318763732910156, 374.057861328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000132.npy"} +{"epoch": 0.19383259911894274, "step": 133, "batch_size": 64, "mean": 48.50770568847656, "std": 75.95181274414062, "min": -106.50254821777344, "p10": -20.619416809082026, "median": 38.41263961791992, "p90": 145.71980285644537, "max": 250.32887268066406, "pos_frac": 0.765625, "sample": [11.450790405273438, 70.19551086425781, -9.470413208007812, 110.44021606445312, 7.905525207519531, -39.86585998535156, -7.684600830078125, 35.73150634765625, -74.80424499511719, 250.32887268066406, 51.16747283935547, 72.50774383544922, -5.9777984619140625, 104.28533172607422, -79.26629638671875, 18.523956298828125, 92.58616638183594, -7.974395751953125, 215.0245819091797, 5.683374404907227, 66.58155059814453, 83.586181640625, -12.010047912597656, 131.11236572265625, 2.4013824462890625, 105.13865661621094, 46.65257263183594, 77.24784851074219, 85.63806915283203, 69.82940673828125, 10.5792236328125, 21.01844024658203, 26.1191463470459, 242.88540649414062, -2.0299072265625, 50.90636444091797, 71.4315414428711, -68.40620422363281, 207.55691528320312, 39.321128845214844, 40.95303726196289, 37.504150390625, 53.856590270996094, 241.4422607421875, -106.50254821777344, -14.81396484375, 64.97897338867188, 176.08184814453125, 18.847732543945312, 50.889774322509766, 7.803924560546875, 121.17156982421875, -54.675537109375, 151.98013305664062, 102.30829620361328, -23.107467651367188, 65.86026000976562, 48.247467041015625, -10.140777587890625, 20.28761863708496, 7.6946563720703125, 0.4360218048095703, 9.830558776855469, 17.210914611816406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000133.npy"} +{"epoch": 0.19530102790014683, "step": 134, "batch_size": 64, "mean": 51.83448028564453, "std": 68.40628051757812, "min": -45.251365661621094, "p10": -32.7641523361206, "median": 42.87419319152832, "p90": 132.65450897216797, "max": 335.3938293457031, "pos_frac": 0.8125, "sample": [16.668716430664062, -45.251365661621094, -38.964866638183594, 125.37594604492188, 199.124267578125, 34.2952880859375, 16.53670883178711, 42.626590728759766, -34.64887237548828, 50.002357482910156, 120.36476135253906, 25.092266082763672, 12.080509185791016, 3.8259048461914062, 53.13298797607422, 42.57194519042969, -28.366472244262695, 47.695220947265625, 78.79133605957031, 25.592647552490234, 6.099428176879883, 10.607948303222656, 99.19393157958984, 18.7249755859375, 166.8382110595703, -2.2501983642578125, 109.9957275390625, 97.15385437011719, 54.74343490600586, 18.819293975830078, 50.74171447753906, 134.3798065185547, 1.817464828491211, 47.69788360595703, 169.15695190429688, 114.57222747802734, 128.62881469726562, -41.05005645751953, 85.29579162597656, -18.90259552001953, 59.281280517578125, -35.9244384765625, 65.54557800292969, 7.212553024291992, 335.3938293457031, 11.260379791259766, 78.5142822265625, 40.568939208984375, 73.625, -4.969264984130859, 63.99214172363281, -26.237701416015625, -34.919822692871094, 176.95169067382812, 60.658287048339844, 2.7618408203125, 60.32405090332031, 39.03977584838867, 43.121795654296875, 72.19475555419922, -39.559791564941406, 81.24671936035156, 32.52219009399414, 155.99224853515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000134.npy"} +{"epoch": 0.19676945668135096, "step": 135, "batch_size": 64, "mean": 74.76806640625, "std": 94.97622680664062, "min": -89.15797424316406, "p10": -34.659814453124994, "median": 53.61056900024414, "p90": 218.41308135986333, "max": 360.62164306640625, "pos_frac": 0.8125, "sample": [30.19976043701172, 134.7310791015625, 33.0059928894043, 240.74728393554688, 129.65777587890625, 73.9731216430664, 247.2167205810547, 44.1781120300293, 245.22280883789062, 221.88572692871094, 134.0233154296875, 90.50712585449219, 191.8585205078125, 128.1136474609375, 272.86712646484375, 117.899169921875, 30.757659912109375, 205.65646362304688, 12.57058334350586, -41.513938903808594, 1.0876026153564453, 113.56199645996094, 160.8594970703125, 80.00648498535156, 39.027740478515625, 97.97537231445312, 1.1419525146484375, 360.62164306640625, -23.378002166748047, 117.5244140625, -61.29840087890625, 204.21893310546875, 210.31024169921875, -73.554443359375, 40.22994613647461, -16.99625015258789, 27.39023208618164, 57.12340545654297, 128.58160400390625, -2.67156982421875, -59.84349822998047, 54.04302978515625, 85.24189758300781, 57.97434997558594, 1.5195960998535156, 53.17810821533203, 4.176296234130859, -15.490066528320312, 47.15080261230469, 17.61638641357422, 33.316158294677734, 112.55110168457031, 93.94845581054688, 87.74516296386719, 7.093406677246094, 100.91346740722656, 44.387939453125, 231.50869750976562, 14.587425231933594, -89.15797424316406, -28.80047607421875, -37.17095947265625, -43.15907287597656, 6.505790710449219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000135.npy"} +{"epoch": 0.19823788546255505, "step": 136, "batch_size": 64, "mean": 49.48542022705078, "std": 93.52610778808594, "min": -77.690185546875, "p10": -46.89946174621582, "median": 28.828155517578125, "p90": 123.77716903686523, "max": 414.677490234375, "pos_frac": 0.6875, "sample": [63.820491790771484, 74.20240783691406, 103.80743408203125, 100.7135009765625, -8.816696166992188, -49.33280563354492, 300.21600341796875, 133.0731201171875, -18.69085693359375, 65.46549987792969, 101.60490417480469, 66.05010986328125, -24.860931396484375, 13.36301040649414, 76.58926391601562, 54.410858154296875, 81.75119018554688, 34.58427429199219, 115.83851623535156, 324.4103088378906, -1.0311279296875, 72.4036865234375, 414.677490234375, 2.2528228759765625, 114.68550109863281, -0.03725433349609375, 223.99554443359375, -7.500217437744141, 260.75244140625, 25.53207778930664, 9.835945129394531, 1.9523582458496094, 58.71296691894531, -75.04489135742188, 53.68067932128906, -64.01760864257812, 19.921493530273438, -25.431442260742188, 89.64976501464844, 88.14419555664062, 10.437803268432617, -16.060089111328125, -15.262619018554688, 62.60755920410156, 9.601421356201172, 2.925508499145508, -25.323246002197266, 32.12423324584961, 93.44119262695312, 123.54505157470703, -47.050758361816406, -64.42195892333984, 39.259246826171875, 77.60334014892578, -77.690185546875, 2.1385574340820312, 72.12785339355469, -21.864368438720703, -46.54643630981445, 23.898605346679688, -4.659721374511719, 123.87664794921875, 3.5702743530273438, -62.54491424560547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000136.npy"} +{"epoch": 0.19970631424375918, "step": 137, "batch_size": 64, "mean": 77.95802307128906, "std": 114.30103302001953, "min": -161.82867431640625, "p10": -27.800493621826167, "median": 51.081417083740234, "p90": 253.38302612304713, "max": 392.46124267578125, "pos_frac": 0.84375, "sample": [-44.65516662597656, 11.57132339477539, 75.4420166015625, 122.77413177490234, 154.26528930664062, 33.089149475097656, 47.24574279785156, 90.27700805664062, -22.186565399169922, 102.64602661132812, 3.701955795288086, 96.39686584472656, 387.7835693359375, 18.956506729125977, 39.55836486816406, -114.7435302734375, 112.69003295898438, 11.074142456054688, 37.003211975097656, 153.7771453857422, 159.62060546875, 81.71639251708984, 2.4948806762695312, 392.46124267578125, 111.2386474609375, 329.4830322265625, 190.00234985351562, 126.82048034667969, 18.345861434936523, 170.09939575195312, -94.82646179199219, 10.420379638671875, 70.46642303466797, 9.386329650878906, 135.42343139648438, 9.590465545654297, 128.10708618164062, 51.11383819580078, 8.900520324707031, 51.04899597167969, 64.10289001464844, 43.43470001220703, -3.9035873413085938, -161.82867431640625, -45.80242156982422, -73.02498626708984, 365.6900634765625, -30.206462860107422, 40.05803680419922, 297.7340393066406, 54.18719482421875, 129.817138671875, 3.6495437622070312, 94.46035766601562, -17.347753524780273, 29.097749710083008, 52.21382141113281, 9.59372329711914, 0.3145313262939453, 106.08202362060547, 310.1812744140625, 36.31074523925781, 125.37210845947266, 280.5461730957031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000137.npy"} +{"epoch": 0.2011747430249633, "step": 138, "batch_size": 64, "mean": 59.33885192871094, "std": 73.00718688964844, "min": -114.8970947265625, "p10": -17.728237152099606, "median": 43.85134315490723, "p90": 151.6490127563477, "max": 279.9593505859375, "pos_frac": 0.78125, "sample": [159.50167846679688, -13.23321533203125, 73.83746337890625, 138.51199340820312, 6.743072509765625, 115.68904876708984, -18.617019653320312, -1.1247138977050781, 8.659149169921875, 174.80445861816406, 32.25532531738281, 20.373855590820312, 43.13926696777344, 11.15827751159668, 28.307905197143555, -114.8970947265625, 17.10135841369629, -22.128421783447266, -1.6357879638671875, -11.515388488769531, 190.6724853515625, 179.79049682617188, 15.18792724609375, 9.486076354980469, 119.58828735351562, 130.32774353027344, 132.5316162109375, -4.1106109619140625, 95.13944244384766, 139.05360412597656, 56.624267578125, 29.281476974487305, 128.5288848876953, 7.48883056640625, 139.54620361328125, 172.2701873779297, 75.49755859375, 44.563419342041016, -68.42843627929688, 62.28887176513672, 66.33100128173828, 28.029205322265625, 89.82847595214844, 0.615753173828125, -6.074089050292969, -54.53428649902344, 99.0897445678711, 140.5001220703125, 14.888263702392578, 38.90126037597656, 114.41725158691406, 33.96812438964844, 138.01792907714844, -34.495262145996094, -20.486486434936523, 60.764644622802734, -15.654411315917969, 72.62052917480469, 20.549270629882812, 279.9593505859375, 103.7309799194336, 58.18751525878906, 109.84484100341797, 156.42710876464844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000138.npy"} +{"epoch": 0.2026431718061674, "step": 139, "batch_size": 64, "mean": 70.53996276855469, "std": 87.66817474365234, "min": -108.65182495117188, "p10": -31.359179687499996, "median": 57.8461799621582, "p90": 164.7234588623047, "max": 374.8907775878906, "pos_frac": 0.8125, "sample": [-19.229698181152344, 156.37188720703125, 100.28099822998047, 174.9274444580078, -33.6737060546875, 67.67375946044922, 121.14110565185547, -12.234764099121094, 83.27711486816406, 147.52691650390625, -108.65182495117188, 58.61222839355469, -45.89175796508789, 276.8145751953125, 139.2911376953125, 46.59648895263672, 88.61579895019531, 57.08013153076172, 34.76782989501953, 62.991207122802734, 143.968017578125, 73.84156036376953, -71.9052505493164, 123.8735580444336, 49.55657958984375, 1.5494194030761719, 154.4517364501953, 85.46607971191406, 14.641096115112305, 40.08753204345703, 153.3822021484375, 25.830598831176758, 124.71481323242188, 116.37779998779297, 39.374114990234375, 94.54457092285156, -21.970909118652344, -1.234731674194336, 9.75373649597168, 15.861732482910156, 81.59024810791016, 1.080902099609375, 17.088348388671875, 150.50234985351562, 23.458499908447266, 271.2991943359375, 96.84900665283203, -25.9586181640625, 77.70600128173828, -47.93799591064453, 83.29241180419922, -47.864845275878906, 33.87999725341797, 86.0699691772461, 374.8907775878906, 40.13493728637695, 250.70481872558594, 39.43623352050781, 49.416038513183594, -39.20494079589844, 188.51162719726562, 30.892730712890625, 41.96625518798828, 168.30270385742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000139.npy"} +{"epoch": 0.20411160058737152, "step": 140, "batch_size": 64, "mean": 64.14215850830078, "std": 90.56130981445312, "min": -130.58775329589844, "p10": -49.99583778381348, "median": 64.58736038208008, "p90": 174.87683105468753, "max": 352.08740234375, "pos_frac": 0.734375, "sample": [-27.33707046508789, 125.84860229492188, 352.08740234375, -77.07378387451172, -13.112030029296875, 126.20585632324219, 84.95116424560547, 146.88165283203125, 59.01335906982422, 166.3990936279297, 119.24427795410156, 72.21455383300781, 114.41844177246094, 42.18519973754883, 24.943984985351562, 85.65824890136719, 275.5821533203125, 12.33224868774414, 5.880435943603516, 76.09062957763672, 20.284439086914062, 65.3894271850586, 86.13496398925781, 136.09341430664062, -2.1944351196289062, -58.08671569824219, 256.15069580078125, -51.93510437011719, 37.72174072265625, 80.38798522949219, 23.3339900970459, 59.92894744873047, 8.26099967956543, 115.57498168945312, -32.596923828125, 64.55636596679688, 93.79353332519531, 64.61835479736328, -9.651824951171875, 162.53750610351562, 133.26361083984375, -34.063995361328125, 178.51014709472656, -130.58775329589844, -1.0319366455078125, 63.62799072265625, 62.02961730957031, 92.98213195800781, 189.02349853515625, 25.051734924316406, -9.855775833129883, 155.6230010986328, -14.645952224731445, 21.192245483398438, -89.12168884277344, -50.468875885009766, -72.74842071533203, 68.13517761230469, 184.94512939453125, 181.38870239257812, -48.89208221435547, 87.38377380371094, 84.23551940917969, 136.40570068359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000140.npy"} +{"epoch": 0.2055800293685756, "step": 141, "batch_size": 64, "mean": 60.85835647583008, "std": 96.27452850341797, "min": -244.26358032226562, "p10": -47.03740921020507, "median": 59.39303398132324, "p90": 184.0755676269532, "max": 299.31732177734375, "pos_frac": 0.765625, "sample": [61.317474365234375, 40.405303955078125, -14.87496566772461, 103.99492645263672, 85.22883605957031, -33.11695098876953, 110.12603759765625, 62.705902099609375, 1.1693649291992188, -7.457389831542969, 57.46859359741211, 260.32342529296875, -244.26358032226562, 50.869178771972656, -6.3417816162109375, 34.67927551269531, 0.0774993896484375, -51.9285888671875, 268.44439697265625, -62.19514465332031, -35.624656677246094, 202.3866424560547, 48.72331619262695, 84.91107177734375, 145.43536376953125, 81.21739196777344, 106.90803527832031, 2.8755264282226562, -59.878326416015625, 73.30067443847656, 111.47706604003906, 78.19598388671875, 33.942230224609375, 170.09103393554688, 253.75692749023438, 27.015649795532227, 117.59581756591797, 24.847274780273438, -20.328771591186523, 17.603832244873047, -24.70555877685547, 48.38990783691406, -53.240867614746094, 190.06893920898438, 5.662336349487305, 139.31039428710938, 196.220947265625, 122.16761779785156, 11.663461685180664, -164.39901733398438, 19.27050018310547, 96.09379577636719, 63.66682434082031, 91.76741027832031, 116.87615966796875, 133.90911865234375, 299.31732177734375, 128.79153442382812, -0.7142620086669922, 162.75331115722656, 70.95167541503906, 105.90921020507812, -51.950233459472656, 6.070159912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000141.npy"} +{"epoch": 0.20704845814977973, "step": 142, "batch_size": 64, "mean": 69.63619995117188, "std": 107.96939849853516, "min": -212.34603881835938, "p10": -43.39850845336913, "median": 37.03660583496094, "p90": 239.59020385742187, "max": 294.63739013671875, "pos_frac": 0.765625, "sample": [-21.70842742919922, 263.4952697753906, 33.55150604248047, 43.226112365722656, -31.433273315429688, 244.59458923339844, -27.433467864990234, -22.42462730407715, -90.964599609375, 32.612613677978516, 136.78155517578125, 96.7135009765625, 161.26373291015625, 30.341806411743164, 38.29601287841797, 223.29713439941406, 14.587955474853516, -212.34603881835938, -151.19680786132812, 74.35883331298828, 20.961952209472656, 32.16282653808594, 17.078716278076172, 294.63739013671875, 167.48387145996094, 35.777198791503906, 147.36541748046875, 26.834257125854492, 145.2664794921875, -21.53009796142578, 108.35263061523438, 121.50483703613281, 240.2198028564453, 83.47425079345703, -1.898468017578125, 208.66880798339844, 216.95240783691406, 17.11532211303711, 20.909292221069336, 42.45973205566406, -62.81233215332031, -50.81142044067383, 3.1866836547851562, 238.1211395263672, 69.2642593383789, 246.56504821777344, 14.20547103881836, 21.52027130126953, 71.6627197265625, -48.526466369628906, 57.91012954711914, -19.826644897460938, 6.71806526184082, -82.61714172363281, 193.96031188964844, 248.8304443359375, 27.222930908203125, 195.26889038085938, 26.698577880859375, -7.974020004272461, 127.4983139038086, 255.85736083984375, 39.740150451660156, 125.64398956298828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000142.npy"} +{"epoch": 0.20851688693098386, "step": 143, "batch_size": 64, "mean": 44.3311882019043, "std": 94.11837005615234, "min": -154.46664428710938, "p10": -55.794631195068355, "median": 28.741188049316406, "p90": 146.9894561767578, "max": 332.42822265625, "pos_frac": 0.65625, "sample": [83.85661315917969, -7.4458465576171875, 54.195152282714844, 24.793930053710938, 280.78369140625, 91.80457305908203, -57.816436767578125, 100.43743133544922, 106.64947509765625, 23.356409072875977, 145.91268920898438, -83.96920776367188, -56.515098571777344, 136.55233764648438, 83.57758331298828, -127.92499542236328, -48.048675537109375, -2.3018569946289062, 104.6448974609375, -3.936859130859375, 34.32061004638672, 255.7193603515625, -66.42009735107422, 98.72380065917969, 18.701934814453125, -17.005401611328125, 45.4134521484375, 147.450927734375, 30.068458557128906, 16.44791030883789, -11.995452880859375, -47.569244384765625, -54.11354064941406, 5.029439926147461, 136.28526306152344, 4.779327392578125, 44.93561553955078, 66.95441436767578, 26.811981201171875, 332.42822265625, -35.579750061035156, -13.043556213378906, 35.1865234375, 83.84286499023438, -3.0336246490478516, 56.377471923828125, 155.26193237304688, 138.44342041015625, -6.824981689453125, -46.51446533203125, -91.80433654785156, 288.6719970703125, -34.28263854980469, 79.40673065185547, 67.01463317871094, 31.70103645324707, 110.27629089355469, 27.413917541503906, 180.18408203125, -154.46664428710938, -11.095664978027344, 42.18034362792969, 3.9459762573242188, 18.361648559570312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000143.npy"} +{"epoch": 0.20998531571218795, "step": 144, "batch_size": 64, "mean": 54.54907989501953, "std": 79.60639190673828, "min": -111.27597045898438, "p10": -33.957157135009766, "median": 46.890663146972656, "p90": 172.07891235351562, "max": 243.69369506835938, "pos_frac": 0.734375, "sample": [-34.354278564453125, 111.54488372802734, -5.134702682495117, 172.07284545898438, -105.77982330322266, -19.528221130371094, 71.29181671142578, 120.19506072998047, 46.438316345214844, 10.43221664428711, 65.01214599609375, -37.93246078491211, 73.00907897949219, 104.30690002441406, 57.87358856201172, 172.08151245117188, 51.01305389404297, 204.97409057617188, 105.5365219116211, 142.09152221679688, 180.64364624023438, 192.925537109375, -25.079782485961914, -111.27597045898438, 105.21549224853516, -26.72320556640625, 48.293434143066406, -3.7655410766601562, 69.73299407958984, 22.97079086303711, -53.721580505371094, 74.0296859741211, 67.83695983886719, 19.23333740234375, -48.25493621826172, 139.09803771972656, 48.32122039794922, 27.51089096069336, 24.56085205078125, 93.43099212646484, 32.55085754394531, 94.14856719970703, 243.69369506835938, 27.967618942260742, 26.046104431152344, 76.35786437988281, -33.030540466308594, 24.201919555664062, 19.731338500976562, 103.08360290527344, 122.05113983154297, 233.8687286376953, -3.749725341796875, -29.72320556640625, -16.496551513671875, -22.56281280517578, 222.66172790527344, 17.115245819091797, 23.62563133239746, 0.01879119873046875, 47.34300994873047, 28.544281005859375, 150.21266174316406, -46.645843505859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000144.npy"} +{"epoch": 0.21145374449339208, "step": 145, "batch_size": 64, "mean": 58.77191925048828, "std": 81.89253234863281, "min": -121.53300476074219, "p10": -42.566898727416984, "median": 46.077348709106445, "p90": 151.4165008544922, "max": 269.8736877441406, "pos_frac": 0.75, "sample": [23.566268920898438, -48.880035400390625, -96.89442443847656, 269.8736877441406, 150.66781616210938, 116.69783782958984, 250.42636108398438, 3.487812042236328, -121.53300476074219, 58.5994987487793, 4.069793701171875, 78.29945373535156, 147.94705200195312, -54.36239242553711, 10.536670684814453, 120.6658706665039, 56.83618927001953, 43.42522430419922, 120.57025146484375, -46.26476287841797, -10.624181747436523, 39.454254150390625, 43.34038543701172, 138.848876953125, 90.09188842773438, -66.00941467285156, -10.141220092773438, 30.899089813232422, -8.8253173828125, 181.49728393554688, 24.927724838256836, 12.818023681640625, 114.64085388183594, -33.93854904174805, -54.439453125, 120.64863586425781, 18.326583862304688, 21.872255325317383, 79.7257308959961, -10.313827514648438, 67.24980163574219, -7.013782501220703, 119.38089752197266, -16.596210479736328, 117.4560546875, 190.94175720214844, 104.08363342285156, -22.704910278320312, 51.04478073120117, 127.34252166748047, 30.787372589111328, 42.92241287231445, 145.78187561035156, 159.80636596679688, 58.67721176147461, 18.540225982666016, -4.349403381347656, 64.58854675292969, 134.3128662109375, 119.01306915283203, 237.32943725585938, 48.72947311401367, 11.806373596191406, 151.73736572265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000145.npy"} +{"epoch": 0.21292217327459617, "step": 146, "batch_size": 64, "mean": 45.2476806640625, "std": 89.26947784423828, "min": -240.04562377929688, "p10": -70.22802505493163, "median": 60.02116012573242, "p90": 158.3383682250977, "max": 246.0668487548828, "pos_frac": 0.671875, "sample": [33.80925750732422, -35.98333740234375, 108.64041137695312, -64.73918914794922, 18.73065757751465, 2.6288833618164062, 75.6666488647461, 69.18661499023438, 78.56178283691406, 66.63970947265625, 213.03643798828125, 101.71903228759766, 43.78227233886719, -9.728012084960938, 25.278282165527344, -7.952766418457031, 94.4140853881836, -6.364532470703125, 38.289344787597656, -18.080766677856445, -5.479991912841797, 31.972915649414062, 62.28368377685547, 116.03068542480469, -104.14808654785156, 69.7757339477539, 149.3812713623047, -240.04562377929688, -72.58038330078125, 208.22500610351562, -81.80741119384766, 74.22357940673828, -6.076421737670898, 76.80340576171875, 191.5637969970703, -80.98619842529297, 48.1865234375, 61.029457092285156, 119.6664047241211, 246.0668487548828, 6.786382675170898, -5.7089691162109375, 61.14096450805664, 79.95719909667969, 61.2725830078125, 105.32357025146484, 103.46951293945312, -98.99031066894531, 59.01286315917969, -77.82958221435547, 79.49520874023438, 141.68704223632812, 162.1771240234375, 74.91209411621094, -8.178916931152344, 95.1988296508789, 36.539634704589844, -36.349159240722656, 86.31565856933594, -55.04174041748047, 227.76795959472656, -33.40986633300781, 189.07888793945312, -50.39540100097656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000146.npy"} +{"epoch": 0.2143906020558003, "step": 147, "batch_size": 64, "mean": 61.400856018066406, "std": 91.30735778808594, "min": -147.00946044921875, "p10": -31.272994995117184, "median": 48.359161376953125, "p90": 192.55957336425783, "max": 295.91058349609375, "pos_frac": 0.75, "sample": [110.427490234375, -67.10455322265625, 43.52522277832031, 234.57638549804688, -29.19812774658203, -9.336540222167969, 97.34573364257812, 98.17864227294922, 51.61180114746094, -43.451316833496094, 29.95597267150879, 76.39166259765625, 60.29998016357422, 124.23625183105469, 25.213733673095703, 195.79571533203125, 115.28657531738281, 20.44283676147461, -1.2415008544921875, 185.00857543945312, 161.32801818847656, -26.04138946533203, 174.2873077392578, 23.624353408813477, 278.3216247558594, 49.40504455566406, 202.58424377441406, 110.9608154296875, -14.875898361206055, 47.31327819824219, 31.534191131591797, -135.26124572753906, 5.6396026611328125, 117.95216369628906, 105.23385620117188, -26.457382202148438, 61.81951904296875, 295.91058349609375, 78.04644012451172, 20.722400665283203, 180.05471801757812, 15.79498291015625, -12.475667953491211, 14.682456970214844, 32.56471252441406, 60.211326599121094, -11.858993530273438, 80.52548217773438, 66.21759796142578, -34.291908264160156, 18.18939208984375, 51.74411392211914, 19.817699432373047, 177.91123962402344, -26.50672149658203, 59.9261589050293, 242.54946899414062, -32.16222381591797, 31.985679626464844, -147.00946044921875, 215.9365234375, -59.68025207519531, 60.83238220214844, 44.684165954589844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000147.npy"} +{"epoch": 0.21585903083700442, "step": 148, "batch_size": 64, "mean": 52.805450439453125, "std": 83.90935516357422, "min": -105.18499755859375, "p10": -30.25096054077148, "median": 36.593475341796875, "p90": 146.58033752441406, "max": 339.08526611328125, "pos_frac": 0.796875, "sample": [-6.665958404541016, 50.56139373779297, 7.1011962890625, 83.42156982421875, -59.28496551513672, 105.4561538696289, 78.70579528808594, 39.285762786865234, -41.016441345214844, 200.58839416503906, 21.355079650878906, 143.77471923828125, 19.634244918823242, -13.323829650878906, 156.7622833251953, 256.81500244140625, 2.7370147705078125, 16.178192138671875, -40.190940856933594, -73.44721984863281, 136.0157012939453, 129.78753662109375, -11.501089096069336, 92.89224243164062, 96.78077697753906, 42.44117736816406, 65.87222290039062, 49.62784194946289, 108.84526062011719, 1.1037921905517578, 147.78274536132812, 32.426300048828125, 38.435035705566406, 71.30250549316406, -31.347869873046875, 47.22694778442383, 32.458229064941406, 17.796890258789062, 113.26041412353516, 58.91145706176758, 132.85800170898438, 35.427391052246094, -85.51924133300781, 49.591522216796875, 37.759559631347656, 23.928421020507812, 187.03448486328125, 51.89311981201172, -3.936126708984375, -27.691505432128906, -105.18499755859375, 41.401885986328125, 9.499324798583984, 6.740358352661133, 33.920127868652344, 6.762537002563477, 34.29541778564453, 339.08526611328125, 321.94866943359375, 7.67913818359375, -4.312967300415039, 0.7863006591796875, 90.78616333007812, 6.23045539855957], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000148.npy"} +{"epoch": 0.2173274596182085, "step": 149, "batch_size": 64, "mean": 65.48056030273438, "std": 82.64147186279297, "min": -187.12940979003906, "p10": -9.28653106689453, "median": 57.63277626037598, "p90": 154.80462493896485, "max": 310.38116455078125, "pos_frac": 0.828125, "sample": [113.61742401123047, 69.36297607421875, 208.39312744140625, 60.55031967163086, 108.9148941040039, 71.59739685058594, 74.69542694091797, -10.285345077514648, -18.96465301513672, -9.846675872802734, 21.340253829956055, -7.8790435791015625, 60.6342658996582, 45.02500915527344, 61.84967041015625, -67.12255859375, 92.14498901367188, 94.715087890625, 47.126068115234375, 135.05023193359375, 111.61161804199219, 175.83541870117188, 106.0239486694336, 29.24824333190918, 14.671480178833008, 100.14938354492188, 48.558990478515625, 128.07342529296875, 75.40402221679688, 135.2601318359375, 65.27013397216797, -1.6859703063964844, 310.38116455078125, 41.91725158691406, 155.52133178710938, 24.198455810546875, 225.40234375, 66.39543914794922, 28.119407653808594, 110.17243194580078, 25.193965911865234, 71.58549499511719, 11.672914505004883, 42.551361083984375, -0.43494415283203125, 54.715232849121094, 13.162555694580078, 53.425621032714844, -13.556121826171875, -187.12940979003906, 96.81231689453125, 13.643028259277344, 2.6265411376953125, 153.13230895996094, 10.759611129760742, 278.69482421875, 15.568338394165039, 41.60790252685547, 139.06378173828125, 266.47918701171875, 7.207557678222656, -7.979526519775391, 87.40090942382812, -86.89550018310547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000149.npy"} +{"epoch": 0.21879588839941264, "step": 150, "batch_size": 64, "mean": 64.3185043334961, "std": 95.61563110351562, "min": -86.17083740234375, "p10": -20.220893859863278, "median": 38.37028694152832, "p90": 172.80112152099613, "max": 422.55218505859375, "pos_frac": 0.8125, "sample": [298.5655517578125, 322.20477294921875, 20.76972198486328, 30.884857177734375, 61.52635192871094, -39.280357360839844, 184.69839477539062, 37.565887451171875, -81.22783660888672, 73.2588882446289, 422.55218505859375, -8.761667251586914, -16.157997131347656, 18.458091735839844, -9.331642150878906, 132.30430603027344, 18.737686157226562, 53.032691955566406, 48.51499938964844, 74.12467193603516, 39.174686431884766, 57.475120544433594, 149.27056884765625, 27.424903869628906, 110.03178405761719, 90.22395324707031, 194.20411682128906, 133.81626892089844, 16.65972900390625, 68.098388671875, 14.27215576171875, -48.597686767578125, 84.03229522705078, 176.29298400878906, 69.83746337890625, 8.159072875976562, 164.6534423828125, 22.00141716003418, 146.1175537109375, -21.962135314941406, 96.18589782714844, -36.28430938720703, -41.955726623535156, 6.1757354736328125, 122.71648406982422, 5.160915374755859, 29.45215606689453, 63.475563049316406, 19.43254852294922, 39.418434143066406, 57.73390197753906, 69.87274169921875, 65.61550903320312, 19.84239959716797, 30.502599716186523, 352.49322509765625, 12.963338851928711, 17.66286849975586, 11.546243667602539, 29.802093505859375, 98.54462432861328, -11.347679138183594, -86.17083740234375, -0.080047607421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000150.npy"} +{"epoch": 0.22026431718061673, "step": 151, "batch_size": 64, "mean": 64.55509185791016, "std": 80.68579864501953, "min": -145.84878540039062, "p10": -27.499513626098633, "median": 56.72659683227539, "p90": 182.42498168945312, "max": 252.0250244140625, "pos_frac": 0.828125, "sample": [-145.84878540039062, 222.18055725097656, 47.52418518066406, -3.2543563842773438, 43.53062438964844, 56.80586242675781, 68.81178283691406, 113.53555297851562, 70.89768981933594, 27.417404174804688, 182.69381713867188, -46.134674072265625, -28.26776123046875, 103.804443359375, 56.529632568359375, -45.493717193603516, -5.591312408447266, 42.39295959472656, 57.046142578125, 52.410072326660156, 231.96551513671875, 56.94431686401367, 147.3452606201172, 56.9832763671875, -55.506446838378906, 177.52218627929688, 25.834793090820312, 10.779144287109375, -25.70693588256836, 22.064483642578125, 36.20233154296875, 12.958181381225586, 19.697940826416016, 8.893728256225586, 113.09709167480469, 181.01092529296875, -64.05778503417969, 64.32633209228516, 104.5760269165039, 99.51142883300781, 8.007226943969727, 57.785858154296875, 100.14201354980469, 78.86814880371094, 11.692703247070312, 197.61740112304688, 106.69981384277344, 56.64733123779297, 121.68504333496094, -6.416862487792969, 215.7384490966797, 7.992469787597656, 2.9131107330322266, 142.11947631835938, 12.720962524414062, 15.452949523925781, 204.90426635742188, 109.83185577392578, 252.0250244140625, 181.79769897460938, -48.44849395751953, 77.63621520996094, 20.32494354248047, 108.35630798339844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000151.npy"} +{"epoch": 0.22173274596182085, "step": 152, "batch_size": 64, "mean": 78.45030212402344, "std": 95.8574447631836, "min": -88.484130859375, "p10": -39.69433593749999, "median": 61.300127029418945, "p90": 217.458805847168, "max": 318.10968017578125, "pos_frac": 0.796875, "sample": [29.7049560546875, 58.22221374511719, 163.18431091308594, 174.90562438964844, 74.92831420898438, 205.15036010742188, 61.9343147277832, 146.072998046875, 318.10968017578125, 36.070396423339844, 155.43890380859375, 132.8471221923828, 36.28676223754883, -67.44741821289062, -4.7999267578125, 230.036376953125, 140.01849365234375, 50.65270233154297, 67.89938354492188, 62.84446716308594, 56.962486267089844, 53.09003448486328, -46.81907653808594, 74.83013916015625, 1.8626327514648438, 316.157958984375, 130.89559936523438, 229.1743621826172, 42.55989074707031, 22.60344696044922, 63.14524841308594, -51.07525634765625, 20.692516326904297, -7.029632568359375, 1.3106040954589844, 60.66593933105469, 19.60645294189453, 200.60757446289062, 32.05999755859375, 46.00885009765625, 288.5059509277344, -24.465652465820312, 54.9130859375, 210.35104370117188, 66.31829833984375, -32.4072265625, 240.55079650878906, 34.75640869140625, -75.9747085571289, 107.07586669921875, 3.178224563598633, 142.73947143554688, 92.50860595703125, 71.6360092163086, 220.50498962402344, -46.046539306640625, 161.95591735839844, -88.484130859375, -8.844017028808594, 116.71928405761719, -42.8173828125, -29.755203247070312, 135.73477172851562, 82.79550170898438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000152.npy"} +{"epoch": 0.22320117474302498, "step": 153, "batch_size": 64, "mean": 67.03783416748047, "std": 102.10991668701172, "min": -177.0665283203125, "p10": -35.946903228759766, "median": 55.83584976196289, "p90": 200.53486633300787, "max": 413.35040283203125, "pos_frac": 0.75, "sample": [87.40528869628906, -2.150390625, 9.444442749023438, -7.052928924560547, 19.427898406982422, -10.164459228515625, 28.175861358642578, 158.10464477539062, -40.50312805175781, 189.87892150878906, 93.665283203125, 9.563499450683594, -41.7713737487793, 84.00343322753906, 245.5714874267578, -69.15013885498047, 78.21981811523438, 16.984159469604492, 55.91522979736328, 104.0960922241211, 55.7564697265625, 413.35040283203125, 41.06024932861328, 65.76296997070312, 82.94845581054688, 185.40359497070312, -25.725074768066406, -42.53279113769531, 205.10169982910156, 206.32656860351562, 140.0193634033203, 39.57648468017578, 246.778076171875, 6.34022331237793, 1.8860912322998047, 1.173980712890625, 166.229736328125, 96.92352294921875, -34.42654800415039, -36.59848403930664, 128.8219757080078, 16.406158447265625, 11.106170654296875, 270.7976379394531, 24.333648681640625, -15.277366638183594, -62.89318084716797, 63.92045593261719, 9.05426025390625, -177.0665283203125, 90.09117126464844, 360.82183837890625, 76.36860656738281, -32.46864700317383, 65.80506896972656, -4.162954330444336, 96.89019012451172, 45.53759765625, 135.06381225585938, 136.34210205078125, 77.87054443359375, -20.648799896240234, 63.635765075683594, 105.05345153808594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000153.npy"} +{"epoch": 0.22466960352422907, "step": 154, "batch_size": 64, "mean": 64.08331298828125, "std": 106.6012954711914, "min": -205.96124267578125, "p10": -54.61430282592774, "median": 56.99863052368164, "p90": 193.59736633300784, "max": 302.6189270019531, "pos_frac": 0.65625, "sample": [-9.88703727722168, 57.49940490722656, -205.96124267578125, 24.916505813598633, 302.6189270019531, 56.49785614013672, -11.487634658813477, 152.03350830078125, 178.22555541992188, -54.51351547241211, 93.73035430908203, 234.31260681152344, -89.83499145507812, 128.97665405273438, 119.41500854492188, 62.13457107543945, 2.4511051177978516, 23.32598876953125, 36.64101028442383, -4.649662017822266, 113.02639770507812, 58.3194694519043, -6.702770233154297, 64.960693359375, -27.445831298828125, 188.88058471679688, -54.65749740600586, 210.78195190429688, 153.03152465820312, -90.43986511230469, -37.540977478027344, 178.63262939453125, -75.3308334350586, 169.33114624023438, -52.901031494140625, 200.86973571777344, 298.1790771484375, 171.73431396484375, -3.7738189697265625, 179.43626403808594, 195.57875061035156, 118.89157104492188, 18.770626068115234, 78.43844604492188, 188.97413635253906, -11.585037231445312, -17.474884033203125, -8.213624954223633, -91.89745330810547, 119.19099426269531, 157.8840789794922, -23.802276611328125, 3.994779586791992, 29.026611328125, 10.986007690429688, -80.10240173339844, 111.48617553710938, 141.34463500976562, 121.00789642333984, -51.31964111328125, -7.86041259765625, 1.1787548065185547, 82.88063049316406, 279.11761474609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000154.npy"} +{"epoch": 0.2261380323054332, "step": 155, "batch_size": 64, "mean": 64.64971923828125, "std": 95.74845886230469, "min": -212.2616729736328, "p10": -24.976444625854484, "median": 63.63713836669922, "p90": 170.57701721191415, "max": 356.83038330078125, "pos_frac": 0.734375, "sample": [-8.083381652832031, 91.2346420288086, 58.509971618652344, -7.42242431640625, 91.96157836914062, 13.481781005859375, 80.68283081054688, -28.271530151367188, 74.58609771728516, 22.36745834350586, -8.187271118164062, 65.18678283691406, 102.79256439208984, -81.21002197265625, 110.52153778076172, 73.60395050048828, -27.95828628540039, 48.433563232421875, 62.087493896484375, -0.370391845703125, 76.38394165039062, 67.86483764648438, 41.89178466796875, 356.83038330078125, 188.689208984375, 8.891447067260742, 137.1332550048828, 104.01384735107422, 9.067481994628906, -129.37196350097656, 24.16043472290039, -1.7895965576171875, 23.150856018066406, 95.11141967773438, 189.81289672851562, -212.2616729736328, 282.50213623046875, 46.533607482910156, 137.35336303710938, -15.358146667480469, 2.434467315673828, 96.27340698242188, 148.6998291015625, 44.80303955078125, 282.5453186035156, 98.35501098632812, 285.5980529785156, 151.797607421875, -76.01101684570312, 102.5621337890625, -0.9167327880859375, 178.62533569335938, -13.20615005493164, 113.26343536376953, 89.96426391601562, -49.079490661621094, 61.22590637207031, 131.6688232421875, 124.9864273071289, 12.811725616455078, 136.58226013183594, -4.034730911254883, -18.018814086914062, 72.09554290771484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000155.npy"} +{"epoch": 0.2276064610866373, "step": 156, "batch_size": 64, "mean": 37.15288543701172, "std": 108.95697021484375, "min": -283.4932861328125, "p10": -97.74614868164062, "median": 23.362640380859375, "p90": 137.91751861572266, "max": 290.43695068359375, "pos_frac": 0.734375, "sample": [135.0177459716797, 58.966278076171875, 290.43695068359375, 258.3602294921875, 176.72735595703125, 120.79803466796875, -200.26724243164062, 2.40771484375, -86.67803955078125, 20.492149353027344, -200.25599670410156, 133.69076538085938, 99.7439956665039, -23.7413330078125, -64.91392517089844, -139.0936279296875, 41.03180694580078, 106.38809204101562, 253.2207489013672, 77.65397644042969, 21.126937866210938, 122.16644287109375, 50.141510009765625, 17.62118911743164, 3.0261173248291016, -6.730094909667969, 15.698606491088867, 31.06580352783203, 258.44818115234375, -16.22643280029297, 244.73028564453125, 139.1602783203125, 122.33340454101562, 71.2939224243164, 10.975799560546875, 7.284666061401367, 76.57847595214844, -126.0819091796875, -283.4932861328125, -80.0125503540039, 81.90335845947266, -54.05492401123047, 1.4122772216796875, 128.04359436035156, 11.552413940429688, -102.4896240234375, 84.4781265258789, 118.47112274169922, 76.00785827636719, 8.590593338012695, -4.780632019042969, 77.20198059082031, 59.37751007080078, 54.952247619628906, -109.24581146240234, -49.00856018066406, 7.2607574462890625, 20.412582397460938, 4.462921142578125, -4.3916168212890625, 25.598342895507812, 76.8227310180664, 7.651622772216797, 118.46295166015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000156.npy"} +{"epoch": 0.2290748898678414, "step": 157, "batch_size": 64, "mean": 82.09357452392578, "std": 106.38275909423828, "min": -306.0540771484375, "p10": -28.342701148986816, "median": 73.64865112304688, "p90": 200.03189392089848, "max": 389.4761962890625, "pos_frac": 0.828125, "sample": [161.04061889648438, 167.76483154296875, 87.32076263427734, 192.2500762939453, 131.56710815429688, 10.322914123535156, 77.9969482421875, 147.61737060546875, 4.798896789550781, 152.39035034179688, 57.704837799072266, 212.62033081054688, 51.50694274902344, 91.11224365234375, 225.18206787109375, 203.36695861816406, 151.10757446289062, 107.7701416015625, 42.68257141113281, 161.97793579101562, 11.929359436035156, 39.00837707519531, 69.30035400390625, -29.01201629638672, 114.26910400390625, 251.96043395996094, -4.746040344238281, 31.886829376220703, -2.21282958984375, 115.67726135253906, -306.0540771484375, -40.500816345214844, 27.661331176757812, 52.39885711669922, 125.43904113769531, 82.07254791259766, 253.00936889648438, 129.119873046875, 108.00993347167969, -33.63114929199219, 29.037479400634766, 285.4210510253906, 34.46831512451172, 64.43445587158203, 8.323932647705078, 178.8129119873047, 107.31468200683594, -26.78096580505371, 171.80941772460938, 126.20561218261719, 188.37493896484375, 46.90437316894531, -58.98466110229492, 20.67901611328125, 187.11431884765625, 158.02484130859375, 389.4761962890625, 58.443809509277344, 46.33189392089844, -141.868408203125, 17.637969970703125, -63.518829345703125, 13.80514144897461, -21.165924072265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000157.npy"} +{"epoch": 0.2305433186490455, "step": 158, "batch_size": 64, "mean": 87.40489959716797, "std": 133.1334228515625, "min": -180.47567749023438, "p10": -46.25967750549316, "median": 54.35513687133789, "p90": 279.46174011230477, "max": 486.6083068847656, "pos_frac": 0.71875, "sample": [8.39107894897461, 10.902673721313477, 43.38999938964844, 51.04541015625, 53.56141662597656, -30.52568817138672, 33.03133773803711, 87.01850128173828, 36.62632751464844, -50.686466217041016, 213.511962890625, -115.0401611328125, 251.72679138183594, 81.9052963256836, -12.367462158203125, -57.367523193359375, -14.73713493347168, 104.76788330078125, -12.077322006225586, 15.943199157714844, 232.5135498046875, -36.735382080078125, 288.6636962890625, -180.47567749023438, 306.41656494140625, 74.56583404541016, 144.30747985839844, 20.541414260864258, 36.91795349121094, 13.028629302978516, 79.12629699707031, 87.5238265991211, -20.73406982421875, 157.46653747558594, -55.36789321899414, -3.3957977294921875, 343.01885986328125, 103.6924819946289, -120.28150177001953, 334.7313232421875, 419.67852783203125, -3.3037872314453125, 67.98184967041016, 486.6083068847656, 18.600494384765625, 12.428802490234375, -50.34151840209961, 121.63232421875, 237.52206420898438, 110.81958770751953, 379.04901123046875, 170.06649780273438, 101.66166687011719, 174.2176055908203, -14.961027145385742, 178.517333984375, 104.11421203613281, -8.455825805664062, 26.72960662841797, 101.5287857055664, 257.9905090332031, 55.14885711669922, 161.9903564453125, -19.85470962524414], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000158.npy"} +{"epoch": 0.23201174743024963, "step": 159, "batch_size": 64, "mean": 59.80369567871094, "std": 106.1305160522461, "min": -209.13551330566406, "p10": -57.67332382202149, "median": 59.89497375488281, "p90": 196.59541625976564, "max": 319.7441101074219, "pos_frac": 0.75, "sample": [11.798843383789062, 41.29871368408203, 142.6573028564453, -49.45042419433594, 250.99441528320312, 136.82968139648438, -209.13551330566406, -39.48856735229492, 125.97526550292969, 166.83270263671875, 91.48747253417969, -57.84953308105469, 14.787797927856445, -22.430084228515625, 90.91651916503906, -71.25882720947266, -117.20559692382812, 52.81682205200195, 186.75372314453125, 145.07257080078125, 319.7441101074219, 9.207141876220703, -47.702789306640625, 18.9053955078125, 29.622718811035156, -74.74482727050781, 20.48670196533203, -54.75140380859375, 88.30764770507812, 54.08677673339844, -21.97130584716797, 199.02456665039062, 89.09809112548828, 14.549671173095703, 117.55518341064453, -23.13899040222168, 60.423919677734375, -100.07623291015625, -4.432807922363281, 225.03787231445312, 142.7142333984375, 223.28109741210938, 62.672882080078125, 71.93079376220703, 1.3066635131835938, 274.4764709472656, 13.12852668762207, 1.6833667755126953, 72.98451232910156, 59.36602783203125, 134.78475952148438, 29.68445587158203, -57.262168884277344, -167.2371826171875, 71.0155029296875, 99.22161865234375, 97.51932525634766, 95.6416015625, 29.280466079711914, 305.8613586425781, 190.92739868164062, 68.63027954101562, 69.84251403808594, 125.34733581542969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000159.npy"} +{"epoch": 0.23348017621145375, "step": 160, "batch_size": 64, "mean": 80.51261901855469, "std": 131.21035766601562, "min": -152.8600311279297, "p10": -69.3838020324707, "median": 65.3844985961914, "p90": 248.41092224121098, "max": 466.6248779296875, "pos_frac": 0.671875, "sample": [105.20232391357422, -11.262327194213867, 189.70867919921875, 159.1498260498047, -13.48819351196289, -70.99128723144531, 194.95556640625, 186.0095672607422, 170.89065551757812, 13.636215209960938, -80.62036895751953, -5.849090576171875, 143.4285125732422, 240.29489135742188, 121.66114044189453, -72.81153869628906, 38.81503677368164, -37.71788024902344, -145.81216430664062, 230.4324493408203, 282.510498046875, -119.38990783691406, 251.88922119140625, -63.44342803955078, 145.38424682617188, 44.121673583984375, 5.390892028808594, -12.631149291992188, 466.6248779296875, 64.89567565917969, 14.382654190063477, 65.87332153320312, -30.669769287109375, 395.14190673828125, -1.67510986328125, 74.28221130371094, 107.92784881591797, 164.1731719970703, 380.344970703125, 305.41412353515625, 31.44710922241211, 220.04443359375, -152.8600311279297, 207.03128051757812, 93.65630340576172, 14.891326904296875, -31.007158279418945, 17.610605239868164, 115.84957122802734, 111.6920166015625, -38.94928741455078, 34.09966278076172, 112.44563293457031, -65.63300323486328, 93.6822509765625, 78.94389343261719, -11.210458755493164, -0.4527759552001953, -96.56273651123047, 160.4774169921875, 31.758712768554688, 258.07806396484375, -43.14970397949219, 114.74461364746094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000160.npy"} +{"epoch": 0.23494860499265785, "step": 161, "batch_size": 64, "mean": 70.55889129638672, "std": 105.04612731933594, "min": -134.43753051757812, "p10": -48.2220474243164, "median": 48.69301414489746, "p90": 235.35553436279304, "max": 325.6353759765625, "pos_frac": 0.765625, "sample": [77.4606704711914, 202.02749633789062, -49.53868103027344, 134.5590057373047, 303.1046447753906, -134.43753051757812, -53.287322998046875, -67.49325561523438, 99.38511657714844, -44.22106170654297, 43.10150146484375, 98.46245574951172, 79.58433532714844, -72.02006530761719, 3.797435760498047, 122.76863098144531, 325.6353759765625, 85.38706970214844, 139.62860107421875, 268.18096923828125, 15.442298889160156, 55.55350875854492, -7.22186279296875, -35.7234992980957, 34.02827453613281, -9.372516632080078, 128.772705078125, 38.41901397705078, 279.9692077636719, 302.7119445800781, 125.78291320800781, -32.17433166503906, 0.3219108581542969, -29.066959381103516, 243.7313232421875, 111.6175765991211, 36.91743469238281, -45.14990234375, 17.150833129882812, 82.75723266601562, -13.966323852539062, 26.336753845214844, 11.691902160644531, 30.254592895507812, 14.501914978027344, 25.904754638671875, 209.86074829101562, 76.26143646240234, 15.554641723632812, 148.59317016601562, 215.81202697753906, -106.99296569824219, 76.61784362792969, 168.95501708984375, 37.61662292480469, 41.16474914550781, 33.02340316772461, 128.56072998046875, 91.50495910644531, -89.52037811279297, 54.28452682495117, 255.27484130859375, 116.14910888671875, 71.77237701416016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000161.npy"} +{"epoch": 0.23641703377386197, "step": 162, "batch_size": 64, "mean": 69.96736145019531, "std": 112.02008056640625, "min": -177.0685272216797, "p10": -44.371298217773436, "median": 61.53647232055664, "p90": 203.45760498046877, "max": 454.6783752441406, "pos_frac": 0.75, "sample": [-0.34050750732421875, 51.68807601928711, 105.09695434570312, 248.69764709472656, 61.60475158691406, 221.4638671875, 31.2498836517334, 136.92225646972656, 46.33892059326172, -7.686971664428711, -21.661203384399414, 198.84005737304688, 70.36396789550781, -111.01229095458984, -40.47212219238281, -18.681861877441406, 161.70901489257812, 166.58547973632812, -4.5296630859375, 122.15840148925781, 76.19719696044922, 28.212425231933594, 56.67805480957031, 7.4232025146484375, 21.230051040649414, 205.43655395507812, 130.75318908691406, -37.00016784667969, -177.0685272216797, 147.3805389404297, 102.40422821044922, -113.1112060546875, -15.668107986450195, 18.73868751525879, 150.82266235351562, 69.35835266113281, 233.38571166992188, 454.6783752441406, 64.19624328613281, 119.11327362060547, 85.35677337646484, 96.30400085449219, 152.04957580566406, 166.29762268066406, 145.9080047607422, 2.8889007568359375, -46.04237365722656, 0.05641937255859375, 94.97244262695312, 43.24053192138672, -84.75050354003906, 166.97589111328125, 4.6196441650390625, -94.15645599365234, 394.4644775390625, 22.885841369628906, -115.3205337524414, 30.221176147460938, 78.75163269042969, 87.50385284423828, 61.46819305419922, -32.553741455078125, 38.92195129394531, 216.35244750976562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000162.npy"} +{"epoch": 0.23788546255506607, "step": 163, "batch_size": 64, "mean": 68.33741760253906, "std": 110.75975799560547, "min": -161.59222412109375, "p10": -50.1995361328125, "median": 60.60329818725586, "p90": 258.93974609375005, "max": 353.47332763671875, "pos_frac": 0.671875, "sample": [-7.141273498535156, -8.734115600585938, 265.11138916015625, 353.47332763671875, -27.369285583496094, -124.58625793457031, 93.9194564819336, -10.068695068359375, -9.500801086425781, 80.10557556152344, 134.28375244140625, -35.84563446044922, 104.22669219970703, 321.89990234375, 107.7421646118164, 43.41698455810547, 100.63874053955078, -17.576889038085938, 14.833274841308594, -62.16051483154297, 56.953758239746094, 155.80496215820312, 109.87646484375, -34.208351135253906, 99.14558410644531, 0.7299098968505859, 122.33930969238281, -161.59222412109375, -69.22197723388672, 110.3117904663086, 268.4188232421875, 14.288105010986328, 139.54696655273438, 12.91218376159668, 67.63882446289062, -10.557008743286133, -52.40226745605469, 299.1934814453125, 128.49948120117188, 65.97769165039062, -69.71798706054688, 33.300071716308594, -45.05982971191406, 244.53924560546875, 43.52758026123047, 104.9181137084961, -32.288063049316406, 298.05340576171875, 148.5526123046875, 31.52629280090332, 60.71631622314453, 108.98295593261719, 60.49028015136719, 181.41427612304688, 76.1912612915039, -22.84160614013672, 81.9156265258789, 12.499130249023438, -97.59927368164062, -1.3405628204345703, 268.87060546875, 91.5682373046875, 165.88092041015625, -10.828346252441406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000163.npy"} +{"epoch": 0.2393538913362702, "step": 164, "batch_size": 64, "mean": 83.9617919921875, "std": 105.3608627319336, "min": -152.96243286132812, "p10": -22.498446464538574, "median": 70.7585220336914, "p90": 210.04867553710943, "max": 427.15570068359375, "pos_frac": 0.84375, "sample": [-145.79869079589844, 165.735595703125, 168.21871948242188, 164.44277954101562, 227.21255493164062, 51.53064727783203, 81.88818359375, 126.21014404296875, 71.17594909667969, 75.19034576416016, 245.90225219726562, 49.48252868652344, 20.963945388793945, 51.11522674560547, -66.38925170898438, 132.951416015625, 64.205078125, 66.96712493896484, -22.862268447875977, 70.39354705810547, 4.643720626831055, 131.73245239257812, 71.12349700927734, 198.24578857421875, 31.146608352661133, 154.2598419189453, -26.27291488647461, -90.22249603271484, 146.8785400390625, 16.2722110748291, 45.15989685058594, -17.900436401367188, 153.88978576660156, 178.49575805664062, 141.60635375976562, 113.58042907714844, 49.80689239501953, 102.55961608886719, 16.6402587890625, 170.1090545654297, 38.399200439453125, 89.58656311035156, 67.37547302246094, 31.711669921875, 45.442832946777344, 9.471649169921875, 71.61297607421875, 35.52030944824219, -152.96243286132812, 138.26058959960938, 75.6522445678711, 48.66304016113281, 339.0492858886719, 7.25909423828125, 343.2734680175781, 244.88751220703125, 0.73260498046875, 215.1070556640625, 85.29676055908203, -55.82969665527344, -21.64952850341797, 104.52316284179688, 427.15570068359375, -5.275611877441406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000164.npy"} +{"epoch": 0.24082232011747431, "step": 165, "batch_size": 64, "mean": 53.86854553222656, "std": 100.76016998291016, "min": -151.38934326171875, "p10": -80.43705673217774, "median": 38.56016159057617, "p90": 205.93166198730478, "max": 280.81622314453125, "pos_frac": 0.703125, "sample": [93.32916259765625, 90.90364837646484, -84.39227294921875, 19.198741912841797, -92.9344482421875, -2.6824684143066406, 76.85248565673828, 117.97055053710938, -16.151077270507812, -82.46891021728516, 128.62860107421875, -151.38934326171875, -81.36885070800781, 35.48545837402344, 34.40718078613281, -96.32714080810547, -10.463184356689453, -49.3284912109375, 160.62893676757812, 148.552490234375, 83.30101013183594, 26.51708984375, 16.514957427978516, 280.81622314453125, -146.0260772705078, 36.294952392578125, -19.410017013549805, -47.34156036376953, -4.440582275390625, 142.38858032226562, -78.26287078857422, 17.35700035095215, 214.09030151367188, 184.46035766601562, 48.452117919921875, -25.37464141845703, 81.45311737060547, 61.56785583496094, 78.2337417602539, 4.295042037963867, 40.82537078857422, 61.06262969970703, 156.6898651123047, 76.6985855102539, 216.90682983398438, -18.517013549804688, 98.69482421875, 60.464599609375, 4.595924377441406, -72.81117248535156, 30.317047119140625, 225.39938354492188, 27.09217071533203, -45.572364807128906, 60.27833938598633, 270.12823486328125, 27.548309326171875, 266.940673828125, 220.134033203125, 121.23771667480469, 114.70964050292969, 12.221473693847656, 112.30926513671875, 186.89483642578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000165.npy"} +{"epoch": 0.2422907488986784, "step": 166, "batch_size": 64, "mean": 99.76808166503906, "std": 140.90550231933594, "min": -93.13206481933594, "p10": -36.639979553222645, "median": 67.35939979553223, "p90": 275.68581848144544, "max": 567.310546875, "pos_frac": 0.796875, "sample": [458.46575927734375, 318.0750732421875, 31.326580047607422, 497.30902099609375, -40.59046173095703, 3.867115020751953, 20.100505828857422, 71.93490600585938, 174.06369018554688, 44.723724365234375, 130.7084197998047, -10.531227111816406, 11.748512268066406, 161.0897674560547, -67.25267028808594, 79.22901916503906, 161.17843627929688, 20.087203979492188, -2.084972381591797, 193.1771240234375, 4.512796401977539, 567.310546875, 41.07619094848633, -51.0707893371582, -55.919837951660156, -52.60776138305664, 39.36595916748047, 147.46041870117188, -93.13206481933594, 240.00057983398438, 290.9794921875, 471.7228088378906, 113.00089263916016, 71.36122131347656, 27.757614135742188, 366.47625732421875, 13.782981872558594, 204.3936767578125, 28.416900634765625, 156.2817840576172, -15.935565948486328, 86.63619995117188, 18.745834350585938, 11.994247436523438, 176.81024169921875, 129.13015747070312, 213.9091033935547, -5.634967803955078, 75.27725219726562, 172.14947509765625, 32.057918548583984, 86.63078308105469, -63.28961181640625, 63.35757827758789, -5.199989318847656, -27.42218780517578, 119.4989242553711, 1.7649993896484375, 118.52595520019531, 5.7782440185546875, 91.37542724609375, 17.796125411987305, 206.24954223632812, 87.1561508178711], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000166.npy"} +{"epoch": 0.24375917767988253, "step": 167, "batch_size": 64, "mean": 105.04924011230469, "std": 117.84534454345703, "min": -137.65362548828125, "p10": -19.9156379699707, "median": 88.15605926513672, "p90": 255.92240447998057, "max": 435.33892822265625, "pos_frac": 0.828125, "sample": [105.78132629394531, 63.739131927490234, 147.19619750976562, 215.6769561767578, 115.3182373046875, 124.14129638671875, 91.50396728515625, 70.31278228759766, 96.58576965332031, 48.48805618286133, 435.33892822265625, 192.00936889648438, 73.75558471679688, -15.8115234375, 27.3853759765625, 62.380680084228516, 82.96517181396484, -59.51848602294922, 131.17916870117188, 187.43914794921875, 22.376718521118164, 42.005615234375, 78.95674133300781, 57.22931671142578, 107.46083068847656, -137.65362548828125, 279.86260986328125, 229.34805297851562, -21.320709228515625, 173.8180694580078, -12.358848571777344, 105.36256408691406, 77.68516540527344, 60.208831787109375, 376.66534423828125, 180.859619140625, 268.5859680175781, 84.80815124511719, 208.55409240722656, -127.7354736328125, 229.9967803955078, 267.03338623046875, 331.7766418457031, 147.89193725585938, 410.94561767578125, -16.63713836669922, 179.32003784179688, 146.50692749023438, 220.429443359375, 177.38047790527344, 99.37727355957031, 11.221237182617188, -4.405759811401367, 32.068824768066406, 63.47703552246094, 134.00579833984375, 98.6823501586914, 58.752296447753906, 22.050216674804688, -101.3241195678711, 24.292964935302734, 13.549081802368164, -52.30348205566406, -21.522705078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000167.npy"} +{"epoch": 0.24522760646108663, "step": 168, "batch_size": 64, "mean": 77.36841583251953, "std": 105.36457824707031, "min": -87.0561752319336, "p10": -49.16412849426269, "median": 54.86359214782715, "p90": 201.2569549560547, "max": 425.1435546875, "pos_frac": 0.796875, "sample": [20.2237548828125, 122.12883758544922, 27.342479705810547, 22.236186981201172, 4.788488388061523, 32.547550201416016, 140.4140625, -46.81093978881836, 23.197010040283203, 12.325538635253906, -85.4559326171875, 187.11732482910156, 425.1435546875, 91.76275634765625, -50.172637939453125, 197.091796875, 179.92584228515625, 233.2145233154297, 203.04202270507812, 49.52240753173828, 220.98455810546875, 271.1435546875, 181.18234252929688, 170.21566772460938, -46.520729064941406, -55.374786376953125, -14.802513122558594, 132.05686950683594, 150.6558837890625, 134.72781372070312, 92.78382873535156, 32.70756530761719, -63.592140197753906, 98.02510070800781, 36.32963562011719, 118.36019897460938, -71.11531066894531, -10.905147552490234, -28.459110260009766, 33.32655334472656, 28.240327835083008, 92.18912506103516, 64.5655517578125, 95.05375671386719, 115.79887390136719, 67.60635375976562, 28.919279098510742, -61.24773406982422, 278.63494873046875, 58.679588317871094, 156.9958038330078, 22.278379440307617, 27.636049270629883, 27.063507080078125, 129.07833862304688, 56.90037536621094, 28.05988121032715, -87.0561752319336, 52.82680892944336, -35.35614013671875, 376.5641174316406, 19.74732208251953, 95.81056213378906, 141.27532958984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000168.npy"} +{"epoch": 0.24669603524229075, "step": 169, "batch_size": 64, "mean": 67.25150299072266, "std": 130.41305541992188, "min": -278.51068115234375, "p10": -65.56857528686523, "median": 44.435529708862305, "p90": 221.2905197143555, "max": 559.8470458984375, "pos_frac": 0.75, "sample": [77.12918853759766, 87.72942352294922, 2.6770362854003906, 17.179237365722656, 215.95896911621094, 112.43114471435547, 17.072525024414062, 159.64308166503906, 91.15335845947266, 35.6613883972168, -1.2204170227050781, 47.246238708496094, -278.51068115234375, 3.0577430725097656, 59.73982620239258, -25.207565307617188, 25.305931091308594, -83.70726013183594, 87.26791381835938, 170.46713256835938, 156.96847534179688, -53.26298522949219, 66.85647583007812, -89.35009765625, 208.29263305664062, 223.57546997070312, 63.3774299621582, 4.50823974609375, -15.904159545898438, 31.43706512451172, 209.09609985351562, -6.744117736816406, 19.5330753326416, 72.1963119506836, 92.671875, 128.6515655517578, 254.00851440429688, 25.73212242126465, 69.59909057617188, 234.3160858154297, 6.369474411010742, 386.58306884765625, -123.7557601928711, 88.98876953125, 6.238670349121094, 58.794212341308594, 29.34112548828125, -70.84239959716797, 341.67633056640625, -16.250411987304688, 559.8470458984375, -4.5849761962890625, 96.2872085571289, 14.546791076660156, 292.9928283691406, -9.090911865234375, 83.39244079589844, 17.0482177734375, 41.624820709228516, -31.71662139892578, -127.39253234863281, 92.44425964355469, 198.89439392089844, -143.973388671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000169.npy"} +{"epoch": 0.24816446402349487, "step": 170, "batch_size": 64, "mean": 79.6080093383789, "std": 107.50672912597656, "min": -230.70791625976562, "p10": -31.328610229492185, "median": 65.97804260253906, "p90": 217.2148651123047, "max": 331.7049255371094, "pos_frac": 0.828125, "sample": [189.07269287109375, -137.26397705078125, 135.63946533203125, 85.95503997802734, 216.690185546875, 82.13070678710938, 133.07638549804688, 13.48101806640625, 143.60379028320312, 4.0812225341796875, 90.2801513671875, -4.905719757080078, 76.209228515625, -36.837310791015625, 9.114522933959961, -50.20855712890625, 115.29468536376953, 237.07162475585938, 16.80401611328125, 304.0160827636719, 4.687892913818359, -22.038442611694336, 173.62806701660156, 47.85878372192383, 2.3967819213867188, 21.771766662597656, -230.70791625976562, -28.225318908691406, 73.01200866699219, 197.97360229492188, 70.1759033203125, 127.27313232421875, 57.32497787475586, 79.90666198730469, 179.75540161132812, -52.02320861816406, 312.7408752441406, 5.8968048095703125, 9.042264938354492, 331.7049255371094, 74.38697814941406, -98.11317443847656, 57.841373443603516, 118.06364440917969, 200.56494140625, -32.658592224121094, 27.865015029907227, 4.022775650024414, 38.55005645751953, 61.780181884765625, 91.82234954833984, 47.71570587158203, 238.35067749023438, 40.985713958740234, 136.00802612304688, 57.89878845214844, 187.49891662597656, 217.43972778320312, -17.234554290771484, 196.811767578125, 237.293701171875, 10.764116287231445, 194.20510864257812, 17.58913803100586], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000170.npy"} +{"epoch": 0.24963289280469897, "step": 171, "batch_size": 64, "mean": 64.2116470336914, "std": 134.4303436279297, "min": -262.27056884765625, "p10": -89.86251602172851, "median": 58.528282165527344, "p90": 212.06755676269532, "max": 585.4961547851562, "pos_frac": 0.6875, "sample": [-79.81629943847656, 219.65162658691406, 19.664710998535156, 93.28742980957031, -102.3691177368164, -49.242835998535156, 7.4203033447265625, 107.730224609375, -77.05718994140625, 138.3459014892578, 12.248374938964844, 156.39698791503906, 84.34341430664062, 109.14848327636719, 50.46794128417969, -77.64810943603516, 101.97938537597656, -98.17070007324219, 111.24620056152344, -8.112266540527344, 208.82696533203125, 106.37733459472656, 52.29148864746094, 87.005615234375, 335.5057067871094, 72.47222900390625, -98.93183135986328, 585.4961547851562, -94.16803741455078, 48.070892333984375, 64.76507568359375, -262.27056884765625, 431.319580078125, 138.25137329101562, 91.64227294921875, -114.37673950195312, 212.488525390625, 105.71611022949219, 8.115142822265625, 306.568115234375, -0.021484375, 67.07201385498047, 211.08529663085938, -23.524656295776367, -95.30079650878906, 11.9384765625, 65.01283264160156, 85.51454162597656, -60.1773796081543, 106.70347595214844, -25.73554229736328, -61.146240234375, 139.73764038085938, 40.96106719970703, 82.95095825195312, -19.564722061157227, 33.727691650390625, 159.38082885742188, -19.633766174316406, -13.43902587890625, 88.0879898071289, 17.24045181274414, 264.00823974609375, 49.98775863647461], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000171.npy"} +{"epoch": 0.2511013215859031, "step": 172, "batch_size": 64, "mean": 68.72427368164062, "std": 113.76294708251953, "min": -197.5997314453125, "p10": -71.0709014892578, "median": 74.08094787597656, "p90": 196.8456756591797, "max": 477.4154052734375, "pos_frac": 0.703125, "sample": [-61.56288146972656, 104.24256896972656, -100.6605224609375, 88.93963623046875, 37.68231201171875, -47.367454528808594, 146.36465454101562, 101.58680725097656, 74.01374053955078, -75.14576721191406, -126.10638427734375, 53.19636154174805, 51.991798400878906, -76.9537353515625, 154.29696655273438, 88.22663879394531, 20.352245330810547, 71.32305908203125, -50.05154800415039, 72.81856536865234, -33.00212860107422, 93.91018676757812, 190.58453369140625, 144.19329833984375, 20.841651916503906, 74.14815521240234, 229.30667114257812, 199.52902221679688, -76.53001403808594, 263.427734375, -28.242538452148438, 111.38176727294922, 104.91558074951172, 90.87045288085938, 119.62722778320312, 307.69805908203125, 477.4154052734375, 44.92195129394531, 127.13372039794922, 31.499454498291016, -20.26360321044922, -43.849037170410156, 77.9085693359375, 79.4219970703125, 185.96240234375, -25.67803955078125, 13.01698112487793, 24.710126876831055, -30.837677001953125, 172.86187744140625, 158.31040954589844, -4.1725921630859375, -56.422332763671875, 101.2314453125, 107.4374771118164, -197.5997314453125, 72.81813049316406, 105.1297607421875, 228.00900268554688, 98.47184753417969, -16.663291931152344, -81.3362045288086, 248.2403564453125, 180.82867431640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000172.npy"} +{"epoch": 0.2525697503671072, "step": 173, "batch_size": 64, "mean": 67.58930969238281, "std": 96.3588638305664, "min": -186.70962524414062, "p10": -27.002701950073238, "median": 50.85917091369629, "p90": 208.99430236816414, "max": 350.13751220703125, "pos_frac": 0.796875, "sample": [44.691192626953125, 78.45745086669922, 28.44734001159668, -3.1277389526367188, 0.5102996826171875, 52.15159606933594, 2.102825164794922, 139.38131713867188, 119.05183410644531, 9.458892822265625, -16.989501953125, -43.2431640625, 16.28758430480957, 0.7807693481445312, 104.52381896972656, 263.02178955078125, 70.0145492553711, 58.996158599853516, 70.55950164794922, 176.71405029296875, 115.33955383300781, 87.17535400390625, 83.80960845947266, -48.22039031982422, 53.02849578857422, 30.587547302246094, 350.13751220703125, 28.041522979736328, 163.1488037109375, 216.995361328125, 65.35507202148438, 18.08870506286621, -60.063995361328125, 116.00298309326172, 101.793701171875, -22.655929565429688, 118.55529022216797, 3.61297607421875, -2.2142276763916016, 230.6927490234375, 61.5976448059082, 190.32516479492188, -28.865604400634766, 39.77771759033203, -6.783821105957031, 17.041454315185547, 229.38754272460938, 116.54559326171875, -45.42639923095703, -1.0452880859375, 94.99334716796875, 49.56674575805664, 128.82313537597656, -68.66201782226562, -186.70962524414062, 336.168701171875, 37.03575897216797, 5.491565704345703, 93.10604858398438, 20.121295928955078, 270.5169372558594, 61.04972839355469, 46.622501373291016, 44.036224365234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000173.npy"} +{"epoch": 0.2540381791483113, "step": 174, "batch_size": 64, "mean": 79.87288665771484, "std": 118.65287017822266, "min": -153.55213928222656, "p10": -52.55796585083007, "median": 59.16422653198242, "p90": 257.6055648803712, "max": 454.4465637207031, "pos_frac": 0.765625, "sample": [23.22722625732422, -6.7552642822265625, -54.79027557373047, -87.57698059082031, -4.3173828125, 14.527351379394531, -14.430980682373047, 13.069509506225586, 113.7577896118164, 203.1428985595703, 223.61131286621094, -1.4463577270507812, 63.837432861328125, 19.021146774291992, 58.65656280517578, -47.3492431640625, 113.6920166015625, 162.05613708496094, 150.59445190429688, 272.1745300292969, 58.35704040527344, 35.27022933959961, 59.67189025878906, -16.10898208618164, 330.2198181152344, -83.58013916015625, 133.78045654296875, 298.9754638671875, 29.422714233398438, 93.51612854003906, 13.12989616394043, -14.322031021118164, 61.012481689453125, -81.65950012207031, 141.08282470703125, -46.982032775878906, 166.00790405273438, 109.27477264404297, -119.42251586914062, 288.429931640625, 311.06890869140625, 328.2022399902344, 32.20647430419922, 123.47421264648438, 12.594400405883789, -153.55213928222656, 3.369901657104492, 142.16305541992188, 119.11677551269531, 145.91061401367188, 78.74639892578125, 50.08561706542969, 24.23230743408203, 141.10699462890625, -95.22782135009766, 181.59632873535156, 152.7963409423828, 95.102294921875, 47.49010467529297, 13.635128021240234, 454.4465637207031, 131.25950622558594, 70.47879028320312, 30.78354835510254], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000174.npy"} +{"epoch": 0.2555066079295154, "step": 175, "batch_size": 64, "mean": 109.91596221923828, "std": 109.30248260498047, "min": -86.94302368164062, "p10": -8.455738067626951, "median": 87.88285827636719, "p90": 241.4574737548828, "max": 435.2728271484375, "pos_frac": 0.828125, "sample": [21.749181747436523, 37.54454803466797, 57.82658386230469, -86.94302368164062, 87.09626770019531, 153.65634155273438, 228.62307739257812, 85.42479705810547, 23.989282608032227, 146.6823272705078, 322.950927734375, -72.92796325683594, -9.409866333007812, 312.8867492675781, 118.56472778320312, 35.4345703125, 221.67947387695312, 218.5330047607422, 238.05563354492188, 163.49069213867188, 181.06817626953125, -61.276710510253906, 201.81805419921875, 435.2728271484375, 39.53607177734375, 133.648193359375, 324.0606689453125, 49.899818420410156, 133.95343017578125, 83.34037017822266, -6.229438781738281, 54.263648986816406, 310.1455993652344, 18.47903060913086, 4.019233703613281, 38.68457794189453, -1.7557563781738281, 265.6088562011719, 8.386978149414062, 84.09288787841797, 123.7314453125, 117.34820556640625, 125.0355453491211, 222.1778564453125, -1.7021961212158203, 221.455322265625, 242.9154052734375, 183.3749237060547, -25.769702911376953, 101.75157928466797, 174.22616577148438, 94.44316864013672, 88.66944885253906, 153.64822387695312, 44.521583557128906, 72.02793884277344, 141.8016815185547, -9.769403457641602, 43.19641876220703, 235.34555053710938, -40.10504913330078, 70.69065856933594, -0.6289825439453125, 54.31170654296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000175.npy"} +{"epoch": 0.25697503671071953, "step": 176, "batch_size": 64, "mean": 91.51618957519531, "std": 121.04995727539062, "min": -63.20860290527344, "p10": -35.57849044799804, "median": 64.62747192382812, "p90": 225.26477203369143, "max": 507.38775634765625, "pos_frac": 0.78125, "sample": [6.040130615234375, 30.37567138671875, 98.85832214355469, 206.5806121826172, 68.14401245117188, 28.672077178955078, 10.887199401855469, 324.2493591308594, 17.821426391601562, -26.18365478515625, -37.170814514160156, 472.4325256347656, -15.021116256713867, -63.20860290527344, 236.32958984375, 71.27005004882812, 26.250823974609375, 219.3449249267578, 179.72323608398438, -7.346534729003906, -3.3500213623046875, 70.13260650634766, 148.4080352783203, -48.813209533691406, 227.80184936523438, 3.3029747009277344, 29.63020133972168, 194.5563507080078, 178.93096923828125, 10.296791076660156, 116.38294982910156, -60.84893798828125, 149.30291748046875, -48.981056213378906, 41.06834411621094, 171.22421264648438, -43.381587982177734, 82.22611236572266, 182.33526611328125, 139.5215606689453, 190.0406494140625, 0.1099700927734375, -41.24256896972656, -0.5920600891113281, 156.41876220703125, 140.01565551757812, 128.71481323242188, 72.88025665283203, 140.54342651367188, 285.91485595703125, 35.46128845214844, -31.863067626953125, 25.200347900390625, 13.664373397827148, 138.0795440673828, 61.110931396484375, -15.393600463867188, 12.98678207397461, 130.62338256835938, 1.3003196716308594, 167.81503295898438, 507.38775634765625, 7.854713439941406, 342.20880126953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000176.npy"} +{"epoch": 0.25844346549192365, "step": 177, "batch_size": 64, "mean": 91.91362762451172, "std": 119.79065704345703, "min": -107.15956115722656, "p10": -21.937765502929683, "median": 69.43682479858398, "p90": 245.03480834960948, "max": 587.4664306640625, "pos_frac": 0.75, "sample": [166.9478759765625, 49.52874755859375, 220.0040740966797, 122.82691192626953, 64.1234130859375, 128.19351196289062, -107.15956115722656, 90.08621215820312, 210.12832641601562, 168.23904418945312, 255.67820739746094, 45.749473571777344, 92.47411346435547, 260.2010192871094, 130.5270233154297, 84.36561584472656, 20.22356414794922, 363.6938781738281, 29.62420654296875, 169.35992431640625, 71.39874267578125, 178.0516357421875, -75.89056396484375, 150.94940185546875, -3.0220985412597656, 43.855255126953125, 201.53912353515625, -23.6158447265625, -7.7573089599609375, 5.74969482421875, 67.47490692138672, 289.12957763671875, 104.53968811035156, -16.764007568359375, 27.08538818359375, 81.67216491699219, -18.022247314453125, 258.23443603515625, 0.4106616973876953, -27.985082626342773, -77.70570373535156, 151.3128662109375, -4.214656829833984, 50.56725311279297, -80.4508056640625, 154.8612823486328, 127.03181457519531, 16.29263687133789, -11.497596740722656, -4.52239990234375, 220.20021057128906, 10.087724685668945, 82.16818237304688, -14.82476806640625, 190.6954345703125, 37.33858871459961, 37.63511657714844, -26.85261344909668, 165.18618774414062, 40.925750732421875, 587.4664306640625, 289.16632080078125, 86.4887924194336, -16.732833862304688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000177.npy"} +{"epoch": 0.2599118942731278, "step": 178, "batch_size": 64, "mean": 115.63043975830078, "std": 144.10374450683594, "min": -139.31419372558594, "p10": -36.25558395385741, "median": 96.29513549804688, "p90": 322.2686462402345, "max": 572.281005859375, "pos_frac": 0.765625, "sample": [54.10666275024414, 18.934814453125, 165.58749389648438, -12.821746826171875, 142.08099365234375, 91.3922348022461, 112.6785659790039, 38.838470458984375, 21.83417510986328, 294.10736083984375, -0.9043045043945312, 118.75372314453125, 287.0229187011719, 111.77191162109375, 51.37501525878906, 158.6597900390625, 403.3098449707031, 116.19105529785156, 572.281005859375, -55.44630432128906, 209.82371520996094, 61.22126007080078, 13.732391357421875, 66.1924819946289, -4.493280410766602, 182.98663330078125, 386.9621276855469, -50.572166442871094, 58.330230712890625, -3.5035629272460938, -78.44651794433594, 99.66055297851562, 222.30160522460938, 22.767078399658203, 130.44647216796875, 192.6047821044922, 134.77127075195312, -94.301513671875, 65.42481231689453, 260.58795166015625, 31.398571014404297, 117.27452087402344, -139.31419372558594, 446.2888488769531, -10.657159805297852, 158.33041381835938, 120.02189636230469, 160.98495483398438, -41.09436798095703, 458.1703796386719, 36.801048278808594, 142.06515502929688, 347.7118225097656, -4.885873794555664, 16.255859375, 222.8315887451172, 92.92971801757812, -24.965087890625, 64.18760681152344, 219.98468017578125, 334.3377685546875, -90.82888793945312, 186.89907836914062, -10.630134582519531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000178.npy"} +{"epoch": 0.26138032305433184, "step": 179, "batch_size": 64, "mean": 80.14815521240234, "std": 140.57762145996094, "min": -288.9915771484375, "p10": -78.82952270507812, "median": 79.91036224365234, "p90": 296.60810852050787, "max": 386.7801208496094, "pos_frac": 0.6875, "sample": [118.1596908569336, 161.3231658935547, 177.95120239257812, 386.7801208496094, 61.317481994628906, -95.35015869140625, 187.7766876220703, 23.988140106201172, -21.084213256835938, 13.683378219604492, 83.5926513671875, 82.20540618896484, 46.80413818359375, 93.59711456298828, -14.432929992675781, 97.77616882324219, -79.9471435546875, 317.9468078613281, -59.06380844116211, -50.64154052734375, 74.37906646728516, -76.22174072265625, 356.8216247558594, 25.36960792541504, 230.51670837402344, 126.0455322265625, 159.9047393798828, 300.8836669921875, 170.48171997070312, 37.94551086425781, 81.61322021484375, 286.6318054199219, 343.53753662109375, 94.244140625, 80.07225036621094, 130.20936584472656, -36.8311653137207, -32.68855667114258, -1.0793838500976562, 89.7564468383789, 73.21729278564453, -36.828582763671875, 79.74847412109375, -92.40583801269531, 128.49623107910156, 332.6499938964844, -91.79967498779297, 242.5204620361328, 115.4428939819336, -119.16204071044922, -54.02680206298828, 242.1188507080078, 31.255386352539062, 245.02679443359375, 307.9029846191406, 217.5767059326172, -171.8388671875, 138.9603271484375, -58.87049102783203, -70.19285583496094, -288.9915771484375, 15.7867431640625, 23.726308822631836, -54.80530548095703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000179.npy"} +{"epoch": 0.26284875183553597, "step": 180, "batch_size": 64, "mean": 103.84219360351562, "std": 127.082275390625, "min": -209.7353973388672, "p10": -24.787814903259278, "median": 88.10892105102539, "p90": 267.4300903320313, "max": 500.0198974609375, "pos_frac": 0.796875, "sample": [103.5105972290039, 163.63021850585938, -84.51838684082031, 3.247478485107422, -69.53398895263672, 500.0198974609375, -24.39803695678711, 68.71945190429688, 105.59915161132812, 60.317535400390625, 5.113574981689453, 231.10031127929688, -209.7353973388672, 139.25213623046875, 185.17738342285156, 120.90267944335938, 86.96788024902344, 86.49710083007812, 109.04695892333984, 65.01506042480469, 168.81521606445312, -149.03712463378906, 59.26237869262695, 295.3663024902344, 70.77997589111328, 233.68121337890625, 166.22409057617188, -3.6300125122070312, 39.044654846191406, 55.01454544067383, 282.76690673828125, 3.193490982055664, 257.917724609375, 160.015380859375, 77.48433685302734, -4.94837760925293, 69.71073150634766, 208.54598999023438, -20.03636932373047, 183.58187866210938, 89.24996185302734, 19.22724151611328, 244.1429443359375, 66.30436706542969, -132.67535400390625, 85.93852233886719, -0.713470458984375, 262.30450439453125, 245.38380432128906, 91.53162384033203, 152.3446044921875, 271.8307800292969, -24.954862594604492, 340.6273193359375, 91.33463287353516, -23.703445434570312, 197.22073364257812, 154.8586883544922, 28.32170867919922, -76.2656478881836, 269.62677001953125, 282.40399169921875, 29.75151824951172, 182.12496948242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000180.npy"} +{"epoch": 0.2643171806167401, "step": 181, "batch_size": 64, "mean": 64.73393249511719, "std": 117.56401062011719, "min": -212.371337890625, "p10": -60.10828399658203, "median": 45.760847091674805, "p90": 212.33860473632825, "max": 396.2471923828125, "pos_frac": 0.765625, "sample": [137.7688446044922, -61.423255920410156, 15.926233291625977, 63.84396743774414, 112.1402816772461, -3.442323684692383, 33.601497650146484, 27.38005828857422, 86.9144058227539, 129.00819396972656, -6.026096343994141, 16.833955764770508, 4.662223815917969, 18.30596160888672, -161.8089599609375, 224.58270263671875, 115.76100158691406, -29.32396697998047, 82.13848876953125, -18.65555191040039, 38.00368118286133, -59.73834228515625, -16.4208984375, 227.1697540283203, -82.32810974121094, 88.66976928710938, 135.93453979492188, 287.251953125, 47.18073272705078, 56.38688659667969, 137.7940673828125, 81.41515350341797, -206.08995056152344, 47.251792907714844, 23.034332275390625, 84.69497680664062, 12.457597732543945, 183.57936096191406, 130.019775390625, -60.26683044433594, 15.82244873046875, -212.371337890625, 227.873046875, -18.763538360595703, 353.99359130859375, 170.6468963623047, -159.0384063720703, 52.06696319580078, -31.055198669433594, 137.34609985351562, 176.76138305664062, 396.2471923828125, 156.53179931640625, 40.27798080444336, 135.17274475097656, 7.381877899169922, 5.031976699829102, 2.3440780639648438, 289.2879638671875, 44.34096145629883, 29.684181213378906, 183.76904296875, 152.37890625, 43.05298614501953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000181.npy"} +{"epoch": 0.2657856093979442, "step": 182, "batch_size": 64, "mean": 114.658935546875, "std": 139.10092163085938, "min": -248.81594848632812, "p10": -54.9115077972412, "median": 105.5335464477539, "p90": 288.75898437500007, "max": 398.6893310546875, "pos_frac": 0.828125, "sample": [292.9827880859375, 78.73104858398438, 326.6080322265625, 201.73385620117188, 87.989013671875, 225.1903533935547, 240.15728759765625, 278.9034423828125, 354.04241943359375, 73.68341064453125, 128.51365661621094, 46.49259567260742, 28.160402297973633, -16.302783966064453, 275.89990234375, -248.81594848632812, 237.01107788085938, 65.49613952636719, 5.108518600463867, 48.68595886230469, 398.6893310546875, -3.441099166870117, 262.7229919433594, 121.03266906738281, 156.55426025390625, -61.099510192871094, 254.70620727539062, 90.034423828125, 14.744319915771484, 67.95519256591797, 148.85568237304688, 277.6973876953125, -116.69710540771484, -127.59160614013672, 166.84402465820312, 244.15283203125, 48.513336181640625, 147.10690307617188, 226.83071899414062, 235.33018493652344, -65.97342681884766, 76.14275360107422, 33.51329040527344, 136.52085876464844, 162.77391052246094, 179.38742065429688, 185.80023193359375, 128.1341094970703, -133.6134796142578, 5.196403503417969, 387.1581726074219, 146.9771728515625, 51.25251770019531, 9.823732376098633, -31.514373779296875, 6.141674041748047, -129.88302612304688, 71.10472106933594, 296.6849060058594, 19.179725646972656, 356.4549560546875, -40.472835540771484, 45.735023498535156, 158.43479919433594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000182.npy"} +{"epoch": 0.26725403817914833, "step": 183, "batch_size": 64, "mean": 61.56635665893555, "std": 112.04437255859375, "min": -159.7234344482422, "p10": -81.91487197875976, "median": 58.70620155334473, "p90": 196.79857788085943, "max": 355.31103515625, "pos_frac": 0.765625, "sample": [19.454299926757812, 76.65479278564453, -7.325223922729492, 56.27925109863281, 15.772151947021484, 175.9486846923828, 151.2696075439453, 141.46157836914062, 42.18890380859375, 52.75267791748047, -83.25495910644531, 141.9830322265625, 65.01518249511719, 17.051721572875977, 29.07501220703125, 88.03447723388672, 338.68585205078125, 247.5704345703125, 143.1065216064453, 28.69093132019043, 65.8365249633789, 118.71992492675781, 301.43524169921875, 98.10586547851562, -15.365486145019531, 5.241554260253906, 69.48931884765625, 61.13315200805664, 0.835174560546875, -159.7234344482422, 180.9149169921875, 85.20333099365234, -54.515106201171875, 13.48521614074707, 182.73739624023438, 70.33218383789062, 44.88228988647461, 100.4394760131836, 133.9619140625, 238.48306274414062, -135.4040985107422, -43.18439483642578, 355.31103515625, -78.78800201416016, 26.94713592529297, -36.99665832519531, 82.67940521240234, -100.27810668945312, -128.23419189453125, -110.62982177734375, 159.76519775390625, 112.28253173828125, 205.97874450683594, 202.82479858398438, 90.79096984863281, 52.847068786621094, 27.918128967285156, -73.12812042236328, 153.98886108398438, 86.64990234375, -154.58685302734375, -62.390079498291016, 23.1707763671875, 0.6651153564453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000183.npy"} +{"epoch": 0.2687224669603524, "step": 184, "batch_size": 64, "mean": 65.5010986328125, "std": 114.14273834228516, "min": -155.44125366210938, "p10": -49.804735565185545, "median": 33.50018501281738, "p90": 211.72426757812502, "max": 347.9111633300781, "pos_frac": 0.75, "sample": [29.442108154296875, -108.43221282958984, 65.34984588623047, 81.61365509033203, 204.3796844482422, 195.02020263671875, 2.971405029296875, 14.4266357421875, 161.39151000976562, -17.231712341308594, 319.4879150390625, 347.9111633300781, 63.646728515625, 219.51119995117188, -46.31969451904297, 204.11639404296875, 10.449264526367188, 214.2528076171875, 96.26113891601562, 287.9521484375, -23.61920166015625, 311.17230224609375, -88.293212890625, 16.800155639648438, 41.35942459106445, 37.210357666015625, 43.09368896484375, 5.700447082519531, 89.11306762695312, 3.908620834350586, 37.10383605957031, 98.09640502929688, 152.1560516357422, 202.01905822753906, -92.62928771972656, 205.8243408203125, 17.106971740722656, -23.985923767089844, -41.0647087097168, 24.624874114990234, 74.65047454833984, -19.713821411132812, 31.800369262695312, 6.222208023071289, 14.590141296386719, 200.0603485107422, 72.76458740234375, 55.61098861694336, -53.21612548828125, -132.3682403564453, -51.29832458496094, 24.8355770111084, -5.213230133056641, 3.4405059814453125, -155.44125366210938, 34.08213806152344, -41.423065185546875, 177.19369506835938, 32.91823196411133, 109.0103988647461, 17.498497009277344, -1.082672119140625, 99.39936828613281, 335.8515930175781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000184.npy"} +{"epoch": 0.2701908957415565, "step": 185, "batch_size": 64, "mean": 51.37147521972656, "std": 123.07954406738281, "min": -195.67514038085938, "p10": -65.13702812194823, "median": 27.264354705810547, "p90": 205.4223220825196, "max": 429.9853820800781, "pos_frac": 0.609375, "sample": [-162.80783081054688, -67.66688537597656, -21.135393142700195, 85.56881713867188, 81.32414245605469, 17.33884620666504, -5.581220626831055, 76.71903991699219, 410.0514831542969, -0.10239410400390625, 17.424510955810547, 110.7847671508789, 107.59173583984375, -7.208225250244141, 264.133056640625, 35.06982421875, -9.872882843017578, 35.075416564941406, 142.6636505126953, -116.99212646484375, -9.968955993652344, 38.61940002441406, 26.35598373413086, 77.04429626464844, 80.73941040039062, -6.052619934082031, -118.81037902832031, 22.70264434814453, 87.27124786376953, -137.21530151367188, 38.501922607421875, 21.230220794677734, 93.74945068359375, 97.06199645996094, 26.530426025390625, 368.2930908203125, -53.04481506347656, 29.940040588378906, 190.44415283203125, -44.61690902709961, 34.59629440307617, 0.3412761688232422, -19.398048400878906, -9.192245483398438, -24.3110294342041, -1.2578182220458984, 324.0140075683594, 140.56005859375, 152.43984985351562, -0.11380577087402344, 429.9853820800781, -33.029563903808594, 27.99828338623047, 67.45146179199219, -14.964859008789062, 211.84153747558594, 34.94781494140625, 126.92523193359375, -195.67514038085938, 271.0198669433594, -78.75127410888672, 90.42090606689453, -9.993240356445312, -59.23402786254883], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000185.npy"} +{"epoch": 0.27165932452276065, "step": 186, "batch_size": 64, "mean": 97.591064453125, "std": 119.95823669433594, "min": -197.9208984375, "p10": -24.304633712768556, "median": 79.59574127197266, "p90": 237.80258789062506, "max": 463.24365234375, "pos_frac": 0.8125, "sample": [-197.9208984375, 125.27824401855469, 317.3908386230469, 149.8588409423828, 150.91146850585938, 2.5003662109375, 80.56857299804688, 44.960079193115234, 214.43377685546875, 157.0322265625, 68.91973876953125, 221.3173828125, 1.1148605346679688, 83.561767578125, 38.70840835571289, -24.404926300048828, 94.6572265625, -117.32179260253906, 41.98378372192383, 49.149749755859375, 313.1305236816406, 45.926273345947266, -5.631217956542969, 174.65806579589844, 40.612281799316406, 78.62290954589844, 102.49497985839844, 148.815185546875, 330.039306640625, 104.85832214355469, 179.77749633789062, -28.47464942932129, 154.115234375, 69.30937194824219, 28.38435173034668, 125.62791442871094, 396.67791748046875, 244.86767578125, 49.12312698364258, 4.630329132080078, 158.7857666015625, 29.226091384887695, 76.60235595703125, 62.257354736328125, -73.37734985351562, 403.8427734375, -20.463241577148438, 73.44806671142578, -24.07061767578125, -44.94629669189453, 463.24365234375, 158.96200561523438, 70.46829223632812, 144.1658172607422, -0.109161376953125, 143.4207763671875, 94.09178161621094, -26.429588317871094, 5.0985870361328125, 122.63691711425781, 145.3740234375, 134.36036682128906, -5.628509521484375, 94.60321044921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000186.npy"} +{"epoch": 0.27312775330396477, "step": 187, "batch_size": 64, "mean": 80.84567260742188, "std": 107.22187805175781, "min": -151.29153442382812, "p10": -36.967428588867186, "median": 61.01144790649414, "p90": 215.2120788574219, "max": 391.4598388671875, "pos_frac": 0.78125, "sample": [-151.29153442382812, 210.62228393554688, -22.471471786499023, 58.923274993896484, -77.01469421386719, 234.81985473632812, 82.64594268798828, 157.84576416015625, 53.595680236816406, 41.36354446411133, 83.28648376464844, 63.045204162597656, 337.5389404296875, 78.81194305419922, 124.44697570800781, 212.44000244140625, 162.08035278320312, 65.6378173828125, 113.44790649414062, -46.118831634521484, 57.51863098144531, 36.23063659667969, 391.4598388671875, -32.1004638671875, 161.17137145996094, 57.37837219238281, 232.61184692382812, -37.28829574584961, 189.92384338378906, 43.17295837402344, -45.705841064453125, 78.4260482788086, 19.058792114257812, -55.99598693847656, -36.2187385559082, 155.75128173828125, -34.57048797607422, 9.611181259155273, -6.902549743652344, 215.33541870117188, 149.5516815185547, 348.96051025390625, 93.95647430419922, 43.515037536621094, 65.60675048828125, 34.433712005615234, 57.75136184692383, 40.200897216796875, -119.37720489501953, 41.79035186767578, 104.56928253173828, -13.847587585449219, 156.4573516845703, 87.82362365722656, 76.04244995117188, 199.2375946044922, 86.28164672851562, 2.1301345825195312, 58.977691650390625, 228.05699157714844, 214.92428588867188, 38.74524688720703, -22.869565963745117, 18.681467056274414], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000187.npy"} +{"epoch": 0.2745961820851689, "step": 188, "batch_size": 64, "mean": 97.13883209228516, "std": 119.12627410888672, "min": -127.86939239501953, "p10": -17.246684265136714, "median": 74.27271270751953, "p90": 285.40931396484376, "max": 496.2091064453125, "pos_frac": 0.8125, "sample": [290.2723083496094, 123.96797943115234, 243.93577575683594, 63.77437210083008, 75.2160873413086, -2.8163604736328125, -29.729171752929688, -120.1723403930664, 290.8521728515625, 80.96211242675781, 53.370330810546875, -31.296730041503906, 112.74907684326172, 81.02186584472656, 282.52386474609375, 246.92079162597656, 94.46343231201172, -19.84845733642578, 61.642486572265625, 9.300674438476562, 6.8990020751953125, 29.336669921875, 36.81242370605469, 142.58206176757812, 72.79125213623047, 25.433250427246094, 11.581863403320312, -4.812534332275391, 318.6917419433594, 47.745574951171875, 354.86822509765625, -127.86939239501953, 286.64593505859375, 112.34742736816406, 159.6602783203125, 112.97247314453125, 64.9224853515625, 188.31666564941406, -9.992233276367188, 189.45004272460938, 166.6415557861328, 13.684890747070312, -83.53652954101562, -21.31757164001465, 193.97364807128906, 41.46690368652344, 213.07098388671875, 25.864116668701172, 12.233627319335938, 324.31329345703125, 87.32511138916016, 167.12136840820312, 53.46279525756836, -11.026042938232422, 167.197509765625, 81.17190551757812, 99.86173248291016, 496.2091064453125, 94.66293334960938, 73.32933807373047, 76.2674560546875, -11.175880432128906, 13.632026672363281, 16.957517623901367], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000188.npy"} +{"epoch": 0.27606461086637296, "step": 189, "batch_size": 64, "mean": 80.89956665039062, "std": 133.24851989746094, "min": -170.42987060546875, "p10": -64.2357925415039, "median": 58.43536949157715, "p90": 200.66311645507815, "max": 576.6710205078125, "pos_frac": 0.78125, "sample": [41.32386016845703, 178.7076416015625, -118.47453308105469, 350.2637939453125, -68.23917388916016, -54.894569396972656, 43.35022735595703, 4.322807312011719, 1.4209346771240234, 40.35174560546875, 148.8533935546875, 153.38150024414062, 149.88751220703125, -7.117279052734375, 158.20913696289062, 46.378265380859375, 193.52874755859375, 45.31422805786133, -170.42987060546875, 5.808633804321289, -81.0353775024414, 29.476579666137695, 78.95573425292969, 172.0858154296875, 145.64114379882812, 81.72482299804688, -43.65266418457031, 55.89651107788086, 33.453495025634766, 190.59100341796875, 5.662120819091797, 21.699880599975586, 95.11495971679688, 482.040283203125, 61.02204895019531, -23.26372528076172, 24.34520721435547, 221.15126037597656, 85.03257751464844, 12.992149353027344, -161.31997680664062, 121.53812408447266, -50.61260986328125, 121.7003402709961, 58.739070892333984, 331.4154052734375, 168.55751037597656, 576.6710205078125, -21.79486846923828, 128.50894165039062, 173.1173095703125, 29.380168914794922, 79.92527770996094, 157.7870330810547, 109.21273803710938, 203.720703125, 244.15121459960938, 95.43302917480469, -0.9066619873046875, -150.06402587890625, 58.13166809082031, -75.11103820800781, 23.690231323242188, 164.82078552246094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000189.npy"} +{"epoch": 0.2775330396475771, "step": 190, "batch_size": 64, "mean": 95.25032043457031, "std": 138.31202697753906, "min": -198.79246520996094, "p10": -63.017374420166014, "median": 67.86361312866211, "p90": 307.9510498046875, "max": 420.7372741699219, "pos_frac": 0.796875, "sample": [-3.6136474609375, 114.65856170654297, 184.90927124023438, 157.78964233398438, 19.29041290283203, -81.81708526611328, 252.85855102539062, 43.64189910888672, 12.856464385986328, 356.8375244140625, 82.97314453125, 69.33780670166016, 134.86061096191406, 20.097007751464844, 358.85638427734375, 142.62298583984375, 103.66789245605469, 76.05201721191406, 193.75283813476562, 18.40631103515625, 2.7015552520751953, 265.0103759765625, -198.79246520996094, 65.61922454833984, 262.1053161621094, -23.449996948242188, 61.044036865234375, 321.3260498046875, 154.43247985839844, 54.6015625, -17.514328002929688, 306.4403076171875, 20.667373657226562, -4.658349990844727, 128.3443603515625, 79.87776184082031, -94.02729034423828, -64.2806396484375, -150.20806884765625, 16.480026245117188, 116.37032318115234, -60.06975555419922, 100.24895477294922, 54.05702209472656, -26.696748733520508, -87.98382568359375, 97.3602294921875, 308.5985107421875, 52.91033935546875, 59.546897888183594, -188.8704376220703, 390.06744384765625, 119.35333251953125, 25.87711524963379, 206.83543395996094, 106.28458404541016, 66.38941955566406, 381.2120361328125, 4.822486877441406, 420.7372741699219, 206.5726318359375, 221.81922912597656, 34.97187423706055, 41.847503662109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000190.npy"} +{"epoch": 0.2790014684287812, "step": 191, "batch_size": 64, "mean": 136.34649658203125, "std": 148.77737426757812, "min": -142.0181884765625, "p10": -7.230722808837889, "median": 113.85742950439453, "p90": 318.81248474121094, "max": 557.7564697265625, "pos_frac": 0.859375, "sample": [465.1551208496094, 109.53019714355469, 557.7564697265625, 32.07133483886719, 297.1974182128906, 72.05302429199219, 135.91441345214844, 268.7774353027344, 15.762395858764648, 355.41180419921875, 99.47569274902344, 162.39828491210938, -7.647926330566406, 263.71319580078125, 282.0091857910156, 38.40186309814453, 37.61028289794922, 80.10523223876953, 30.933853149414062, 81.31778717041016, 263.6033935546875, 60.359832763671875, 43.14996337890625, 301.701171875, 123.39925384521484, 4.1375274658203125, 8.52629280090332, 530.9663696289062, 176.57521057128906, -23.17523956298828, 217.98724365234375, -72.80975341796875, 17.020172119140625, -49.47098159790039, 140.96730041503906, 177.385498046875, 172.07296752929688, 143.99652099609375, 320.88592529296875, 29.935575485229492, 8.26637077331543, 245.93008422851562, 217.98294067382812, -142.0181884765625, 73.22334289550781, 504.7397766113281, 32.11531066894531, -56.35105895996094, 118.18466186523438, 102.06297302246094, 182.48228454589844, 313.9744567871094, 30.723960876464844, -1.8846855163574219, 126.02976989746094, 175.09947204589844, -62.496009826660156, 122.89871215820312, -6.2572479248046875, 126.5656509399414, 357.0443115234375, 35.57379913330078, 86.28129577636719, 170.84317016601562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000191.npy"} +{"epoch": 0.28046989720998533, "step": 192, "batch_size": 64, "mean": 91.21162414550781, "std": 145.2598114013672, "min": -246.18869018554688, "p10": -54.44992065429687, "median": 73.8321418762207, "p90": 311.84131469726566, "max": 526.2483520507812, "pos_frac": 0.734375, "sample": [-57.137611389160156, -137.5897674560547, 40.77069091796875, 166.63092041015625, 132.63027954101562, -32.11711883544922, -48.17864227294922, -3.3007373809814453, 32.3861083984375, -246.18869018554688, 30.792694091796875, 97.80740356445312, -36.92445373535156, 217.6219940185547, 18.595382690429688, 40.95652770996094, -76.76213073730469, 106.05494689941406, -44.311187744140625, 169.0894775390625, -117.18419647216797, 403.8091125488281, -42.96608352661133, 75.5770492553711, -17.655532836914062, 317.1234130859375, 354.43304443359375, 377.3632507324219, 160.23141479492188, 359.20208740234375, 96.38920593261719, 78.84541320800781, 526.2483520507812, 125.15524291992188, 172.65289306640625, 25.25758934020996, 115.11044311523438, 213.87379455566406, 270.0832214355469, 57.86105728149414, 44.79008483886719, 299.51641845703125, 7.457178115844727, 37.35224914550781, 132.16456604003906, 57.997398376464844, 60.84192657470703, -164.3693084716797, 203.65487670898438, 74.40772247314453, 82.51175689697266, -11.16058349609375, -44.401206970214844, -62.82025909423828, 16.783533096313477, 229.03482055664062, 73.25656127929688, -28.947006225585938, 39.27618408203125, 210.32772827148438, 139.67822265625, 104.11811828613281, 79.23588562011719, 334.6003112792969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000192.npy"} +{"epoch": 0.28193832599118945, "step": 193, "batch_size": 64, "mean": 53.221702575683594, "std": 139.67852783203125, "min": -414.28167724609375, "p10": -82.29847106933592, "median": 54.093040466308594, "p90": 225.68218536376958, "max": 361.3994140625, "pos_frac": 0.65625, "sample": [115.52641296386719, -16.70764923095703, 29.171119689941406, 189.62185668945312, -414.28167724609375, 95.9330825805664, 65.65380096435547, 57.23219299316406, -403.68035888671875, 304.5050048828125, 65.51436614990234, -4.878225326538086, 187.11090087890625, -35.51499557495117, -19.79193115234375, 91.36997985839844, 94.27257537841797, 125.60302734375, -0.25078582763671875, 229.086669921875, 3.3283119201660156, -17.63404083251953, 14.24551773071289, 153.417724609375, 23.413175582885742, 126.99151611328125, -47.22829055786133, -33.369056701660156, 1.9430694580078125, -59.97917175292969, -25.620553970336914, 217.73838806152344, -89.40281677246094, -94.8261489868164, 243.46267700195312, -125.45197296142578, 186.30654907226562, -99.0498275756836, 14.300233840942383, 50.953887939453125, 10.318984985351562, 140.84835815429688, 267.7999572753906, 166.75146484375, 111.73695373535156, -135.94973754882812, 316.3897705078125, 91.90767669677734, 123.02814483642578, 124.13631439208984, -57.98255157470703, 96.95939636230469, -18.818462371826172, -65.72166442871094, -44.61616516113281, 141.58279418945312, 361.3994140625, 131.45358276367188, -53.79117965698242, 21.018035888671875, 276.407470703125, 81.7709732055664, 19.776611328125, 100.74811553955078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000193.npy"} +{"epoch": 0.2834067547723935, "step": 194, "batch_size": 64, "mean": 86.59368133544922, "std": 100.32608032226562, "min": -98.33474731445312, "p10": -51.169614410400385, "median": 66.06909561157227, "p90": 244.2615966796875, "max": 300.4742126464844, "pos_frac": 0.8125, "sample": [140.03421020507812, 20.19281768798828, 33.09564208984375, 12.800125122070312, 120.76046752929688, 260.1180419921875, 105.70558166503906, 156.3927459716797, -26.75299072265625, 19.692394256591797, 29.09063148498535, 250.33560180664062, -69.08206176757812, -58.35516357421875, 44.49176788330078, 62.291259765625, 210.0174560546875, 60.24555206298828, 62.48280715942383, 132.55105590820312, 31.172794342041016, 78.22789001464844, 14.68350601196289, 7.056621551513672, 119.43238067626953, -75.014892578125, -3.724172592163086, 214.99343872070312, 66.3163070678711, 77.83385467529297, 254.60414123535156, 133.90269470214844, 241.9353485107422, 177.10592651367188, 29.39706802368164, 285.28924560546875, 63.90064239501953, 32.842742919921875, 144.71974182128906, 47.8609619140625, 189.29893493652344, -84.80752563476562, 44.04087829589844, -14.652565002441406, 104.85430908203125, 168.29092407226562, -3.429584503173828, -98.33474731445312, 40.90430450439453, -55.221282958984375, 109.92164611816406, -64.71967315673828, 63.602203369140625, 245.25856018066406, 79.70785522460938, 300.4742126464844, 131.02809143066406, 65.82188415527344, 292.31298828125, 87.1045913696289, 178.26519775390625, 180.41787719726562, 114.92793273925781, -41.715721130371094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000194.npy"} +{"epoch": 0.28487518355359764, "step": 195, "batch_size": 64, "mean": 93.7316665649414, "std": 116.76734924316406, "min": -96.92083740234375, "p10": -44.9694034576416, "median": 79.96736145019531, "p90": 243.4078491210938, "max": 394.6134948730469, "pos_frac": 0.796875, "sample": [82.76483917236328, -26.420989990234375, 221.55516052246094, 46.13014602661133, 71.85902404785156, 394.6134948730469, 72.9573745727539, 311.96026611328125, 223.3995361328125, -9.172346115112305, -6.670257568359375, 49.961395263671875, 3.2907791137695312, -93.45954895019531, 248.1053466796875, 117.60935974121094, 55.972900390625, 217.80738830566406, -42.68903350830078, 330.34991455078125, -43.5693473815918, 103.35182189941406, -96.92083740234375, 38.415374755859375, 105.4461669921875, 101.97879791259766, 221.79940795898438, 314.9622802734375, 147.62649536132812, 124.6088638305664, 147.05841064453125, -79.39969635009766, 73.07042694091797, 179.09805297851562, 13.47027587890625, 153.0439910888672, -29.896177291870117, 1.4317245483398438, 172.03921508789062, 205.8827362060547, -95.81145477294922, 256.57049560546875, 43.49553298950195, 80.46819305419922, 389.27020263671875, 145.1142578125, 79.4665298461914, 60.45713806152344, -56.342994689941406, 12.75459098815918, 159.5299530029297, -72.04566955566406, 232.447021484375, 111.12763977050781, 20.884963989257812, 86.83279418945312, 31.077354431152344, 22.89759635925293, 18.3452205657959, 85.69930267333984, -45.569427490234375, 51.86738967895508, 147.1588134765625, 109.70831298828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000195.npy"} +{"epoch": 0.28634361233480177, "step": 196, "batch_size": 64, "mean": 106.43045043945312, "std": 142.9573974609375, "min": -218.5308837890625, "p10": -49.92964172363281, "median": 80.9216194152832, "p90": 307.9012451171875, "max": 444.94635009765625, "pos_frac": 0.765625, "sample": [195.91085815429688, 205.08221435546875, 312.79827880859375, 218.32005310058594, -46.75910186767578, 289.98565673828125, 301.927490234375, 437.60784912109375, 232.06686401367188, 73.92831420898438, 9.931865692138672, 48.609291076660156, 276.94970703125, 196.42483520507812, 48.194854736328125, 11.75370979309082, -28.12875747680664, 90.74562072753906, -6.3095245361328125, 49.35328674316406, 427.35772705078125, -6.034168243408203, 52.74949645996094, 35.772151947021484, 131.37002563476562, 133.59530639648438, 124.67033386230469, 146.56948852539062, 48.04328918457031, -11.829381942749023, 113.70805358886719, 77.1927261352539, 438.9598388671875, 444.94635009765625, -20.152244567871094, 177.5365447998047, 84.6505126953125, 64.41368103027344, 109.75102233886719, 89.01718139648438, 66.54898071289062, 62.087547302246094, 220.61376953125, -2.5924072265625, 140.70758056640625, 31.694355010986328, 84.98680114746094, -151.9547119140625, 69.27911376953125, -218.5308837890625, -68.31256103515625, 154.10043334960938, 93.23782348632812, -58.510929107666016, 311.891357421875, -51.28844451904297, -140.95510864257812, 66.05641174316406, -2.1135711669921875, 126.97350311279297, 262.03546142578125, 310.46142578125, -80.173828125, 4.625080108642578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000196.npy"} +{"epoch": 0.2878120411160059, "step": 197, "batch_size": 64, "mean": 82.47078704833984, "std": 116.71819305419922, "min": -120.32475280761719, "p10": -39.616673278808584, "median": 53.25123405456543, "p90": 239.30527191162113, "max": 423.7657470703125, "pos_frac": 0.8125, "sample": [61.141700744628906, 334.37384033203125, 140.17575073242188, -24.825225830078125, 159.514892578125, 7.4412689208984375, 12.999481201171875, -62.760738372802734, 65.93265533447266, 423.7657470703125, 355.3138732910156, 243.5802764892578, 66.0057601928711, 128.2298583984375, -104.51178741455078, 2.8931732177734375, 126.14661407470703, 283.67523193359375, 32.802406311035156, 39.698978424072266, 103.66775512695312, 218.0025177001953, 42.39496994018555, -7.111480712890625, 58.305213928222656, 6.330711364746094, -104.57799530029297, 129.64019775390625, 74.3625259399414, 97.76919555664062, 255.12994384765625, 147.91680908203125, 34.12779235839844, 229.33026123046875, 2.369293212890625, 194.00868225097656, -42.99725341796875, 0.3301239013671875, -78.99557495117188, -120.32475280761719, 33.849082946777344, 127.1346664428711, -1.432535171508789, 5.760902404785156, 17.237112045288086, 77.51719665527344, 338.073486328125, 208.27520751953125, 26.497024536132812, 197.13282775878906, -15.714380264282227, 48.1972541809082, 81.32769012451172, 16.217079162597656, 17.110191345214844, 128.6712646484375, 172.41085815429688, 71.14642333984375, 209.68521118164062, -31.728652954101562, -65.63827514648438, 19.12049102783203, 19.6744384765625, 46.332855224609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000197.npy"} +{"epoch": 0.28928046989721, "step": 198, "batch_size": 64, "mean": 105.940673828125, "std": 145.472412109375, "min": -244.53759765625, "p10": -29.44637622833251, "median": 89.70967483520508, "p90": 230.52896575927738, "max": 595.9739990234375, "pos_frac": 0.84375, "sample": [-128.18896484375, 106.16111755371094, -42.988182067871094, 538.579345703125, 124.48857116699219, 196.96383666992188, 342.57562255859375, 56.76620864868164, 151.42198181152344, 49.165504455566406, 110.09954833984375, 167.01104736328125, -139.07791137695312, 5.630943298339844, 0.534210205078125, 235.1439971923828, 145.16134643554688, 115.97525024414062, 86.87582397460938, 86.38105010986328, -22.3429012298584, -32.49072265625, 32.128379821777344, 166.4476776123047, 13.491178512573242, 70.85680389404297, -65.44412994384766, 201.23709106445312, 97.07339477539062, 79.23872375488281, 91.95972442626953, 115.15410614013672, 135.3131103515625, 72.44383239746094, 133.41380310058594, 14.385562896728516, 81.34587097167969, 161.59326171875, 155.27142333984375, 219.76055908203125, -244.53759765625, 542.0923461914062, 99.46222686767578, 65.45709991455078, 1.6118621826171875, 16.546920776367188, 249.87631225585938, -43.35792541503906, 2.2859363555908203, 76.05521392822266, 40.92525863647461, 94.98849487304688, 87.45962524414062, 140.04615783691406, -9.051401138305664, 142.99293518066406, 160.2446746826172, 208.0191650390625, 14.45939826965332, 424.9229736328125, -6.2322235107421875, 595.9739990234375, 48.04668426513672, 142.39817810058594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000198.npy"} +{"epoch": 0.2907488986784141, "step": 199, "batch_size": 64, "mean": 104.91361999511719, "std": 141.22674560546875, "min": -170.7671661376953, "p10": -55.02019882202147, "median": 88.84841537475586, "p90": 281.1950927734375, "max": 459.794677734375, "pos_frac": 0.796875, "sample": [217.65762329101562, 168.74073791503906, -138.10897827148438, -9.278167724609375, 274.7083435058594, 76.06478118896484, -59.759552001953125, 208.4912567138672, -166.059814453125, 125.39055633544922, 2.5557022094726562, -81.671875, -168.00393676757812, 205.05223083496094, 209.4993896484375, 61.64942169189453, 15.014350891113281, 136.84356689453125, 399.1781005859375, 42.81806182861328, 89.58706665039062, 177.05543518066406, 372.36602783203125, 313.93145751953125, -97.15083312988281, 35.65235900878906, -25.576759338378906, 126.86345672607422, 210.70428466796875, 23.074249267578125, 39.213523864746094, 253.08456420898438, 459.794677734375, 82.32073974609375, 73.79491424560547, 185.6319580078125, -26.969573974609375, 116.43658447265625, 38.2275390625, 99.6492919921875, 63.424896240234375, 272.6875305175781, 124.8260726928711, 33.6004524230957, 283.9751281738281, 130.14263916015625, 58.26363754272461, -43.961708068847656, -41.88643264770508, -170.7671661376953, 269.4073791503906, 84.53211212158203, 111.76913452148438, 88.1097640991211, 356.92120361328125, 171.35682678222656, 6.102739334106445, 92.13031768798828, 58.924705505371094, 92.93099975585938, 49.097503662109375, 149.53585815429688, 424.030517578125, -19.155136108398438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000199.npy"} +{"epoch": 0.2922173274596182, "step": 200, "batch_size": 64, "mean": 103.79698181152344, "std": 158.9437713623047, "min": -288.65985107421875, "p10": -49.35160484313963, "median": 83.80240631103516, "p90": 338.51315917968753, "max": 536.0819091796875, "pos_frac": 0.765625, "sample": [232.25759887695312, 107.68302917480469, 59.00124740600586, 408.7912292480469, 36.09303665161133, 147.342529296875, 12.812736511230469, -3.580718994140625, 437.58709716796875, -61.9498176574707, 326.06024169921875, 245.50372314453125, 98.98458099365234, 6.125514984130859, 83.50404357910156, 327.4212951660156, 220.7760467529297, 394.53704833984375, 0.6065521240234375, 343.2668151855469, 67.1584243774414, -0.43192481994628906, 158.17306518554688, 225.8898162841797, -28.296550750732422, 70.48690795898438, 166.58399963378906, -211.230224609375, 536.0819091796875, 94.36022186279297, 100.57704162597656, -33.86141586303711, -0.6390380859375, 75.19039916992188, 52.22999572753906, -29.734174728393555, -288.65985107421875, 30.22357177734375, 97.48590087890625, 352.7225646972656, 0.7404384613037109, 85.8653335571289, 55.945892333984375, -16.20899200439453, 7.390705108642578, -157.86639404296875, 84.10076904296875, -55.990257263183594, 300.2573547363281, -33.021095275878906, 5.3977203369140625, 425.63818359375, 44.661712646484375, 18.744773864746094, 103.70903015136719, 259.8455810546875, 152.53167724609375, 164.4581298828125, 103.77923583984375, 170.2305908203125, -120.99433898925781, 139.87588500976562, -78.71687316894531, 125.49677276611328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000200.npy"} +{"epoch": 0.2936857562408223, "step": 201, "batch_size": 64, "mean": 102.45198059082031, "std": 146.0702362060547, "min": -291.5274963378906, "p10": -53.54555053710936, "median": 81.72940444946289, "p90": 285.98340148925786, "max": 481.3332824707031, "pos_frac": 0.8125, "sample": [253.06854248046875, 16.00981330871582, 227.81935119628906, -220.1278076171875, 46.21173858642578, -93.49711608886719, 8.53018569946289, 14.901311874389648, -27.458267211914062, 85.04730224609375, 3.020477294921875, 27.745513916015625, 35.807373046875, -18.815872192382812, 481.3332824707031, 106.66861724853516, 63.29881286621094, 102.73171997070312, 24.141204833984375, 238.10806274414062, 337.09130859375, 124.32079315185547, 11.497726440429688, 85.21424865722656, 224.28292846679688, 290.88946533203125, -85.63442993164062, 232.3788604736328, 202.25180053710938, -8.559438705444336, 70.327880859375, 122.74530029296875, 136.14761352539062, 119.87126159667969, -33.78620910644531, -75.6292724609375, 177.03158569335938, 73.33818817138672, 56.93351745605469, 468.46282958984375, 106.1561279296875, -24.860008239746094, 169.69155883789062, 77.87515258789062, 48.93701171875, 169.86717224121094, 161.78067016601562, 246.03724670410156, 400.6033630371094, 73.49331665039062, 91.9976577758789, -71.93350219726562, 148.15049743652344, 78.41150665283203, -291.5274963378906, 299.092041015625, 423.4743957519531, 141.76747131347656, 6.779352188110352, -62.01383972167969, 274.5359191894531, 48.773094177246094, 12.527481079101562, 123.59056854248047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000201.npy"} +{"epoch": 0.29515418502202645, "step": 202, "batch_size": 64, "mean": 73.0613021850586, "std": 144.16807556152344, "min": -219.86581420898438, "p10": -107.13042755126949, "median": 66.129150390625, "p90": 258.3034912109375, "max": 535.3772583007812, "pos_frac": 0.703125, "sample": [535.3772583007812, -52.10062789916992, 118.62454986572266, 322.021240234375, 2.5854129791259766, -60.787330627441406, 199.52752685546875, -58.19976043701172, 124.62033081054688, 94.87437438964844, 58.13652801513672, -186.70936584472656, 173.59146118164062, 242.83828735351562, 203.17990112304688, 50.28376770019531, -199.16070556640625, 48.760772705078125, 106.85539245605469, 348.71282958984375, 118.22762298583984, 286.45660400390625, -133.60916137695312, 196.976806640625, 28.559288024902344, 28.33910369873047, 390.752197265625, -17.182966232299805, 106.07785034179688, 132.27688598632812, 107.70732116699219, 126.48747253417969, 261.20343017578125, -28.439849853515625, -39.27081298828125, -124.10520935058594, 84.72218322753906, 63.821624755859375, 7.189933776855469, 235.57769775390625, 104.03111267089844, -7.2718963623046875, 22.164440155029297, 258.4341735839844, 166.5704345703125, 114.37623596191406, 82.22783660888672, -219.86581420898438, 68.43667602539062, 105.81570434570312, 16.638912200927734, 45.67668151855469, -43.58253479003906, -74.30620574951172, 44.3941650390625, -120.94432067871094, 83.62283325195312, -74.89801025390625, -45.769657135009766, 55.363868713378906, 257.9985656738281, -142.94102478027344, 95.52751159667969, -20.57644271850586], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000202.npy"} +{"epoch": 0.2966226138032305, "step": 203, "batch_size": 64, "mean": 90.73899841308594, "std": 125.97115325927734, "min": -185.37420654296875, "p10": -74.4354507446289, "median": 75.8685073852539, "p90": 261.6606369018555, "max": 463.81072998046875, "pos_frac": 0.78125, "sample": [19.024433135986328, 137.49044799804688, 164.79580688476562, 63.426536560058594, -29.888973236083984, 285.30084228515625, -79.21707153320312, 148.43515014648438, 24.46582794189453, 130.57003784179688, -0.380218505859375, 108.8886489868164, 106.71875762939453, 164.90676879882812, -112.59663391113281, 195.38645935058594, 141.42295837402344, -31.337783813476562, 192.86038208007812, 139.6648712158203, 272.0342102050781, 417.47784423828125, -80.04470825195312, 108.95506286621094, 126.55535125732422, 40.37724304199219, 463.81072998046875, 11.532844543457031, -90.4300537109375, 153.7486114501953, 11.474021911621094, 176.85214233398438, 191.89114379882812, 269.6022644042969, 44.24322509765625, 186.7405548095703, 87.21369934082031, 110.321044921875, 80.45632934570312, 131.5811767578125, 29.861663818359375, 67.7464370727539, 45.881248474121094, 238.97927856445312, -60.831764221191406, -185.37420654296875, -66.73625183105469, 71.28068542480469, -26.397768020629883, 7.282199859619141, 173.526123046875, 14.843605041503906, 25.41146469116211, 47.14164733886719, -82.82977294921875, -77.735107421875, 145.8377685546875, 58.17169189453125, 286.9734802246094, 324.7407531738281, -10.96385383605957, 243.1301727294922, 30.56743812561035, 22.45922088623047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000203.npy"} +{"epoch": 0.29809104258443464, "step": 204, "batch_size": 64, "mean": 82.12925720214844, "std": 146.71878051757812, "min": -239.21917724609375, "p10": -77.05341339111325, "median": 65.44140243530273, "p90": 309.27540283203143, "max": 487.60345458984375, "pos_frac": 0.703125, "sample": [218.2355194091797, 76.08462524414062, 40.38705062866211, 44.49109649658203, 124.2634048461914, -16.184837341308594, 215.94500732421875, -104.09838104248047, 95.00109100341797, 22.96709632873535, -42.250553131103516, 21.47902488708496, 13.645217895507812, 85.8820571899414, 55.60888671875, -42.74962615966797, -239.21917724609375, 172.43905639648438, 50.99833679199219, 8.952150344848633, 266.81884765625, -128.9794921875, -21.817062377929688, -5.566226959228516, 71.75094604492188, -16.312843322753906, 73.76564025878906, 148.8251190185547, 156.18695068359375, -88.72874450683594, 397.91851806640625, -21.80725860595703, 63.74769592285156, 31.269750595092773, -46.49200439453125, 156.61300659179688, 58.32505798339844, 150.92379760742188, 424.30596923828125, -209.50904846191406, 327.4710693359375, 172.45242309570312, 171.047119140625, 74.53022766113281, 92.21038818359375, -12.179903030395508, 42.57005310058594, 231.3274688720703, 334.0347595214844, -9.697296142578125, 82.8319091796875, 170.03179931640625, -129.84881591796875, 67.1351089477539, 487.60345458984375, 374.9876708984375, -38.78990936279297, 150.1005859375, 23.971086502075195, 356.2106018066406, -120.2734375, 81.06704711914062, 114.1746826171875, -49.81097412109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000204.npy"} +{"epoch": 0.29955947136563876, "step": 205, "batch_size": 64, "mean": 110.42106628417969, "std": 142.15138244628906, "min": -250.49148559570312, "p10": -68.68245162963866, "median": 116.51107406616211, "p90": 273.96396179199223, "max": 406.1178894042969, "pos_frac": 0.78125, "sample": [39.870418548583984, 140.43704223632812, 224.40127563476562, 376.6222839355469, 120.69709014892578, 12.209909439086914, -8.250391006469727, 35.45306396484375, 9.333160400390625, 233.50827026367188, 264.67236328125, -95.25018310546875, -104.11943054199219, 95.39449310302734, -16.831802368164062, 76.02474975585938, 16.72046661376953, 378.7012023925781, 239.36207580566406, 28.94808578491211, 111.56995391845703, 101.74676513671875, -130.52340698242188, 173.1090850830078, 307.83160400390625, 246.94903564453125, 196.92071533203125, -0.4632453918457031, -66.58863067626953, 213.34176635742188, -42.981788635253906, 406.1178894042969, 132.12405395507812, 82.06157684326172, 36.199501037597656, 132.35064697265625, 398.4334411621094, 261.15948486328125, 120.54423522949219, 138.22779846191406, -218.74435424804688, 135.2164764404297, 120.62300872802734, 225.1663360595703, 156.93701171875, -69.57980346679688, 151.8563690185547, 86.18952178955078, 277.9460754394531, 86.61547088623047, 245.0998077392578, 192.15834045410156, 177.25157165527344, 68.9072494506836, 152.7228546142578, 366.9892272949219, 112.47791290283203, -13.383598327636719, 20.496490478515625, -250.49148559570312, -73.00552368164062, 216.3035888671875, -27.59063720703125, 40.75151062011719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000205.npy"} +{"epoch": 0.3010279001468429, "step": 206, "batch_size": 64, "mean": 118.73365783691406, "std": 129.02308654785156, "min": -233.7353515625, "p10": -29.334462928771956, "median": 103.69136428833008, "p90": 298.5227508544923, "max": 406.1219482421875, "pos_frac": 0.84375, "sample": [53.019073486328125, 189.36529541015625, 83.74800109863281, 123.56716918945312, 406.1219482421875, 311.8946228027344, 78.72918701171875, 55.167320251464844, 214.4070281982422, 261.80792236328125, 54.65202331542969, 136.35423278808594, 25.216611862182617, 104.74884796142578, -233.7353515625, 59.554813385009766, 80.08863830566406, 168.46922302246094, 61.639862060546875, -13.969228744506836, 227.87515258789062, -35.91956329345703, 240.0263671875, 353.3362731933594, 123.66505432128906, 199.94293212890625, 23.09380340576172, 204.99879455566406, -107.68122863769531, -108.50305938720703, 182.00880432128906, 312.3082275390625, 218.99118041992188, 232.29637145996094, 6.114030838012695, 258.924560546875, 76.61103057861328, -69.103759765625, 159.96749877929688, -148.02915954589844, 49.52599334716797, 211.96145629882812, 6.723529815673828, 166.992919921875, 91.36295318603516, -12.984725952148438, 125.08647918701172, 82.9749526977539, 196.30918884277344, 98.07917785644531, -58.624427795410156, 12.115867614746094, 92.8233413696289, 184.76708984375, 365.30828857421875, 267.32171630859375, -4.377832412719727, 122.34077453613281, 102.63388061523438, 312.4188537597656, 70.30188751220703, 23.40375328063965, 314.86181640625, 205.85650634765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000206.npy"} +{"epoch": 0.302496328928047, "step": 207, "batch_size": 64, "mean": 134.29856872558594, "std": 157.00250244140625, "min": -142.69869995117188, "p10": -31.282675170898433, "median": 90.75166320800781, "p90": 307.95274047851564, "max": 591.1517944335938, "pos_frac": 0.84375, "sample": [13.796077728271484, -142.69869995117188, 63.40319061279297, 107.30810546875, 181.67770385742188, 46.622764587402344, 306.700439453125, -27.84764862060547, -18.511032104492188, 9.36578369140625, 85.38824462890625, 76.18465423583984, 304.3537902832031, -7.5493927001953125, 132.57647705078125, 151.8542938232422, 46.57023620605469, 91.11798095703125, 37.021427154541016, 33.10900115966797, 47.39097213745117, 169.80264282226562, 30.82085418701172, 264.91864013671875, 35.42594909667969, -67.98104858398438, 308.48944091796875, 64.37136840820312, 29.049245834350586, -52.70058059692383, 284.0674743652344, 258.61614990234375, -58.54521179199219, 0.23253631591796875, 30.040794372558594, 257.5781555175781, 35.39067077636719, 240.2443389892578, 152.30795288085938, 257.083251953125, 258.39410400390625, 127.2259521484375, 112.100830078125, 134.55650329589844, 260.8211669921875, 246.84637451171875, 90.38534545898438, 385.80218505859375, 517.3919677734375, 277.11962890625, 48.45380401611328, 437.8953857421875, -60.11436462402344, 35.52611541748047, 591.1517944335938, 43.900115966796875, 92.94902038574219, 99.43598937988281, 87.88687133789062, 521.0414428710938, -87.55359649658203, 193.67080688476562, 435.9289855957031, -32.75482940673828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000207.npy"} +{"epoch": 0.3039647577092511, "step": 208, "batch_size": 64, "mean": 105.24873352050781, "std": 155.9709930419922, "min": -332.22088623046875, "p10": -43.0328025817871, "median": 81.1707534790039, "p90": 311.29040222167976, "max": 442.8934326171875, "pos_frac": 0.765625, "sample": [-200.9269561767578, 148.71302795410156, -160.68072509765625, 286.7371826171875, 26.413650512695312, -18.147342681884766, 389.3555908203125, 23.694623947143555, 116.45954132080078, 247.4610595703125, 242.8999786376953, 56.759376525878906, 84.87335205078125, 161.07069396972656, 219.33441162109375, 57.388099670410156, 442.8934326171875, 16.256561279296875, 436.80816650390625, 139.7373046875, -10.34771728515625, 35.493568420410156, 253.69561767578125, 177.5811767578125, 77.46815490722656, 27.77686309814453, 54.826690673828125, -9.772247314453125, -132.9364471435547, 274.7597961425781, -145.92019653320312, -9.92934799194336, 51.26030349731445, 108.1568374633789, 131.95416259765625, 294.8179931640625, 256.9632873535156, 38.641658782958984, -14.347160339355469, 228.27511596679688, -45.94879150390625, 72.49755859375, 27.40955352783203, 349.61029052734375, 136.10122680664062, 318.3500061035156, 86.5141830444336, -332.22088623046875, -36.22882843017578, 147.83099365234375, 15.742630004882812, 125.55415344238281, 327.63275146484375, 99.10188293457031, 54.23310852050781, -134.47369384765625, 25.179779052734375, 383.2433166503906, 183.95010375976562, 285.61053466796875, 200.61038208007812, 67.907470703125, -8.633819580078125, -19.17437744140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000208.npy"} +{"epoch": 0.3054331864904552, "step": 209, "batch_size": 64, "mean": 93.79731750488281, "std": 152.31752014160156, "min": -344.812255859375, "p10": -81.12847900390625, "median": 82.56779098510742, "p90": 315.53398742675785, "max": 431.5146484375, "pos_frac": 0.71875, "sample": [333.60150146484375, 82.71200561523438, 162.0950469970703, -47.466026306152344, 372.85931396484375, 406.6541748046875, 321.8120422363281, 113.32902526855469, 130.15631103515625, 92.40240478515625, -66.282470703125, 93.79869842529297, 170.08200073242188, 82.42357635498047, 166.80763244628906, 135.7464599609375, 165.07069396972656, -154.81886291503906, -81.97172546386719, 65.62004852294922, 260.12359619140625, 237.08172607421875, -2.292348861694336, -84.81820678710938, 44.82643127441406, -50.219879150390625, -15.245386123657227, 1.0721664428710938, 97.95942687988281, 156.61228942871094, 123.25651550292969, 85.25514221191406, -131.01998901367188, -47.98947525024414, 62.698692321777344, 68.88616943359375, -79.16090393066406, -24.544918060302734, 364.8768310546875, 292.945068359375, 52.71253967285156, 431.5146484375, 7.674598693847656, -44.968345642089844, -26.822509765625, 43.000648498535156, 57.58882141113281, 74.91416931152344, 253.41407775878906, 136.95884704589844, 149.02523803710938, -33.152183532714844, -113.3236083984375, -344.812255859375, 180.7476806640625, 76.29290771484375, 44.18503189086914, 48.91722869873047, 131.092041015625, -157.28033447265625, 335.96240234375, 300.88519287109375, 229.041259765625, 264.5257873535156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000209.npy"} +{"epoch": 0.3069016152716593, "step": 210, "batch_size": 64, "mean": 108.67684173583984, "std": 148.003662109375, "min": -244.9134521484375, "p10": -27.657365608215333, "median": 80.71089553833008, "p90": 315.6121643066407, "max": 473.16015625, "pos_frac": 0.765625, "sample": [-5.640533447265625, 33.89916229248047, 133.53829956054688, 71.64907836914062, -15.261611938476562, -83.18377685546875, 181.9913330078125, -27.433385848999023, 135.52484130859375, -30.83721160888672, 194.58612060546875, 13.050315856933594, 25.696086883544922, 96.76110076904297, 271.028564453125, 149.12054443359375, -16.817489624023438, 68.60801696777344, 196.78253173828125, 10.63840103149414, 296.65093994140625, 89.77271270751953, 30.665002822875977, 237.606689453125, 273.98919677734375, 3.014341354370117, 10.815437316894531, 322.145751953125, 191.27337646484375, 361.5818786621094, 117.12853240966797, 245.5277862548828, 117.56751251220703, 300.36712646484375, -141.58932495117188, -27.75335693359375, -78.4293212890625, -13.547409057617188, 32.95454406738281, 18.209728240966797, 142.9185028076172, 167.93856811523438, 365.08905029296875, 110.66661071777344, 0.5939807891845703, -19.058197021484375, 434.2348327636719, -11.037612915039062, 130.73985290527344, 159.8555908203125, 281.06207275390625, 473.16015625, 286.6082763671875, 52.56830596923828, -60.723480224609375, 89.947021484375, -6.612510681152344, 9.357120513916016, 12.09486198425293, 367.6357421875, 5.006584167480469, 18.85655975341797, -244.9134521484375, 397.6778564453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000210.npy"} +{"epoch": 0.30837004405286345, "step": 211, "batch_size": 64, "mean": 92.21821594238281, "std": 165.3912353515625, "min": -222.24288940429688, "p10": -65.56961746215819, "median": 59.7443962097168, "p90": 265.26828308105473, "max": 566.3486328125, "pos_frac": 0.703125, "sample": [133.10214233398438, -55.21302795410156, 254.1651611328125, 104.82701873779297, 70.55703735351562, 155.224365234375, -48.711944580078125, 112.89942169189453, 533.4092407226562, -222.24288940429688, -40.94646453857422, 41.51115417480469, 26.43912124633789, -0.7652587890625, 70.95358276367188, 223.96095275878906, 152.39796447753906, 192.89202880859375, 163.9072265625, -17.368576049804688, -5.433685302734375, -43.32147216796875, 191.72341918945312, 108.47941589355469, -70.0081558227539, 3.2061920166015625, -46.828224182128906, 521.598388671875, 48.436161041259766, 566.3486328125, 453.974853515625, -39.31006622314453, -132.77218627929688, 143.2978973388672, 249.0118408203125, 329.3950500488281, -155.08111572265625, 138.44422912597656, 44.52043151855469, -3.6011505126953125, 123.78108215332031, 431.27197265625, -40.569740295410156, 78.1820297241211, 4.713056564331055, 52.11769104003906, 63.03333282470703, -84.88560485839844, 16.93899917602539, 199.477294921875, -157.8511199951172, 56.45545959472656, 207.62953186035156, 50.15215301513672, 160.9496612548828, 270.0267639160156, 95.95043182373047, 240.07858276367188, 9.078201293945312, 127.12641906738281, 39.4901008605957, 17.010345458984375, -14.822006225585938, -196.44766235351562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000211.npy"} +{"epoch": 0.30983847283406757, "step": 212, "batch_size": 64, "mean": 132.64306640625, "std": 171.94717407226562, "min": -254.05787658691406, "p10": -57.40341491699217, "median": 132.4440574645996, "p90": 317.30950012207035, "max": 602.46630859375, "pos_frac": 0.75, "sample": [-254.05787658691406, -88.01900482177734, 318.4476013183594, 264.67620849609375, 59.681907653808594, 250.41909790039062, 106.49932861328125, -64.41500854492188, -37.5531005859375, -2.5286026000976562, 231.6271514892578, 171.1409912109375, -210.0536346435547, -41.04302978515625, -28.718387603759766, 7.177330017089844, 296.9002685546875, 217.22604370117188, 434.41717529296875, 249.03123474121094, 115.78515625, -110.78514862060547, 36.69033432006836, 106.82991027832031, 142.20635986328125, 166.55337524414062, 232.29653930664062, -81.87417602539062, 308.13128662109375, -12.223894119262695, 121.61994171142578, 169.8173828125, 602.46630859375, 177.96910095214844, 54.364227294921875, -227.47401428222656, 321.45928955078125, -24.253097534179688, 126.9819564819336, 572.6273803710938, 301.2974853515625, 137.90615844726562, 37.49523162841797, 240.55120849609375, 14.9560546875, 124.4302978515625, -32.19563674926758, 54.31496810913086, 314.6539306640625, 59.93979263305664, 148.39730834960938, 363.7523193359375, 411.631591796875, 252.11642456054688, 288.3952331542969, -8.902477264404297, 230.4401092529297, 63.46711349487305, 160.048828125, 143.4510955810547, 65.3846435546875, 301.366943359375, -38.8472900390625, 175.06109619140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000212.npy"} +{"epoch": 0.31130690161527164, "step": 213, "batch_size": 64, "mean": 127.49537658691406, "std": 154.1443328857422, "min": -220.19677734375, "p10": -56.45011901855467, "median": 114.82050704956055, "p90": 334.0070343017578, "max": 437.17547607421875, "pos_frac": 0.8125, "sample": [298.3770751953125, -90.03044128417969, 35.664817810058594, -105.56806182861328, 212.72703552246094, 15.019515991210938, 65.55990600585938, 106.85406494140625, -220.19677734375, -85.66238403320312, 9.91547966003418, 308.39727783203125, 340.65496826171875, -36.74962615966797, 84.51775360107422, 177.02940368652344, 10.38560676574707, 62.19172668457031, 158.40818786621094, 323.36083984375, 437.17547607421875, 326.5646667480469, 57.58863067626953, 416.01702880859375, 173.60169982910156, 283.16241455078125, 391.63861083984375, -19.81783676147461, -4.834236145019531, 13.31844711303711, 188.38385009765625, 125.10991668701172, 206.235595703125, 122.78694915771484, -43.0357666015625, 8.838134765625, 169.65240478515625, 330.2445983886719, 288.704345703125, 148.23580932617188, -12.982921600341797, 198.5026092529297, 335.6195068359375, -62.199127197265625, 216.5608673095703, 64.82322692871094, 185.4609832763672, 429.0738830566406, -140.9158935546875, 216.68026733398438, 249.86410522460938, 306.70001220703125, 352.20050048828125, 41.73091506958008, 15.538589477539062, 5.872159957885742, -126.21843719482422, 70.2559814453125, 100.58341217041016, 65.3078384399414, 36.352455139160156, 21.705963134765625, 168.0009002685547, 130.75894165039062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000213.npy"} +{"epoch": 0.31277533039647576, "step": 214, "batch_size": 64, "mean": 114.51547241210938, "std": 148.62936401367188, "min": -173.6182861328125, "p10": -50.41845474243164, "median": 85.505859375, "p90": 297.7182067871094, "max": 521.81689453125, "pos_frac": 0.765625, "sample": [74.33052825927734, 415.6233825683594, 17.060604095458984, 196.58616638183594, 1.9168872833251953, -14.394319534301758, -85.19032287597656, -173.6182861328125, 277.40875244140625, -79.03759765625, 171.72927856445312, 42.53052520751953, 96.68119049072266, -0.8738842010498047, 33.27362060546875, 108.41928100585938, 226.99456787109375, -27.0263614654541, 50.17674255371094, 190.4141845703125, 64.04646301269531, 71.6361312866211, 112.32073211669922, 264.4101257324219, 252.13046264648438, 70.7772445678711, 16.807228088378906, -0.07056999206542969, 128.93067932128906, -44.72908020019531, 107.0411376953125, 64.08576202392578, -0.44367218017578125, 333.48541259765625, -79.77558135986328, 521.81689453125, 350.0155334472656, 11.596145629882812, 27.499359130859375, 404.4286193847656, 203.2631072998047, 205.33316040039062, 269.15447998046875, 301.9373779296875, -149.18746948242188, 514.5845947265625, 30.36432456970215, 189.92678833007812, 216.93878173828125, 105.87972259521484, -50.86054229736328, 173.7879180908203, -48.602264404296875, 127.57806396484375, -49.38691711425781, 245.14938354492188, 62.49257278442383, 287.87347412109375, 71.47914123535156, 154.30252075195312, 121.45848083496094, 135.53636169433594, -52.080322265625, 63.05411911010742], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000214.npy"} +{"epoch": 0.3142437591776799, "step": 215, "batch_size": 64, "mean": 120.56410217285156, "std": 138.86724853515625, "min": -145.90821838378906, "p10": -64.28058013916015, "median": 99.3742904663086, "p90": 295.42076416015624, "max": 508.66497802734375, "pos_frac": 0.828125, "sample": [69.77198791503906, 325.1129150390625, 120.27053833007812, 267.7493896484375, -61.89117431640625, -79.5411376953125, 42.54015350341797, 120.11298370361328, 137.25604248046875, 102.47517395019531, 141.2256622314453, 50.40895080566406, 2.9151554107666016, -34.27849578857422, 434.78948974609375, 260.5967712402344, 258.06585693359375, 295.64276123046875, 508.66497802734375, 175.50120544433594, 96.27340698242188, 399.2252197265625, 111.05213165283203, -91.30503845214844, 73.35060119628906, 201.223876953125, -66.88702392578125, 72.89624786376953, 191.929931640625, 257.0840759277344, 30.42049217224121, 81.32595825195312, 80.63130187988281, -13.585836410522461, 173.34042358398438, 94.4883804321289, -131.44992065429688, 149.0444793701172, 197.13290405273438, 274.2125549316406, 339.12103271484375, 94.39273834228516, 206.28717041015625, -3.51116943359375, 151.0570068359375, 51.64453125, 42.39423370361328, -65.30461120605469, 48.78739547729492, 218.96839904785156, -145.90821838378906, 137.608642578125, 407.65447998046875, 31.47058868408203, 92.84175109863281, 89.47265625, 294.90277099609375, 7.4226837158203125, 196.88673400878906, 39.57530212402344, 107.1328353881836, 40.433349609375, -93.1029052734375, 108.08177947998047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000215.npy"} +{"epoch": 0.315712187958884, "step": 216, "batch_size": 64, "mean": 119.37692260742188, "std": 171.15626525878906, "min": -287.81964111328125, "p10": -73.27203598022462, "median": 101.08648681640625, "p90": 375.97046203613286, "max": 545.8858642578125, "pos_frac": 0.75, "sample": [217.5894775390625, 411.5614013671875, -73.62174224853516, 140.32168579101562, 210.44424438476562, 31.24641990661621, -184.85504150390625, 160.79263305664062, 218.2715606689453, 30.51325225830078, 545.8858642578125, 95.63432312011719, 114.81072998046875, -72.4560546875, -31.643970489501953, -8.049407958984375, -101.78433990478516, 210.25991821289062, 41.215824127197266, -185.0753173828125, 13.038070678710938, 324.8736572265625, -17.43401336669922, -17.813446044921875, 106.53865051269531, 152.24525451660156, 45.634002685546875, 192.1237030029297, 115.39743041992188, 0.9384288787841797, 108.15158081054688, 196.25531005859375, 17.440284729003906, -52.40705108642578, 402.7347717285156, 379.5426025390625, -67.24150085449219, 300.5319519042969, 50.07342529296875, 80.71160125732422, 59.266212463378906, 271.0034484863281, -2.263235092163086, -287.81964111328125, 69.39093017578125, 186.4931640625, 244.61764526367188, 398.3502197265625, 50.927978515625, -75.73157501220703, 279.088134765625, 47.22993469238281, 116.06197357177734, 172.2982177734375, -67.90044403076172, 340.148193359375, 239.41134643554688, 347.22479248046875, 412.1685791015625, 396.18524169921875, 20.464187622070312, -79.279052734375, 32.75518035888672, 367.6354675292969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000216.npy"} +{"epoch": 0.31718061674008813, "step": 217, "batch_size": 64, "mean": 140.3418731689453, "std": 160.13815307617188, "min": -179.71376037597656, "p10": -21.40581588745117, "median": 96.39009475708008, "p90": 372.1501647949219, "max": 548.4759521484375, "pos_frac": 0.84375, "sample": [136.15335083007812, 139.72027587890625, 95.9944839477539, 140.40072631835938, 61.55290222167969, 58.038780212402344, 71.64720153808594, 236.70921325683594, 78.5298080444336, 92.14666748046875, 48.01953125, 367.71881103515625, 145.72015380859375, 374.04931640625, -119.40674591064453, -1.2156295776367188, 40.77074432373047, 2.053802490234375, 203.88677978515625, 548.4759521484375, -3.443889617919922, 60.280799865722656, 293.37322998046875, 436.9808654785156, 98.3645248413086, 212.25863647460938, -32.27515411376953, 70.13555908203125, -179.71376037597656, 122.93338012695312, 505.5458679199219, 199.74993896484375, 6.679014205932617, 60.41828918457031, 267.6580810546875, 2.1343536376953125, 256.78460693359375, 290.95611572265625, -19.118484497070312, 503.942626953125, 439.94781494140625, -94.41675567626953, 351.00225830078125, 276.97149658203125, 138.43028259277344, 474.0376892089844, 20.446975708007812, 96.78570556640625, 69.624267578125, 71.14757537841797, 63.77159118652344, 178.82748413085938, 254.97119140625, 6.158052444458008, 148.2363739013672, 74.34062957763672, -45.76189422607422, -22.38610076904297, 28.644126892089844, -52.81559753417969, 137.71669006347656, 213.46141052246094, 253.83009338378906, 24.297555923461914], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000217.npy"} +{"epoch": 0.3186490455212922, "step": 218, "batch_size": 64, "mean": 108.9049301147461, "std": 165.6693878173828, "min": -274.9143371582031, "p10": -75.04668884277343, "median": 90.21390151977539, "p90": 333.2731048583985, "max": 582.289794921875, "pos_frac": 0.734375, "sample": [94.13813781738281, 62.54029083251953, 171.613525390625, 267.6275634765625, 149.76321411132812, 29.965030670166016, 335.8528137207031, 72.27986145019531, 14.1854248046875, 52.72924041748047, 152.11692810058594, 48.580902099609375, -16.15660858154297, 121.69444274902344, 454.772705078125, 181.45741271972656, 126.66897583007812, -21.976043701171875, 142.8729248046875, 195.15420532226562, 43.382110595703125, 58.243743896484375, -3.566059112548828, 235.30374145507812, -65.38652038574219, -274.9143371582031, 327.2537841796875, 99.0594482421875, 194.57119750976562, 203.1854248046875, 32.87928009033203, 424.74029541015625, 441.30938720703125, 257.4024353027344, 203.4501953125, 65.98629760742188, -20.31329917907715, -22.54557991027832, 93.62789154052734, -20.66519546508789, -29.239166259765625, -101.20576477050781, 149.32070922851562, -109.34506225585938, -74.24945831298828, -53.24256134033203, 582.289794921875, 347.0062255859375, 202.1929473876953, -75.38835906982422, -145.23062133789062, 240.09925842285156, 458.6287841796875, 11.637826919555664, 51.05746841430664, 86.79991149902344, 166.9441680908203, 99.59188842773438, 43.60729217529297, -175.23507690429688, 80.2419204711914, -124.44566345214844, 323.3822021484375, 105.81155395507812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000218.npy"} +{"epoch": 0.3201174743024963, "step": 219, "batch_size": 64, "mean": 91.93537139892578, "std": 177.6659393310547, "min": -452.5419616699219, "p10": -114.98924636840817, "median": 78.78208541870117, "p90": 354.6544158935548, "max": 485.6072998046875, "pos_frac": 0.734375, "sample": [460.898681640625, 147.96725463867188, 3.4668407440185547, 327.320556640625, 3.980356216430664, 428.61444091796875, -452.5419616699219, 10.941953659057617, 38.309844970703125, 303.6020812988281, -15.153358459472656, 96.38605499267578, 204.26296997070312, 445.92816162109375, 485.6072998046875, 183.81861877441406, 57.635406494140625, 254.04574584960938, 107.89599609375, 139.32003784179688, 279.91082763671875, 26.121719360351562, -130.71270751953125, -6.25439453125, -68.3294448852539, -68.92733001708984, 367.7365417480469, -4.251556396484375, 366.3689270019531, 397.8607177734375, 38.95764923095703, 270.5912780761719, -0.0704498291015625, 48.09291076660156, 17.113418579101562, 175.3725128173828, 118.1836929321289, 16.38025665283203, 113.66162109375, 37.10284423828125, 290.7876281738281, 134.1299591064453, 90.25247192382812, -176.82925415039062, 86.62388610839844, 161.94248962402344, -198.19869995117188, 6.684568405151367, 6.200037002563477, 245.73178100585938, 29.7393798828125, -134.40301513671875, -0.9109878540039062, -48.97929000854492, -226.6623992919922, 91.21768951416016, 164.1097412109375, 83.17391204833984, -90.27992248535156, -35.48945617675781, 141.84823608398438, 74.3902587890625, 87.1474380493164, -125.5789566040039], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000219.npy"} +{"epoch": 0.32158590308370044, "step": 220, "batch_size": 64, "mean": 78.98377990722656, "std": 129.20367431640625, "min": -261.66680908203125, "p10": -71.09269332885742, "median": 80.76587677001953, "p90": 206.6965484619141, "max": 446.4186096191406, "pos_frac": 0.75, "sample": [189.26089477539062, 53.006629943847656, 63.485809326171875, -22.525596618652344, 45.689537048339844, 318.50823974609375, 366.0219421386719, 9.959354400634766, 151.98158264160156, 160.4783477783203, 123.71045684814453, 271.99798583984375, 137.114990234375, 149.069580078125, -225.5634765625, 111.11056518554688, 177.50660705566406, 33.518646240234375, 137.12448120117188, 208.86524963378906, 85.80781555175781, 23.55415916442871, 1.2015228271484375, -261.66680908203125, -71.6962661743164, 201.63624572753906, -26.124719619750977, 51.854644775390625, 209.4052734375, 69.3643798828125, -69.68435668945312, 156.74659729003906, 133.63255310058594, -21.857223510742188, 216.55747985839844, 118.00127410888672, 446.4186096191406, 173.45164489746094, -17.249794006347656, 108.61132049560547, 16.77656364440918, 52.37704086303711, 0.7118320465087891, 110.05656433105469, -114.69268035888672, 159.51333618164062, 61.07940673828125, 135.22410583496094, -141.14080810546875, 75.72393798828125, 71.43183898925781, -2.2449722290039062, 139.6038818359375, -42.84632873535156, 43.07107925415039, 157.81680297851562, 184.65731811523438, -61.629783630371094, 176.79925537109375, 189.60794067382812, -17.622997283935547, 156.9139404296875, -137.42724609375, -147.08425903320312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000220.npy"} +{"epoch": 0.32305433186490456, "step": 221, "batch_size": 64, "mean": 171.51882934570312, "std": 163.33065795898438, "min": -186.412109375, "p10": -21.65741424560545, "median": 185.17830657958984, "p90": 386.34936828613286, "max": 551.36181640625, "pos_frac": 0.84375, "sample": [79.90889739990234, 107.49627685546875, 205.31752014160156, -6.467491149902344, 105.08843231201172, 551.36181640625, 6.127662658691406, 390.44940185546875, 200.57723999023438, 293.072021484375, 293.4239501953125, 10.736209869384766, 290.8353271484375, 240.18991088867188, 145.4691925048828, 271.09796142578125, -4.359130859375, 450.7160949707031, -5.729677200317383, 93.66388702392578, 211.84515380859375, 305.95794677734375, 403.48956298828125, 476.7333984375, 466.119873046875, 254.8262939453125, 223.8694610595703, -186.412109375, 56.186622619628906, 68.43599700927734, 173.58148193359375, 197.33102416992188, -31.354888916015625, 64.25176239013672, 66.50936889648438, 180.55032348632812, 113.52033996582031, -136.8297119140625, 259.5307312011719, 371.78045654296875, -28.167381286621094, 287.04962158203125, 255.2700958251953, 327.851806640625, 121.83984375, 233.416015625, 34.82417297363281, 376.7826232910156, 320.85284423828125, 231.29638671875, -46.207759857177734, -104.47075653076172, 15.690177917480469, 259.70904541015625, 41.75636291503906, -77.03863525390625, 17.728113174438477, 120.25049591064453, 46.03274917602539, 203.52713012695312, 311.074462890625, 93.31126403808594, 486.1201171875, 189.80628967285156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000221.npy"} +{"epoch": 0.3245227606461087, "step": 222, "batch_size": 64, "mean": 128.40843200683594, "std": 162.7919921875, "min": -232.00411987304688, "p10": -46.432830810546875, "median": 116.58121871948242, "p90": 360.4207580566407, "max": 559.7192993164062, "pos_frac": 0.78125, "sample": [-43.03681182861328, 446.0048522949219, -7.9540252685546875, -104.01502990722656, -5.108421325683594, -86.70646667480469, -20.582183837890625, 116.124267578125, 0.467498779296875, 226.52130126953125, 98.4561996459961, 123.49153900146484, 421.0897216796875, 90.88510131835938, 121.52275848388672, 242.86715698242188, -232.00411987304688, 151.68435668945312, 258.0392761230469, 176.29090881347656, 152.29698181152344, 108.94233703613281, -205.8719482421875, 325.4255065917969, -127.00594329833984, 269.345458984375, 492.72955322265625, 93.98335266113281, 17.726661682128906, 17.719772338867188, 160.69778442382812, 23.319976806640625, 210.83387756347656, -47.888267517089844, 205.7796173095703, 200.0562286376953, 47.696563720703125, 153.42721557617188, -15.304264068603516, 221.73101806640625, -30.195167541503906, 99.83979797363281, 81.81082916259766, 68.83699035644531, 338.2991943359375, 240.64071655273438, 117.03816986083984, 238.55889892578125, 147.8906707763672, 47.906097412109375, -28.427810668945312, 163.6356201171875, 369.90142822265625, 62.53252029418945, 423.0995178222656, 190.12496948242188, 133.95896911621094, -106.51031494140625, 222.25314331054688, 18.86376190185547, 559.7192993164062, 56.083648681640625, 416.6297607421875, 105.96987915039062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000222.npy"} +{"epoch": 0.32599118942731276, "step": 223, "batch_size": 64, "mean": 107.97784423828125, "std": 181.55764770507812, "min": -337.70062255859375, "p10": -117.41091156005858, "median": 108.73971939086914, "p90": 310.8174224853516, "max": 630.5066528320312, "pos_frac": 0.734375, "sample": [71.96232604980469, -66.60579681396484, 222.12100219726562, 147.5714111328125, 141.65322875976562, 355.76556396484375, 196.97100830078125, 103.09610748291016, 215.52301025390625, 166.98196411132812, -173.5109100341797, 56.78852844238281, 80.35505676269531, 236.80252075195312, 294.46307373046875, 289.529052734375, 134.44412231445312, 91.59873962402344, 554.0777587890625, -13.298274993896484, 137.5894317626953, -55.318870544433594, 91.18156433105469, -66.17039489746094, -59.42662811279297, 305.6462707519531, -158.1753692626953, 377.57598876953125, 103.40213775634766, -122.32093048095703, 5.555816650390625, -2.1978302001953125, 265.9913330078125, -337.70062255859375, 114.07730102539062, 146.30422973632812, -212.63475036621094, 197.263427734375, 74.13848876953125, 630.5066528320312, 248.6856689453125, -105.9542007446289, 178.68771362304688, 270.529541015625, -181.14688110351562, -60.90251541137695, 285.481689453125, 348.1455078125, 163.55487060546875, -11.572029113769531, 51.03773498535156, 30.043426513671875, 89.24478149414062, 413.45477294921875, 26.10517120361328, 12.263313293457031, -17.941640853881836, 12.726163864135742, 144.20445251464844, 137.41970825195312, 120.20645904541016, 161.08975219726562, -259.3917541503906, 313.03363037109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000223.npy"} +{"epoch": 0.3274596182085169, "step": 224, "batch_size": 64, "mean": 123.69600677490234, "std": 182.18006896972656, "min": -335.1512451171875, "p10": -76.44371643066407, "median": 67.18981170654297, "p90": 363.4639434814454, "max": 665.755859375, "pos_frac": 0.828125, "sample": [249.8376007080078, 3.944061279296875, 143.77452087402344, 16.413475036621094, -76.7000503540039, 552.7578125, 240.7713165283203, 49.172767639160156, 62.864280700683594, -335.1512451171875, 53.59784698486328, 172.6528778076172, 158.29238891601562, 146.3477783203125, 440.130859375, 88.29759216308594, 54.505332946777344, 195.3517608642578, 179.25765991210938, 665.755859375, 279.2227783203125, 110.056884765625, 310.79730224609375, 249.81179809570312, -134.89332580566406, -79.02719116210938, 271.78924560546875, 68.24714660644531, 234.53085327148438, -12.794692993164062, 33.10127258300781, 46.106964111328125, 36.548728942871094, 634.0343017578125, 260.97650146484375, 243.7452392578125, 113.12805938720703, -78.75406646728516, 22.514631271362305, 43.471248626708984, 225.05313110351562, 14.50311279296875, 48.885169982910156, -29.47064971923828, 104.07247924804688, -75.8456039428711, -61.54364013671875, 2.0126495361328125, -102.60307312011719, 30.675426483154297, 173.90316772460938, 145.14602661132812, 375.60003662109375, 19.801347732543945, 7.815456390380859, 66.13247680664062, 4.158935546875, 335.1463928222656, 37.440467834472656, -81.303955078125, 92.67045593261719, 425.76776123046875, 422.1202697753906, 21.918651580810547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000224.npy"} +{"epoch": 0.328928046989721, "step": 225, "batch_size": 64, "mean": 144.7752227783203, "std": 180.4758758544922, "min": -247.81985473632812, "p10": -85.29101943969727, "median": 148.50826263427734, "p90": 391.7450988769532, "max": 706.3538818359375, "pos_frac": 0.765625, "sample": [236.08026123046875, 169.7027587890625, 408.18804931640625, 366.2476806640625, 35.173927307128906, 96.2138442993164, 33.515106201171875, -92.57647705078125, -139.15829467773438, 420.8067626953125, 374.1573791503906, 288.7033386230469, -145.2324676513672, -80.46514129638672, 268.65863037109375, -26.764812469482422, -102.02052307128906, 3.422323226928711, -20.635005950927734, 75.98597717285156, 114.26492309570312, 122.35556030273438, 229.86935424804688, 160.17706298828125, -10.078851699829102, 148.580322265625, 47.360260009765625, 165.8131561279297, 2.556171417236328, 238.42276000976562, 303.2877197265625, 167.51739501953125, 220.2473907470703, 109.18982696533203, -43.79216384887695, 480.94329833984375, 430.8546142578125, 7.492515563964844, 243.90640258789062, 398.9818115234375, 101.75390625, 331.2802734375, 186.34451293945312, -40.13762664794922, 162.34426879882812, 135.37637329101562, 706.3538818359375, 283.7306823730469, 153.24404907226562, 81.91682434082031, 416.54339599609375, 7.71270751953125, 242.4810333251953, -100.02685546875, -87.3592529296875, 374.85943603515625, 303.525634765625, 172.10809326171875, 168.75498962402344, -247.81985473632812, 113.4870834350586, 148.4362030029297, -49.129310607910156, -8.11883544921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000225.npy"} +{"epoch": 0.3303964757709251, "step": 226, "batch_size": 64, "mean": 113.28193664550781, "std": 171.46205139160156, "min": -226.19210815429688, "p10": -39.67925643920897, "median": 77.83613967895508, "p90": 333.5439331054688, "max": 589.9083251953125, "pos_frac": 0.71875, "sample": [589.9083251953125, -160.73788452148438, -44.718135833740234, 16.368804931640625, -20.26910400390625, 169.50025939941406, -7.4678955078125, 65.92857360839844, 176.462890625, -27.921871185302734, 110.04888153076172, -0.7729034423828125, -26.73438262939453, 27.793792724609375, -170.60240173339844, 96.09172058105469, -120.84037780761719, 327.2294921875, 288.1945495605469, 26.28734588623047, 336.2501220703125, 361.517578125, 79.85187530517578, 127.25889587402344, 187.7667999267578, 288.8187255859375, 19.294761657714844, -23.817123413085938, 128.13360595703125, 454.1589660644531, 295.06964111328125, -79.24669647216797, 189.29466247558594, 76.84947204589844, 42.71783447265625, 533.793701171875, 255.75839233398438, 489.25341796875, 284.5962829589844, -11.001792907714844, -5.47174072265625, 148.8287353515625, -226.19210815429688, 154.52783203125, 42.136287689208984, 104.98237609863281, 10.137031555175781, -9.222198486328125, 9.280832290649414, 78.82280731201172, -3.627532958984375, 253.5681610107422, 109.44940185546875, 197.32394409179688, 41.682716369628906, 124.60841369628906, -93.31005859375, 5.946128845214844, 506.5186767578125, 59.267494201660156, 34.781410217285156, 150.07347106933594, -8.48846435546875, 214.3511962890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000226.npy"} +{"epoch": 0.33186490455212925, "step": 227, "batch_size": 64, "mean": 144.4818878173828, "std": 172.9486083984375, "min": -364.01153564453125, "p10": -14.627217864990227, "median": 117.03218460083008, "p90": 368.58688659667973, "max": 627.6432495117188, "pos_frac": 0.859375, "sample": [319.3691101074219, 105.04501342773438, 162.24948120117188, 381.21673583984375, 220.92138671875, 45.83330535888672, 50.60633850097656, 114.41802215576172, 311.349609375, 8.663833618164062, 117.66187286376953, 124.00363159179688, 179.68807983398438, 27.504995346069336, -17.515125274658203, 28.847396850585938, 164.90298461914062, 98.40834045410156, 200.5847625732422, 627.6432495117188, 156.47691345214844, 106.5072021484375, 322.2635192871094, 204.46786499023438, 59.99872589111328, 291.447021484375, 162.66522216796875, 244.65719604492188, 116.40249633789062, 91.89297485351562, 150.6806640625, 65.31315612792969, 332.15765380859375, 4.910785675048828, -82.77366638183594, -112.31852722167969, 196.94189453125, 212.94186401367188, 507.73828125, 111.45710754394531, 181.39398193359375, 441.756103515625, -100.09335327148438, 580.0377197265625, 0.09414482116699219, 20.490455627441406, 178.3286590576172, 42.567787170410156, 219.72842407226562, 38.66710662841797, 127.99020385742188, 375.86260986328125, 103.7835693359375, -3.9004974365234375, 7.117387771606445, -40.905426025390625, 351.6101989746094, -364.01153564453125, -7.888767242431641, 84.90982055664062, -78.42609405517578, 23.694948196411133, 138.59115600585938, 510.2109680175781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000227.npy"} +{"epoch": 0.3333333333333333, "step": 228, "batch_size": 64, "mean": 101.45246887207031, "std": 163.22012329101562, "min": -279.23333740234375, "p10": -103.75468597412109, "median": 105.03263092041016, "p90": 312.44248352050784, "max": 499.25787353515625, "pos_frac": 0.75, "sample": [-176.57730102539062, -238.177001953125, -279.23333740234375, 222.22573852539062, 136.34390258789062, 328.86468505859375, -185.78793334960938, 175.81671142578125, 182.68069458007812, -151.88827514648438, 9.70367431640625, 283.2176208496094, 499.25787353515625, 315.0797424316406, 437.38916015625, 323.0419921875, 351.8504638671875, 167.96142578125, 122.56837463378906, -30.574630737304688, -50.31599426269531, 288.703125, 108.41114044189453, 306.28887939453125, 179.3455810546875, 160.6101837158203, 132.24705505371094, 21.85150146484375, -104.34020233154297, 118.17106628417969, 30.216781616210938, 193.45968627929688, 6.491401672363281, 198.195068359375, 223.35464477539062, 88.49284362792969, 120.48136138916016, 80.83331298828125, 33.47328186035156, 47.63250732421875, 98.0411605834961, 237.73602294921875, -75.20166015625, -14.412216186523438, 93.38337707519531, 101.74605560302734, 433.2698059082031, 12.076118469238281, 79.91343688964844, 53.375038146972656, 261.79840087890625, 0.058132171630859375, -24.325355529785156, 175.31707763671875, 226.7051544189453, 165.12713623046875, 200.4992218017578, 108.31920623779297, -102.38848114013672, -30.012786865234375, -20.07630157470703, 29.151397705078125, -148.03909301757812, -46.469947814941406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000228.npy"} +{"epoch": 0.33480176211453744, "step": 229, "batch_size": 64, "mean": 108.77888488769531, "std": 161.3918914794922, "min": -387.73236083984375, "p10": -54.27392349243163, "median": 95.43351364135742, "p90": 338.7661926269531, "max": 549.0750732421875, "pos_frac": 0.75, "sample": [-26.332443237304688, 14.122241973876953, -117.21099853515625, 403.0106201171875, 54.92599105834961, -195.10226440429688, 108.76737213134766, 190.45538330078125, 64.63548278808594, 218.61557006835938, 249.43365478515625, 340.1416320800781, 100.95175170898438, 363.5093688964844, 91.11814880371094, 99.7488784790039, 254.72503662109375, 106.2666244506836, 40.84596633911133, 336.9079284667969, -88.7003173828125, 66.92666625976562, -3.022502899169922, 219.68971252441406, 145.2178955078125, 40.94464874267578, 549.0750732421875, -160.8388671875, 48.18043518066406, 243.1539306640625, 146.40707397460938, 246.18206787109375, 183.69070434570312, 364.6563720703125, 339.5625915527344, 39.935768127441406, 316.58441162109375, 29.10291290283203, 84.12652587890625, -58.49432373046875, 71.6957778930664, 112.16291809082031, 44.13896942138672, 190.94224548339844, -44.42632293701172, 156.71795654296875, 168.94024658203125, 225.60911560058594, -30.984329223632812, -140.70492553710938, 107.57612609863281, 427.87542724609375, 30.281038284301758, 82.75848388671875, 85.81249237060547, 132.7230987548828, -8.201774597167969, 181.13482666015625, -15.025833129882812, 176.90904235839844, -387.73236083984375, -0.6114273071289062, -18.440170288085938, -39.21875762939453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000229.npy"} +{"epoch": 0.33627019089574156, "step": 230, "batch_size": 64, "mean": 165.08201599121094, "std": 161.2410125732422, "min": -251.34365844726562, "p10": -31.807445907592772, "median": 155.02716064453125, "p90": 407.29953308105473, "max": 510.2415771484375, "pos_frac": 0.84375, "sample": [77.03004455566406, 366.47674560546875, -22.506574630737305, 306.9488525390625, 209.72845458984375, 79.41631317138672, -41.234527587890625, 255.60801696777344, 95.09637451171875, 133.27291870117188, 213.5634307861328, 170.28073120117188, 18.923606872558594, 233.03207397460938, 18.225540161132812, 114.26058959960938, -57.946510314941406, 85.80928039550781, 99.08578491210938, 145.8090362548828, 41.83349609375, -96.49958801269531, -32.85291290283203, 202.1253662109375, 145.06631469726562, 179.27980041503906, 213.96192932128906, 340.0065612792969, 327.2413635253906, 156.18826293945312, 80.85322570800781, 31.002822875976562, 332.0582275390625, 452.5187683105469, 173.28089904785156, -122.69454193115234, 136.90509033203125, 173.21554565429688, 230.805908203125, 440.57794189453125, -29.368022918701172, -12.960189819335938, -251.34365844726562, -46.373313903808594, 46.18052673339844, 47.68995666503906, 411.5004577636719, 140.71051025390625, 375.3013610839844, 450.60137939453125, 172.69371032714844, 109.52290344238281, 103.78749084472656, 15.309366226196289, 415.32220458984375, 291.58245849609375, 484.2251281738281, 205.3268585205078, 397.49737548828125, 220.12635803222656, 230.86622619628906, 153.86605834960938, 217.18763732910156, 510.2415771484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000230.npy"} +{"epoch": 0.3377386196769457, "step": 231, "batch_size": 64, "mean": 115.93913269042969, "std": 153.9716339111328, "min": -140.86212158203125, "p10": -45.92092666625976, "median": 92.36008834838867, "p90": 335.0958465576172, "max": 650.7132568359375, "pos_frac": 0.8125, "sample": [476.539794921875, 94.22636413574219, -42.08831024169922, 284.82147216796875, 15.066368103027344, 80.28785705566406, 340.6325988769531, 5.648193359375, 71.31570434570312, 95.34638214111328, 103.52798461914062, 351.99761962890625, -64.28712463378906, 88.56484985351562, 465.9037780761719, 145.2830352783203, 206.70924377441406, 92.15786743164062, 139.62051391601562, 206.73703002929688, 384.342529296875, 63.210147857666016, 107.48365020751953, 1.7834930419921875, 59.30104064941406, -79.54373931884766, 322.1767578125, -54.9500617980957, 213.55999755859375, 650.7132568359375, 461.8163757324219, 247.68582153320312, 127.39334106445312, 34.48096466064453, -140.86212158203125, 66.85185241699219, 107.34132385253906, 170.96990966796875, 83.06027221679688, -47.5634765625, -3.137847900390625, 34.50518798828125, 234.954345703125, 67.80465698242188, -23.91533660888672, 130.4464111328125, -23.112716674804688, -105.67623138427734, 57.40732955932617, 117.23013305664062, 305.92431640625, 165.44888305664062, 92.56230926513672, 7.879375457763672, -40.1703987121582, 116.77474975585938, -99.49122619628906, 108.17788696289062, 26.706283569335938, 18.98858642578125, 9.954910278320312, 145.29002380371094, 105.83206176757812, 32.45814514160156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000231.npy"} +{"epoch": 0.3392070484581498, "step": 232, "batch_size": 64, "mean": 94.21968078613281, "std": 164.78802490234375, "min": -158.1475372314453, "p10": -84.68783416748046, "median": 59.586923599243164, "p90": 319.0439208984375, "max": 610.6156005859375, "pos_frac": 0.71875, "sample": [180.44973754882812, 97.40325164794922, 9.811561584472656, 57.93775177001953, 18.346847534179688, -141.74562072753906, 345.9702453613281, 253.14199829101562, -29.13201904296875, 250.21359252929688, 49.83589172363281, 465.9688720703125, 76.1180191040039, -68.75843048095703, -24.4791259765625, 64.08673858642578, -31.328033447265625, 185.79541015625, -110.53073120117188, 69.96158599853516, 155.44029235839844, 580.0111694335938, 62.488311767578125, 86.30111694335938, 136.4071044921875, -32.54263687133789, 610.6156005859375, -3.2093658447265625, 27.662567138671875, 256.6514892578125, 154.15432739257812, 206.86746215820312, -78.088623046875, 76.259521484375, -101.1923828125, 132.63290405273438, -134.68203735351562, 23.226585388183594, -87.51606750488281, 441.9376220703125, 24.655120849609375, -70.51949310302734, 1.9940643310546875, -103.51283264160156, 27.009326934814453, 85.56263732910156, 14.915771484375, -158.1475372314453, 266.58721923828125, 118.00167846679688, 350.8446044921875, 22.826976776123047, -7.85235595703125, 318.86968994140625, 223.08489990234375, -31.932512283325195, 123.94231414794922, 319.11859130859375, -24.676847457885742, 54.971229553222656, 61.2360954284668, 51.11563491821289, 72.90492248535156, 56.56798553466797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000232.npy"} +{"epoch": 0.3406754772393539, "step": 233, "batch_size": 64, "mean": 148.33291625976562, "std": 215.9307403564453, "min": -241.99899291992188, "p10": -72.1384292602539, "median": 100.21219635009766, "p90": 476.42311096191406, "max": 687.350341796875, "pos_frac": 0.703125, "sample": [14.72703742980957, 13.36385726928711, 499.6051025390625, -33.70594787597656, 193.0578155517578, 188.41131591796875, -171.90101623535156, -113.95934295654297, -5.4619598388671875, -187.08766174316406, 113.41487121582031, 34.961090087890625, 173.74085998535156, 266.6180114746094, 258.46832275390625, 687.350341796875, 125.0441665649414, 77.59432983398438, 526.2391967773438, 324.7850341796875, 47.11793518066406, 165.79190063476562, 87.009521484375, 557.7679443359375, 25.35979461669922, 274.05194091796875, -0.0843048095703125, -72.65576171875, -201.17822265625, -52.50041580200195, 148.2392120361328, 489.0712585449219, 206.95257568359375, 170.95399475097656, 304.39337158203125, 478.7880554199219, 33.166908264160156, 435.8603515625, -70.93132019042969, 41.16169738769531, 115.63622283935547, 414.14630126953125, -30.062877655029297, 341.6947937011719, 71.14602661132812, 372.3653564453125, -66.89881896972656, -241.99899291992188, -138.0094451904297, -24.939117431640625, 47.017066955566406, -29.525840759277344, 337.906982421875, 84.32902526855469, -31.559814453125, 212.4193878173828, -25.627422332763672, 470.9049072265625, -35.786624908447266, 619.859619140625, 388.56158447265625, 62.48298645019531, 318.11285400390625, 207.53036499023438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000233.npy"} +{"epoch": 0.342143906020558, "step": 234, "batch_size": 64, "mean": 155.75323486328125, "std": 176.22540283203125, "min": -158.68519592285156, "p10": -62.04456939697264, "median": 129.79589080810547, "p90": 383.16207580566413, "max": 668.1612548828125, "pos_frac": 0.796875, "sample": [-72.71743774414062, 290.07427978515625, 271.7322998046875, 244.9324951171875, 365.60894775390625, 90.0645980834961, 166.70028686523438, 188.2913055419922, 431.6802062988281, -46.391265869140625, 286.6858825683594, 150.34571838378906, -158.68519592285156, -2.5822105407714844, 87.89730072021484, 263.7159729003906, 78.54443359375, -76.3824234008789, 75.10546875, 301.5477294921875, 668.1612548828125, 135.16165161132812, -9.392801284790039, -31.707984924316406, 75.7470932006836, -32.78633117675781, 178.785888671875, 349.7697448730469, 10.4371337890625, 346.190673828125, -14.048032760620117, 153.78524780273438, 442.61181640625, 112.84284973144531, 525.6820068359375, 124.1641845703125, 41.77709197998047, 178.79782104492188, 63.13932418823242, 284.13299560546875, 279.2776184082031, 136.459228515625, 262.2406311035156, 86.82654571533203, 18.65894317626953, 86.72306823730469, -97.46946716308594, 2.4723281860351562, 253.79190063476562, 104.5467529296875, 89.79450225830078, 445.015625, 603.2271728515625, -77.4292984008789, 300.60931396484375, 177.07455444335938, -68.75312805175781, 1.1903209686279297, 183.85731506347656, 53.55976867675781, 124.43013000488281, -109.788818359375, 390.6848449707031, 181.78717041015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000234.npy"} +{"epoch": 0.3436123348017621, "step": 235, "batch_size": 64, "mean": 98.2317123413086, "std": 174.693603515625, "min": -449.4805908203125, "p10": -78.90911407470703, "median": 110.44636535644531, "p90": 305.5226104736329, "max": 512.7127075195312, "pos_frac": 0.734375, "sample": [34.401397705078125, 228.05484008789062, -449.4805908203125, 41.723907470703125, 150.4622039794922, 497.571044921875, 162.93914794921875, 319.4661865234375, -71.01235961914062, 196.01011657714844, -163.5419464111328, -27.649749755859375, 141.16012573242188, 291.7548828125, 33.094268798828125, 37.38362121582031, -76.55604553222656, 26.309059143066406, 107.10951232910156, -275.6221008300781, 23.337154388427734, 22.074493408203125, -15.549491882324219, 68.3724136352539, 354.82391357421875, 231.0092010498047, -47.94451141357422, -59.72123718261719, 176.35626220703125, 201.90809631347656, 119.7152099609375, 118.68431091308594, 311.4230651855469, -79.91757202148438, 113.78321838378906, 194.9775390625, -2.3973560333251953, 255.98037719726562, 21.079696655273438, 163.76266479492188, 28.200130462646484, 53.43479919433594, 508.7948913574219, 247.1175537109375, 183.26406860351562, 188.9575958251953, 167.43499755859375, -41.940391540527344, -183.118896484375, 155.8644561767578, 512.7127075195312, 262.17462158203125, -150.1109619140625, 3.8017120361328125, 116.43452453613281, -11.610540390014648, -82.09957122802734, 190.7126922607422, 102.48068237304688, 17.088821411132812, 192.95950317382812, 144.40025329589844, 372.7319030761719, -68.19102478027344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000235.npy"} +{"epoch": 0.34508076358296624, "step": 236, "batch_size": 64, "mean": 159.12815856933594, "std": 144.53160095214844, "min": -94.7965087890625, "p10": -14.72569351196288, "median": 130.34526824951172, "p90": 372.09518737792973, "max": 486.8313293457031, "pos_frac": 0.875, "sample": [379.3598937988281, 63.586875915527344, 21.384201049804688, 159.84951782226562, 202.0568389892578, 135.7543182373047, 438.6578063964844, 23.240379333496094, 77.01878356933594, 132.77012634277344, 201.145751953125, 364.3888854980469, 103.05833435058594, 234.4566650390625, 311.9425964355469, 318.5939636230469, 50.077911376953125, 252.2601776123047, 12.483329772949219, 375.39788818359375, 134.84844970703125, -34.269317626953125, 77.1644515991211, 293.8656921386719, 237.92855834960938, 27.337520599365234, 144.93539428710938, 380.43804931640625, 245.89797973632812, 37.716819763183594, 461.1965026855469, 106.4653091430664, -41.06806945800781, 248.3561248779297, -4.673191070556641, 247.0482177734375, 119.8873062133789, 457.0302734375, 97.62084197998047, -19.03390884399414, 125.31397247314453, 118.24469757080078, 29.76045799255371, 45.38178253173828, 12.507801055908203, 211.31800842285156, 108.22608947753906, 113.26946258544922, -69.77285766601562, -94.7965087890625, 144.32107543945312, 486.8313293457031, 354.24322509765625, 127.92041015625, 205.17227172851562, 121.94010925292969, 323.3774108886719, 102.8932113647461, 4.6552734375, 192.1196746826172, -63.94232177734375, 337.02667236328125, -31.468341827392578, 203.48219299316406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000236.npy"} +{"epoch": 0.3465491923641703, "step": 237, "batch_size": 64, "mean": 119.61394500732422, "std": 174.1492156982422, "min": -335.4542541503906, "p10": -73.18541259765624, "median": 111.21974563598633, "p90": 376.6554138183596, "max": 662.3936767578125, "pos_frac": 0.765625, "sample": [173.4807586669922, 201.84222412109375, 194.333984375, 154.29556274414062, -0.8100318908691406, 121.33728790283203, 95.60669708251953, 147.25360107421875, 32.69504928588867, 177.01275634765625, 55.68505096435547, 31.682998657226562, 418.38128662109375, 129.06539916992188, 226.18917846679688, -61.73834228515625, -104.92527770996094, 142.1345977783203, -76.34366607666016, 93.49360656738281, 138.5714111328125, -104.06349182128906, 399.9534912109375, 662.3936767578125, -67.47691345214844, 399.2975769042969, 323.8236999511719, 4.552947998046875, 6.5082244873046875, 17.763500213623047, 484.3402099609375, 102.73078918457031, 217.7996368408203, 230.4561309814453, 13.811317443847656, 204.66468811035156, 29.26551055908203, 146.46249389648438, 301.1125183105469, -42.27991485595703, 216.13003540039062, 84.00508880615234, 173.67758178710938, 255.13803100585938, 409.02825927734375, 44.67559814453125, -3.67132568359375, 19.84508514404297, -335.4542541503906, 285.1650695800781, -51.295936584472656, 72.04451751708984, -162.00584411621094, -46.34584045410156, 187.19073486328125, 279.4932861328125, -75.63191223144531, 445.87615966796875, 6.219474792480469, -122.15359497070312, -51.533447265625, 119.70870208740234, 70.8634033203125, 213.96322631835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000237.npy"} +{"epoch": 0.34801762114537443, "step": 238, "batch_size": 64, "mean": 119.60325622558594, "std": 171.10052490234375, "min": -173.07174682617188, "p10": -42.20869140624998, "median": 81.90255355834961, "p90": 284.00517578125005, "max": 767.708984375, "pos_frac": 0.765625, "sample": [-14.824783325195312, -24.153335571289062, 5.6010589599609375, 141.1007537841797, 19.221271514892578, -4.254852294921875, 134.69224548339844, 278.6541748046875, -111.15548706054688, 412.95989990234375, 76.91091918945312, 162.3740692138672, 75.64335632324219, 61.98349380493164, -11.301813125610352, 187.72117614746094, 286.2984619140625, -70.20500183105469, 53.452415466308594, 47.50715255737305, 6.735715866088867, 663.708984375, 33.12919616699219, 145.7330780029297, -49.94670104980469, 504.2480163574219, 100.51223754882812, 18.54553985595703, 143.00823974609375, -110.12657165527344, 134.38931274414062, 119.27902221679688, 161.92156982421875, 122.74456024169922, 357.1466064453125, 28.663196563720703, 62.14596176147461, 117.72552490234375, 767.708984375, 150.56155395507812, -10.595783233642578, 14.42718505859375, 75.8963623046875, -103.26412200927734, -15.461959838867188, 252.35302734375, 212.61419677734375, 189.99334716796875, 64.20645904541016, 425.9629211425781, 204.5967559814453, 13.397750854492188, 185.8740234375, 240.86251831054688, -173.07174682617188, 275.99420166015625, 263.9877624511719, 25.437623977661133, -11.833061218261719, -18.780303955078125, 172.41583251953125, 159.39068603515625, -62.74896240234375, 86.8941879272461], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000238.npy"} +{"epoch": 0.34948604992657856, "step": 239, "batch_size": 64, "mean": 133.9548797607422, "std": 163.15536499023438, "min": -280.0307922363281, "p10": -41.054041290283195, "median": 124.28812789916992, "p90": 361.0255096435547, "max": 505.1696472167969, "pos_frac": 0.796875, "sample": [363.0605163574219, 175.11668395996094, -141.55418395996094, 356.27716064453125, -97.072509765625, 194.85125732421875, 67.2071533203125, 147.0050048828125, -43.486446380615234, -140.4312286376953, 50.04115676879883, 47.72308349609375, 77.42410278320312, 269.8314514160156, 76.10961151123047, 12.212635040283203, 505.1696472167969, 122.90653991699219, 180.25381469726562, 125.66971588134766, 207.33042907714844, 193.3580780029297, 243.5474395751953, -58.03895950317383, 0.09575653076171875, 46.27238464355469, 179.2210235595703, 394.9136657714844, 211.9215850830078, 107.16947937011719, -7.621282577514648, 21.335235595703125, 336.01483154296875, 407.76019287109375, -156.47239685058594, 63.56501007080078, -9.526817321777344, 365.0067138671875, -8.628768920898438, 337.1651916503906, 32.782108306884766, 348.6876220703125, 314.7690734863281, 151.13006591796875, -24.859603881835938, 42.721580505371094, -280.0307922363281, 142.00125122070312, -35.3784294128418, 150.03952026367188, 149.09808349609375, 287.80010986328125, 317.720947265625, 69.91520690917969, 468.9234619140625, 217.31675720214844, 377.45263671875, 84.11485290527344, 101.40605163574219, 126.5619888305664, 60.195404052734375, -25.50261688232422, 258.2388916015625, 15.303731918334961], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000239.npy"} +{"epoch": 0.3509544787077827, "step": 240, "batch_size": 64, "mean": 112.62922668457031, "std": 186.5790252685547, "min": -237.2541961669922, "p10": -169.01607360839841, "median": 122.86301803588867, "p90": 359.1159820556641, "max": 572.9171752929688, "pos_frac": 0.765625, "sample": [142.4884033203125, 139.1178436279297, 164.793701171875, 99.71488189697266, 126.22151184082031, 350.14862060546875, 153.71975708007812, 414.42840576171875, -215.47998046875, -55.242042541503906, 199.46131896972656, 138.97239685058594, -225.71466064453125, 169.57623291015625, 362.9591369628906, 61.78453826904297, -185.11050415039062, 71.77864074707031, 38.73725128173828, 245.53372192382812, 172.42819213867188, 409.59210205078125, 167.39828491210938, 142.19775390625, 65.87237548828125, 3.466991424560547, -235.68191528320312, 123.0315170288086, 79.55963134765625, 497.70587158203125, 191.4110870361328, 122.69451904296875, 114.53089904785156, 21.831382751464844, -195.2496337890625, 344.79400634765625, 230.0205535888672, 176.91064453125, -211.31268310546875, 75.60680389404297, -76.50367736816406, 307.4955749511719, -131.46240234375, 37.7461051940918, 572.9171752929688, -130.63221740722656, 277.4398193359375, -29.827638626098633, 264.3836669921875, 178.3272247314453, 116.97857666015625, 432.8583984375, -39.87779235839844, 107.58025360107422, -119.66331481933594, 144.84161376953125, 102.14940643310547, -51.258880615234375, 111.986328125, 196.5825653076172, 48.09477233886719, 146.2193603515625, 484.4523010253906, -237.2541961669922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000240.npy"} +{"epoch": 0.3524229074889868, "step": 241, "batch_size": 64, "mean": 176.90191650390625, "std": 206.40151977539062, "min": -230.8702850341797, "p10": -33.95944099426269, "median": 160.7864761352539, "p90": 440.98028869628916, "max": 853.2113037109375, "pos_frac": 0.828125, "sample": [102.42900848388672, 91.26914978027344, -224.06787109375, -22.67547035217285, 31.798065185546875, 233.08746337890625, 230.75482177734375, 464.491943359375, 83.46621704101562, 180.98910522460938, 127.60598754882812, -62.72796630859375, -21.051288604736328, 522.9176635742188, 3.975996971130371, 169.82711791992188, -34.8950309753418, 359.7945251464844, 184.2871551513672, 164.35968017578125, 361.67816162109375, 166.03598022460938, 95.2237777709961, 138.67478942871094, 853.2113037109375, 418.2663269042969, 203.85928344726562, 145.06021118164062, 598.9522705078125, 42.89793395996094, 791.8960571289062, 450.71484375, 320.4619140625, 74.50543212890625, 90.26370239257812, 163.8817596435547, 241.49655151367188, 96.63656616210938, -31.776397705078125, 168.8589324951172, 113.70056915283203, 48.18946838378906, 109.54893493652344, -9.222137451171875, 238.03167724609375, 74.78460693359375, 325.1209716796875, -68.56228637695312, 229.29434204101562, 194.9404296875, 71.8394775390625, 42.429405212402344, 278.7948913574219, 501.5081481933594, 276.3839111328125, 157.69119262695312, -170.56488037109375, 291.29327392578125, 252.08474731445312, 254.21011352539062, -230.8702850341797, 389.9983215332031, 91.75491333007812, -117.09326171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000241.npy"} +{"epoch": 0.35389133627019087, "step": 242, "batch_size": 64, "mean": 126.48635864257812, "std": 182.338134765625, "min": -290.371826171875, "p10": -53.51850891113279, "median": 95.41917419433594, "p90": 313.29255676269537, "max": 740.7994995117188, "pos_frac": 0.796875, "sample": [-33.18549346923828, 3.8505210876464844, 170.51318359375, 269.91168212890625, 37.56515121459961, -290.371826171875, 12.628372192382812, -9.18756103515625, 319.2076416015625, 145.70220947265625, 28.214879989624023, 263.41265869140625, 650.2230224609375, -15.581361770629883, 4.256172180175781, 65.58759307861328, 132.2659912109375, 132.6822509765625, 60.997467041015625, 13.772726058959961, 227.12818908691406, 89.4012451171875, 259.129638671875, 228.66293334960938, 263.564208984375, -62.23265838623047, 91.66912078857422, 53.029502868652344, 124.65697479248047, -94.87374877929688, 158.40069580078125, 216.60498046875, 100.54425811767578, -147.72625732421875, 270.2345886230469, 108.77933502197266, 299.4906921386719, 473.74261474609375, 6.947914123535156, 87.01025390625, 95.42176818847656, 206.6499481201172, -5.866607666015625, 740.7994995117188, 27.063941955566406, 72.75687408447266, 473.11798095703125, 95.41658020019531, 11.677505493164062, -3.9102001190185547, -124.4846420288086, 447.67535400390625, 179.3648223876953, -31.15930938720703, 63.52391052246094, 126.87786865234375, 145.2564239501953, -146.6427459716797, 246.7331085205078, 240.18472290039062, 6.299112319946289, 466.5198059082031, 110.92495727539062, -65.70355987548828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000242.npy"} +{"epoch": 0.355359765051395, "step": 243, "batch_size": 64, "mean": 155.12164306640625, "std": 187.65859985351562, "min": -309.8066101074219, "p10": -57.78369674682616, "median": 131.4586410522461, "p90": 385.7689239501953, "max": 751.2059936523438, "pos_frac": 0.859375, "sample": [386.05523681640625, 32.08065414428711, 89.65978240966797, 480.91302490234375, 198.47808837890625, 89.5630111694336, 129.6392364501953, 177.25538635253906, -86.62118530273438, -104.0468978881836, -63.816314697265625, 80.69393920898438, 230.39556884765625, 12.088937759399414, -144.9358367919922, 20.094207763671875, -135.5350341796875, 331.51409912109375, 139.2547149658203, 165.06414794921875, -309.8066101074219, 37.70274353027344, 178.54595947265625, 16.017005920410156, 21.98111915588379, 349.1318359375, 51.779754638671875, 210.50460815429688, 87.68737030029297, 417.406005859375, 293.1231689453125, 258.39910888671875, -43.70758819580078, 150.7129364013672, -91.78778076171875, 361.87152099609375, 265.86529541015625, 565.292724609375, 357.2020263671875, 252.77734375, 311.0813903808594, 125.41004180908203, 370.570068359375, 751.2059936523438, 420.854248046875, 46.81007385253906, 133.27804565429688, 45.4687385559082, 385.1008605957031, -13.876953125, 164.43505859375, 210.55245971679688, 23.6824951171875, 120.61187744140625, 510.4537658691406, 42.61090850830078, 149.02767944335938, 78.72148132324219, 5.541572570800781, 252.36557006835938, 15.450576782226562, 49.621002197265625, 67.68540954589844, 202.6287841796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000243.npy"} +{"epoch": 0.3568281938325991, "step": 244, "batch_size": 64, "mean": 159.97470092773438, "std": 167.3809814453125, "min": -160.62986755371094, "p10": -18.313524627685542, "median": 145.09481048583984, "p90": 407.21611633300785, "max": 639.5817260742188, "pos_frac": 0.828125, "sample": [66.21037292480469, -20.33415985107422, 119.40225219726562, 173.11111450195312, 27.725460052490234, 57.90742111206055, 452.5216979980469, 131.38516235351562, -58.666473388671875, -13.241180419921875, 182.30943298339844, 236.38204956054688, 14.176483154296875, 318.5107421875, 40.19110107421875, 146.37123107910156, -4.228130340576172, 454.8946533203125, 32.989627838134766, 105.00739288330078, -160.62986755371094, 122.277587890625, 297.123779296875, -67.99398803710938, 194.349609375, 34.45758819580078, 170.98492431640625, 236.39816284179688, 155.1299591064453, 40.624786376953125, 255.3324432373047, 114.08768463134766, 315.54193115234375, 312.8215637207031, 474.904052734375, 565.3275756835938, 184.10829162597656, 411.4911804199219, 52.90568161010742, 639.5817260742188, 242.93759155273438, -13.598709106445312, 397.240966796875, 184.33627319335938, 161.10797119140625, -76.49530792236328, 152.81748962402344, -36.001861572265625, -10.837127685546875, 255.70350646972656, 33.92890930175781, 20.343238830566406, 151.3513946533203, 90.17264556884766, 1.899871826171875, 113.85342407226562, 356.84735107421875, 215.0545654296875, -62.48311233520508, 143.81838989257812, 97.83346557617188, 482.78082275390625, 222.32640075683594, 301.9913330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000244.npy"} +{"epoch": 0.35829662261380324, "step": 245, "batch_size": 64, "mean": 124.18669128417969, "std": 179.62229919433594, "min": -421.6874694824219, "p10": -27.989055633544915, "median": 99.34983825683594, "p90": 329.10916748046884, "max": 623.2349243164062, "pos_frac": 0.796875, "sample": [120.18347930908203, 415.58447265625, 177.73272705078125, 132.86573791503906, 184.40289306640625, 161.5943145751953, 60.456642150878906, -7.6892242431640625, 217.8458251953125, 125.50323486328125, 88.13455200195312, 102.55046081542969, 159.5196990966797, 24.09958267211914, 144.29656982421875, 151.13262939453125, 213.437744140625, 286.2708740234375, 92.45883178710938, 17.675334930419922, 542.0169677734375, 126.25083923339844, -19.71255111694336, 341.6903076171875, 65.64842987060547, -4.2945404052734375, 244.00108337402344, -31.536128997802734, -209.23265075683594, -19.021690368652344, 299.753173828125, 151.99551391601562, 623.2349243164062, -184.6212921142578, 186.96356201171875, 21.156837463378906, 19.878440856933594, 96.14921569824219, 295.3259582519531, 83.89857482910156, -15.93741226196289, 212.1571044921875, 141.18406677246094, -78.33570861816406, 76.14665985107422, -421.6874694824219, 280.73297119140625, 297.40509033203125, 30.050270080566406, 221.3937225341797, 347.1995544433594, 36.84043884277344, 590.8084106445312, -81.17042541503906, -1.6633110046386719, 505.26947021484375, 78.02970886230469, 110.57044219970703, 12.067062377929688, -119.8463134765625, 75.56039428710938, 60.43659973144531, 78.45897674560547, 14.676551818847656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000245.npy"} +{"epoch": 0.35976505139500736, "step": 246, "batch_size": 64, "mean": 126.03412628173828, "std": 183.54161071777344, "min": -267.17095947265625, "p10": -74.38367843627928, "median": 84.12926864624023, "p90": 354.9515716552735, "max": 744.6967163085938, "pos_frac": 0.796875, "sample": [201.6971435546875, 347.5599365234375, 149.82907104492188, 84.41141510009766, 375.6986083984375, 211.8839874267578, 744.6967163085938, -177.80792236328125, 42.884639739990234, -267.17095947265625, 57.453575134277344, 89.3032455444336, 73.735107421875, 0.3749103546142578, 166.89254760742188, 366.33282470703125, 302.2088623046875, 73.99384307861328, 45.71287536621094, 55.83837890625, 55.11981201171875, 375.8821716308594, 145.08029174804688, -78.37886810302734, 113.69392395019531, -95.47372436523438, 217.86929321289062, -63.92950439453125, 332.8463134765625, -114.25926971435547, 4.6976318359375, -27.917686462402344, 119.50502014160156, 106.65629577636719, 308.59197998046875, 50.36974334716797, 10.818498611450195, -21.979045867919922, 5.547943115234375, 236.85150146484375, 358.1194152832031, 129.71661376953125, 36.18073272705078, 190.07589721679688, 83.84712219238281, 153.31051635742188, 78.87886047363281, -65.06156921386719, 7.831249237060547, 223.7102508544922, -80.77323913574219, 91.10289001464844, 319.4315185546875, 205.55516052246094, 52.3173828125, 57.69569396972656, 10.791332244873047, 589.0223999023438, 299.4904479980469, 607.0714111328125, -34.52300262451172, 225.79379272460938, -95.59831237792969, -4.923854827880859], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000246.npy"} +{"epoch": 0.36123348017621143, "step": 247, "batch_size": 64, "mean": 176.29876708984375, "std": 193.0809326171875, "min": -293.2818908691406, "p10": -28.191698455810528, "median": 155.4546127319336, "p90": 443.40687255859376, "max": 642.8453369140625, "pos_frac": 0.875, "sample": [-255.6888885498047, 101.04002380371094, -91.31251525878906, 392.7423095703125, 86.57682800292969, 456.093505859375, 163.35372924804688, 436.13275146484375, -49.99365997314453, 360.31414794921875, 373.4419250488281, 470.1671447753906, 203.22821044921875, 293.6929626464844, 67.86203002929688, 314.06793212890625, 478.71221923828125, 49.51123809814453, 345.9277648925781, 306.17767333984375, 170.04489135742188, 39.51963806152344, 32.72625732421875, 122.04576873779297, 22.437728881835938, 248.1430206298828, 30.020301818847656, 84.24284362792969, 99.06746673583984, 118.09423828125, 237.75723266601562, 342.77886962890625, 92.73395538330078, 151.22964477539062, 58.89326477050781, 392.960693359375, 515.8489990234375, 354.82891845703125, 103.72349548339844, -153.01394653320312, 16.87594223022461, 198.32545471191406, -9.74871826171875, 188.776123046875, 62.55902862548828, 17.296165466308594, 159.67958068847656, 374.66497802734375, 596.110107421875, 92.77334594726562, 39.23047637939453, -125.9256591796875, 92.84431457519531, 642.8453369140625, 213.00473022460938, -36.09583282470703, 177.50396728515625, 303.24884033203125, 182.4051055908203, -293.2818908691406, 446.52435302734375, 16.663009643554688, 140.0789337158203, 220.6326904296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000247.npy"} +{"epoch": 0.36270190895741555, "step": 248, "batch_size": 64, "mean": 153.8965606689453, "std": 138.45750427246094, "min": -165.0022430419922, "p10": -1.5450597763061502, "median": 152.35977935791016, "p90": 332.7976135253906, "max": 456.97161865234375, "pos_frac": 0.890625, "sample": [317.73138427734375, 332.2038879394531, 96.33588409423828, 175.29241943359375, 201.3985137939453, 0.6964321136474609, 152.1393585205078, 347.8431091308594, 132.3270721435547, -97.68966674804688, 17.304367065429688, 247.97259521484375, 181.62548828125, 36.020599365234375, 215.98008728027344, 58.216949462890625, 27.187074661254883, 75.34364318847656, 167.21417236328125, -6.356586456298828, 152.5802001953125, 250.02035522460938, 259.45635986328125, 383.3395690917969, 19.1002197265625, -39.94068908691406, 120.7044906616211, 332.79193115234375, 126.27595520019531, 238.88088989257812, 169.9006805419922, -165.0022430419922, 149.54550170898438, 268.77838134765625, 157.1874237060547, 191.24851989746094, 85.19235229492188, 7.8091278076171875, 8.460617065429688, 52.6853141784668, 199.02284240722656, 274.0780944824219, 182.91436767578125, 429.42547607421875, 101.57969665527344, 97.62797546386719, 447.10113525390625, 302.3161315917969, 38.451873779296875, 456.97161865234375, 19.357093811035156, 187.69717407226562, 332.800048828125, 239.14439392089844, 247.50997924804688, 358.2533874511719, -2.5056991577148438, 58.57673645019531, 117.97157287597656, 295.4755859375, 119.31578826904297, 5.032337188720703, -4.272518157958984, -102.26734924316406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000248.npy"} +{"epoch": 0.3641703377386197, "step": 249, "batch_size": 64, "mean": 84.9371109008789, "std": 188.2987518310547, "min": -362.67083740234375, "p10": -151.67853546142575, "median": 77.01847076416016, "p90": 293.06227416992186, "max": 695.1004028320312, "pos_frac": 0.671875, "sample": [-53.87730407714844, 293.13092041015625, -100.67509460449219, -99.99018859863281, 115.7939224243164, -129.11978149414062, 19.843765258789062, 129.95359802246094, 74.2965087890625, 358.5647888183594, 35.466522216796875, 58.76519775390625, 76.85044860839844, 61.96454620361328, 218.3444061279297, -362.67083740234375, -113.06132507324219, 375.816650390625, 150.8589324951172, -76.49711608886719, 159.98562622070312, 190.4327392578125, -165.05490112304688, 148.11300659179688, -33.75550842285156, 351.1961669921875, -161.34657287597656, 282.1697998046875, 150.69039916992188, 86.8541259765625, -29.475276947021484, -29.785762786865234, 236.30886840820312, 65.65513610839844, 129.97256469726562, -24.381072998046875, 183.55453491210938, -327.5758056640625, 125.68363952636719, 246.7127685546875, 210.97018432617188, -21.871856689453125, 695.1004028320312, -93.42967224121094, -205.92202758789062, -164.6812286376953, 59.696998596191406, 34.39390182495117, 112.07742309570312, 196.387939453125, 124.13684844970703, 260.6038818359375, -187.41677856445312, 267.7026672363281, 292.902099609375, 279.6849365234375, 77.18649291992188, -120.23440551757812, -33.340782165527344, 202.70863342285156, 34.248390197753906, 383.00531005859375, 410.91058349609375, 1.4419021606445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000249.npy"} +{"epoch": 0.3656387665198238, "step": 250, "batch_size": 64, "mean": 158.41148376464844, "std": 160.35458374023438, "min": -175.4219970703125, "p10": -33.96413478851318, "median": 151.82342529296875, "p90": 386.34230346679703, "max": 513.8613891601562, "pos_frac": 0.828125, "sample": [262.6365661621094, 20.271909713745117, 42.456573486328125, -7.255836486816406, -52.79229736328125, 185.1031036376953, 93.90067291259766, 468.5675048828125, 169.11239624023438, -98.51762390136719, 82.78600311279297, 507.7330627441406, 243.15740966796875, 148.5418243408203, 246.18466186523438, 151.5997772216797, 33.65392303466797, 140.7647705078125, 326.94317626953125, 347.95513916015625, 152.00335693359375, -84.68291473388672, -34.882728576660156, 96.2186508178711, 96.51856994628906, 212.234130859375, 89.10431671142578, -28.92122459411621, 72.97930908203125, -4.933349609375, -31.820749282836914, 42.65742492675781, 156.7063751220703, 280.0104675292969, -52.83544158935547, 498.82183837890625, 165.88909912109375, 180.83212280273438, -121.10430145263672, 11.934364318847656, 209.20460510253906, 402.7939453125, 290.27117919921875, 180.6436309814453, 151.64349365234375, 59.33129119873047, 261.49627685546875, 144.78744506835938, 291.572021484375, 142.7763671875, 68.35902404785156, 3.4475860595703125, 201.84912109375, 270.0278625488281, -175.4219970703125, 224.9923095703125, 412.54913330078125, 217.8777313232422, 165.66470336914062, 469.6133117675781, 325.58294677734375, 18.827287673950195, 513.8613891601562, 277.05242919921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000250.npy"} +{"epoch": 0.3671071953010279, "step": 251, "batch_size": 64, "mean": 106.85773468017578, "std": 183.47549438476562, "min": -365.3336181640625, "p10": -59.44277343749998, "median": 90.65372085571289, "p90": 327.2239624023438, "max": 699.09326171875, "pos_frac": 0.703125, "sample": [368.1737365722656, 12.05984878540039, 326.12884521484375, -19.48339080810547, 222.65682983398438, -265.77142333984375, -365.3336181640625, 699.09326171875, -5.727165222167969, 189.1509246826172, 184.0084228515625, -131.44940185546875, 327.69329833984375, 54.528289794921875, 114.0950927734375, -35.841827392578125, -276.0381164550781, 475.1542053222656, -25.155019760131836, 170.64454650878906, 25.147991180419922, 281.9561767578125, -14.869789123535156, -25.30327606201172, 29.3187255859375, 153.0132293701172, -119.95025634765625, -67.06338500976562, 124.19113159179688, 234.6390380859375, 67.26910400390625, -129.60003662109375, 343.0290832519531, -38.55133819580078, 60.29882049560547, 220.95632934570312, 77.58235931396484, 133.91354370117188, 150.38829040527344, 141.94247436523438, 11.930160522460938, 237.90040588378906, -13.403253555297852, 286.26416015625, -4.1443634033203125, 355.3855285644531, 197.6637420654297, 185.24229431152344, 89.9727554321289, 121.82886505126953, -32.156150817871094, 16.60106658935547, 560.6196899414062, 79.81723022460938, 316.87811279296875, 175.70269775390625, -41.661346435546875, 194.173583984375, 91.33468627929688, -11.794971466064453, 29.126914978027344, 138.66104125976562, 56.19074249267578, 129.8658905029297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000251.npy"} +{"epoch": 0.368575624082232, "step": 252, "batch_size": 64, "mean": 144.34500122070312, "std": 149.48019409179688, "min": -495.4229736328125, "p10": -15.836193847656244, "median": 157.97798919677734, "p90": 334.6926696777344, "max": 451.053955078125, "pos_frac": 0.859375, "sample": [285.482666015625, 372.5810852050781, 54.0981330871582, 220.31292724609375, 251.38963317871094, 61.96990966796875, 145.42825317382812, 346.7507629394531, 81.89151000976562, 165.94320678710938, 217.5534210205078, 23.885238647460938, 168.525634765625, 58.84974670410156, 115.76914978027344, 165.1454315185547, 55.13779067993164, 135.27110290527344, 182.13348388671875, -57.889854431152344, 252.1761932373047, 171.8650665283203, 20.880168914794922, 409.32049560546875, 254.47854614257812, -123.17533874511719, -20.960298538208008, 114.8773193359375, -18.249954223632812, 150.92628479003906, -27.797626495361328, 184.5087890625, 8.26707649230957, 141.53311157226562, 86.302734375, 110.67828369140625, 110.63717651367188, 403.1044616699219, 368.24749755859375, 312.2826232910156, 32.13275909423828, 165.02969360351562, 178.9349822998047, 48.09245300292969, 193.20401000976562, 336.25146484375, 274.4161376953125, 294.3132629394531, 192.7052001953125, 166.8030548095703, -10.204086303710938, -495.4229736328125, 331.05548095703125, 267.25823974609375, 113.9434814453125, 107.33787536621094, -8.746744155883789, 172.52398681640625, 451.053955078125, -57.91239547729492, 79.67826843261719, 65.3355941772461, 186.97811889648438, 193.18685913085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000252.npy"} +{"epoch": 0.3700440528634361, "step": 253, "batch_size": 64, "mean": 140.40821838378906, "std": 173.90423583984375, "min": -381.0729064941406, "p10": -72.01529006958008, "median": 125.19615173339844, "p90": 370.7252319335939, "max": 575.4754638671875, "pos_frac": 0.8125, "sample": [-68.58133697509766, 26.96770477294922, 275.7210388183594, 49.07624816894531, 16.749267578125, 141.49917602539062, 226.39089965820312, 53.528404235839844, 185.89747619628906, 80.71186828613281, 97.74113464355469, 575.4754638671875, 240.5440673828125, 66.25048065185547, 212.65945434570312, 116.19064331054688, 396.5731506347656, 86.03352355957031, -82.21536254882812, 105.42381286621094, -26.36207389831543, -381.0729064941406, 235.47442626953125, 381.8414001464844, 344.7875061035156, 9.331056594848633, 442.487548828125, 33.78235626220703, -21.149505615234375, 50.70560073852539, 17.821449279785156, -74.186279296875, 134.8316192626953, 257.0285339355469, 293.99359130859375, 227.71878051757812, 277.37469482421875, -128.79051208496094, 287.23114013671875, 115.0114517211914, 251.4567413330078, 336.79571533203125, 163.56033325195312, 279.6545715332031, 407.969482421875, 115.46635437011719, 105.35389709472656, 412.3382263183594, 134.20166015625, -226.14410400390625, 343.27801513671875, 169.91458129882812, -90.5908203125, 48.359825134277344, -73.48698425292969, 110.84010314941406, 161.7591552734375, 229.43650817871094, 220.26185607910156, -32.65899658203125, -53.14407730102539, 42.860595703125, 465.9612731933594, 182.1849365234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000253.npy"} +{"epoch": 0.37151248164464024, "step": 254, "batch_size": 64, "mean": 119.60063171386719, "std": 200.97354125976562, "min": -268.7315979003906, "p10": -116.69888763427734, "median": 92.71843338012695, "p90": 400.39639587402365, "max": 815.716064453125, "pos_frac": 0.75, "sample": [110.89276123046875, -37.415077209472656, 152.69198608398438, 85.51862335205078, 100.86666870117188, 137.0242462158203, 24.24188232421875, -82.78163146972656, 149.68255615234375, 38.947975158691406, 134.12957763671875, 160.68467712402344, 232.33651733398438, 55.97477722167969, -122.27574157714844, -268.7315979003906, 198.11944580078125, -50.55241012573242, 89.93743896484375, 219.94552612304688, 4.853183746337891, 219.821044921875, 506.4474792480469, 323.6724853515625, 345.0406188964844, -191.67019653320312, 426.985107421875, 302.4886169433594, 6.968467712402344, 18.841012954711914, 95.49942779541016, 198.4368438720703, 73.1377182006836, -195.5631103515625, 19.705326080322266, 192.47561645507812, -70.56328582763672, 424.12030029296875, -109.83828735351562, 61.19449234008789, -25.743980407714844, -206.55059814453125, 61.55845642089844, 473.4835205078125, -4.594457626342773, 219.8704376220703, 4.011072158813477, 342.66656494140625, 64.5568618774414, 185.1990966796875, 815.716064453125, 163.68934631347656, 85.892822265625, -17.409339904785156, -160.79776000976562, 559.2754516601562, 120.9442138671875, 233.91854858398438, 157.35890197753906, 67.10285949707031, -119.63914489746094, 537.9383544921875, 148.72250366210938, -34.02044677734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000254.npy"} +{"epoch": 0.37298091042584436, "step": 255, "batch_size": 64, "mean": 128.23907470703125, "std": 180.14280700683594, "min": -425.31768798828125, "p10": -49.17146377563476, "median": 104.87778854370117, "p90": 349.645428466797, "max": 582.887451171875, "pos_frac": 0.78125, "sample": [582.887451171875, -109.3348388671875, 27.659881591796875, 360.21759033203125, 151.96096801757812, -49.407745361328125, -47.96520233154297, -32.81999206542969, -425.31768798828125, 61.2296257019043, 288.57623291015625, 92.901123046875, -9.153533935546875, 21.70351791381836, 226.46202087402344, 224.741943359375, -157.76229858398438, -217.30068969726562, 166.39785766601562, 288.3308410644531, 244.59075927734375, 45.88661193847656, 80.40892791748047, 18.121768951416016, 98.85018920898438, -48.620140075683594, -125.73532104492188, 378.0064392089844, 3.680927276611328, 177.97079467773438, 216.4662322998047, 236.82037353515625, 72.53787231445312, 314.5969543457031, 370.26751708984375, 313.2315368652344, 262.2248840332031, 361.8643493652344, 324.90948486328125, 538.853271484375, 39.584144592285156, -5.455318450927734, 279.11712646484375, 146.49835205078125, 21.943538665771484, 193.89529418945312, 64.99972534179688, 223.17257690429688, -117.50642395019531, 324.97705078125, 411.39569091796875, 53.17612838745117, 135.6492919921875, 33.695594787597656, -18.04766082763672, 133.52313232421875, 30.516273498535156, 110.90538787841797, 32.517478942871094, 147.3076171875, -42.83521270751953, 79.07649230957031, 318.30108642578125, 281.95263671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000255.npy"} +{"epoch": 0.3744493392070485, "step": 256, "batch_size": 64, "mean": 136.01217651367188, "std": 173.10562133789062, "min": -305.7796630859375, "p10": -62.291674041748045, "median": 118.13191604614258, "p90": 351.99562377929686, "max": 585.5707397460938, "pos_frac": 0.765625, "sample": [95.79815673828125, 520.8592529296875, 94.22199249267578, 230.06288146972656, 193.21798706054688, 252.00997924804688, 31.016427993774414, 163.35174560546875, -33.026817321777344, 13.978744506835938, 296.80615234375, 217.07879638671875, 220.83395385742188, 351.13177490234375, 251.73239135742188, -134.5611572265625, 61.254024505615234, 200.3269500732422, -305.7796630859375, 31.244346618652344, 282.66986083984375, 115.94331359863281, -124.88900756835938, 291.4373474121094, 59.48859786987305, 261.9087829589844, -60.92705535888672, -62.87651062011719, -66.26052856445312, 183.5629119873047, 379.75506591796875, -4.7972869873046875, 212.069091796875, 86.95855712890625, 61.511199951171875, 128.20742797851562, 347.7374572753906, -69.13812255859375, 52.38465881347656, 42.70106506347656, -176.9988250732422, 38.723785400390625, 28.148590087890625, 16.077972412109375, 178.37106323242188, -22.92477798461914, 203.29141235351562, 120.32051849365234, -18.61505126953125, 268.25189208984375, -44.960235595703125, 105.06800842285156, -8.152626037597656, 199.58880615234375, 352.3658447265625, 388.05902099609375, 142.09510803222656, 65.84402465820312, 585.5707397460938, 526.339599609375, 222.25503540039062, 317.8383483886719, -6.589834213256836, 355.8064880371094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000256.npy"} +{"epoch": 0.37591776798825255, "step": 257, "batch_size": 64, "mean": 143.2915802001953, "std": 182.66693115234375, "min": -267.51116943359375, "p10": -47.056467819213864, "median": 106.3177261352539, "p90": 427.5809020996094, "max": 633.9842529296875, "pos_frac": 0.78125, "sample": [464.05242919921875, -42.94729232788086, 9.217727661132812, 106.06843566894531, 357.7488098144531, -12.049240112304688, 227.99371337890625, 434.49737548828125, 229.48776245117188, 158.48867797851562, -16.734342575073242, -48.817543029785156, 103.85139465332031, 296.0711669921875, 147.50714111328125, 33.016204833984375, 226.91064453125, 210.64723205566406, 29.960941314697266, 428.3675231933594, 35.61726379394531, 409.3373107910156, 264.58074951171875, -147.7947540283203, 87.76981353759766, 110.44004821777344, -92.55220031738281, 14.887809753417969, -31.622982025146484, 453.9605407714844, 197.31504821777344, 231.33299255371094, 257.136474609375, 633.9842529296875, 92.6145248413086, -54.7711181640625, 106.5670166015625, 9.16285514831543, 125.18665313720703, -267.51116943359375, 79.36089324951172, 425.7454528808594, 41.1076545715332, 238.11785888671875, 196.4680938720703, 215.96652221679688, 62.8333740234375, 1.0254745483398438, 511.90814208984375, 236.00839233398438, 247.10601806640625, 306.58087158203125, -14.115699768066406, 104.71654510498047, 46.963348388671875, 516.385986328125, -91.84420776367188, -12.91295051574707, 168.67544555664062, -165.80555725097656, 33.41496276855469, 37.42498779296875, -12.06573486328125, 218.615234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000257.npy"} +{"epoch": 0.37738619676945667, "step": 258, "batch_size": 64, "mean": 139.01031494140625, "std": 194.26425170898438, "min": -499.5169982910156, "p10": -50.48630294799804, "median": 134.64312744140625, "p90": 365.74736328125005, "max": 661.735595703125, "pos_frac": 0.765625, "sample": [285.470947265625, 35.989524841308594, -86.03663635253906, 264.5644226074219, 51.35700988769531, 133.80465698242188, 261.7614440917969, 252.67922973632812, 224.94821166992188, -8.021728515625, -35.887481689453125, -34.584815979003906, -54.413230895996094, 49.82255554199219, 102.10520935058594, -33.15907287597656, -41.189979553222656, 106.99618530273438, 153.66102600097656, -7.183509826660156, 56.640655517578125, 255.73269653320312, 520.4738159179688, 209.54173278808594, 467.7138366699219, 252.66751098632812, 191.61004638671875, 24.01203155517578, 169.93902587890625, 348.2260437011719, 201.5894012451172, 279.6517333984375, 310.231689453125, 181.4418487548828, -209.9552001953125, 92.46058654785156, 271.0711364746094, -353.57647705078125, 21.996849060058594, 119.02735137939453, 381.6627502441406, -3.9558067321777344, -92.41603088378906, 373.2565002441406, 33.63874435424805, -499.5169982910156, 661.735595703125, 135.13888549804688, -131.94924926757812, 113.83724212646484, 233.2142333984375, -41.32347106933594, 293.9345703125, 333.9938659667969, 378.1941223144531, 51.78331756591797, 287.08258056640625, 421.00372314453125, 149.056640625, 242.31057739257812, 74.51325988769531, 134.14736938476562, 251.673095703125, 82.46488189697266], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000258.npy"} +{"epoch": 0.3788546255506608, "step": 259, "batch_size": 64, "mean": 119.85458374023438, "std": 182.16685485839844, "min": -241.02125549316406, "p10": -93.11334915161132, "median": 84.36827087402344, "p90": 359.64826965332037, "max": 542.901611328125, "pos_frac": 0.75, "sample": [215.90353393554688, -16.02271842956543, 147.38174438476562, 196.1188201904297, 7.493999481201172, -102.372802734375, 316.6200866699219, 417.1219482421875, 441.0936584472656, 212.62123107910156, 53.664466857910156, -53.11540985107422, 106.74351501464844, -71.26447296142578, 8.991241455078125, -97.47885131835938, 344.5633850097656, -53.69255065917969, 468.349609375, 184.26528930664062, 112.70797729492188, 151.64508056640625, 83.4429931640625, 55.12179946899414, 290.79119873046875, 20.425392150878906, 64.36463928222656, 50.05314636230469, 542.901611328125, -183.69789123535156, 58.942474365234375, 33.7039794921875, 512.2537841796875, 177.21011352539062, 21.309837341308594, -163.8253631591797, 136.58648681640625, 341.885986328125, -147.14321899414062, 314.84173583984375, -157.12399291992188, 16.30193328857422, 328.98846435546875, -79.46498107910156, 85.29354858398438, 50.647003173828125, 366.11322021484375, 314.9208984375, -241.02125549316406, 163.3788604736328, -38.568756103515625, 295.145751953125, 276.56488037109375, -5.26776123046875, 383.3857421875, -82.92717742919922, 127.05117797851562, 322.31512451171875, 133.12786865234375, -75.13977813720703, 55.55112838745117, 36.61892318725586, 142.22955322265625, 52.065528869628906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000259.npy"} +{"epoch": 0.3803230543318649, "step": 260, "batch_size": 64, "mean": 160.49334716796875, "std": 180.99234008789062, "min": -183.06951904296875, "p10": -15.737824249267575, "median": 124.55017852783203, "p90": 389.5504455566407, "max": 974.7369384765625, "pos_frac": 0.875, "sample": [221.57586669921875, -65.94896697998047, 44.772254943847656, -105.85824584960938, 15.137655258178711, 164.6064453125, 118.37757110595703, 168.74925231933594, 370.91033935546875, 46.23704528808594, 67.595703125, 397.5390625, 118.94166564941406, 102.97859191894531, 154.4777069091797, 974.7369384765625, 168.14744567871094, 259.1183776855469, 59.97662353515625, 469.35150146484375, -183.06951904296875, 157.1058349609375, -31.723691940307617, 170.83221435546875, -107.5125503540039, 0.01860809326171875, 153.45745849609375, -17.243301391601562, 245.39187622070312, 114.11532592773438, 120.57954406738281, 354.45184326171875, -12.225044250488281, 233.4676971435547, 132.7375030517578, 340.1143798828125, 364.3739318847656, 157.94671630859375, 119.66291809082031, 99.51687622070312, 95.53007507324219, 66.94842529296875, 336.13671875, 128.52081298828125, 150.28768920898438, 25.552608489990234, 31.893142700195312, 84.1864013671875, 176.69412231445312, 189.85581970214844, 209.00726318359375, 175.05203247070312, 106.86780548095703, 416.13470458984375, 27.903358459472656, -43.543312072753906, 83.56707763671875, 427.08599853515625, 21.27574920654297, 574.3060913085938, 91.63526153564453, 411.0841064453125, 40.955291748046875, 281.215576171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000260.npy"} +{"epoch": 0.38179148311306904, "step": 261, "batch_size": 64, "mean": 150.63623046875, "std": 205.22320556640625, "min": -318.5157775878906, "p10": -74.42410011291503, "median": 126.93074035644531, "p90": 386.44739685058596, "max": 907.6098022460938, "pos_frac": 0.796875, "sample": [389.7518005371094, 373.46893310546875, 37.588050842285156, -230.4421844482422, 120.94325256347656, 907.6098022460938, 35.820587158203125, 378.73712158203125, -35.26787185668945, 208.20025634765625, 7.805585861206055, 73.04749298095703, -89.84716796875, 408.96514892578125, 112.67143249511719, 215.46548461914062, 224.88180541992188, 344.6830749511719, 360.08856201171875, -52.06806945800781, 91.83706665039062, 81.87136840820312, 132.91822814941406, 203.60403442382812, 191.07815551757812, 315.7879638671875, 502.63714599609375, 15.732696533203125, 301.7832946777344, 229.48512268066406, 65.01687622070312, 29.45301055908203, -1.398406982421875, 76.62193298339844, 242.6612548828125, 53.81444549560547, 275.400146484375, 372.43011474609375, 263.5489501953125, 135.899658203125, 230.01266479492188, -63.798946380615234, 479.9837341308594, 412.52813720703125, 2.1839046478271484, 371.33685302734375, -170.33273315429688, 206.38841247558594, 3.1110305786132812, 234.10610961914062, 107.2220687866211, 481.90380859375, -25.385459899902344, -318.5157775878906, -188.38253784179688, 115.7668228149414, -168.51885986328125, -15.268495559692383, 169.48782348632812, 191.8844451904297, 57.954124450683594, 81.55529022216797, 152.1884765625, -78.97773742675781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000261.npy"} +{"epoch": 0.3832599118942731, "step": 262, "batch_size": 64, "mean": 123.85541534423828, "std": 165.8516845703125, "min": -250.5835418701172, "p10": -75.60032844543454, "median": 129.5196075439453, "p90": 291.31186828613284, "max": 753.728759765625, "pos_frac": 0.8125, "sample": [73.68299865722656, 399.4861755371094, 113.0597915649414, 211.438720703125, -250.5835418701172, 328.1202087402344, 461.043212890625, 121.47740173339844, 149.08389282226562, 259.26605224609375, -152.253662109375, -216.84286499023438, 138.60568237304688, 753.728759765625, 181.99139404296875, -172.54367065429688, 164.8245849609375, 261.5806884765625, -15.521780014038086, 179.65713500976562, 160.39715576171875, 53.08009338378906, -86.61643981933594, 292.7684631347656, 134.56454467773438, 5.3112640380859375, 40.13942337036133, 386.54876708984375, 116.20755004882812, 287.91314697265625, 51.049560546875, 87.28165435791016, 92.46224975585938, -49.89606857299805, 80.71195983886719, 60.57147979736328, 164.94964599609375, -31.903762817382812, 162.67242431640625, 152.5342559814453, 144.58087158203125, 134.18292236328125, 9.739280700683594, 236.28424072265625, 243.1166534423828, 75.94857025146484, 258.69775390625, 109.50072479248047, -99.17449188232422, -48.70580291748047, 65.91395568847656, 36.389495849609375, 45.77323913574219, 298.54449462890625, -161.333984375, -47.817100524902344, 124.85629272460938, 155.23110961914062, 234.92066955566406, 246.96243286132812, 180.245849609375, 41.10063171386719, 256.96466064453125, 234.77572631835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000262.npy"} +{"epoch": 0.38472834067547723, "step": 263, "batch_size": 64, "mean": 177.93978881835938, "std": 210.72232055664062, "min": -329.54010009765625, "p10": -76.6076217651367, "median": 150.9087905883789, "p90": 466.18482360839863, "max": 729.54541015625, "pos_frac": 0.796875, "sample": [337.8810119628906, -329.54010009765625, 148.63816833496094, 166.07115173339844, 360.5029296875, 57.82085418701172, 323.98834228515625, 49.231414794921875, 51.84867858886719, 93.0715560913086, -233.37249755859375, 170.53370666503906, 70.80589294433594, 307.10870361328125, 325.6640930175781, 184.8760528564453, 18.782899856567383, 355.962158203125, 119.72225189208984, 145.77752685546875, -7.174386978149414, -83.45944213867188, 47.304298400878906, 294.21197509765625, -22.047626495361328, -99.9107666015625, 324.4202880859375, 153.17941284179688, -27.07482147216797, 102.11888122558594, 43.07838439941406, 24.000898361206055, 356.0922546386719, 57.11970520019531, -60.62004089355469, 502.68499755859375, 291.2268371582031, 105.85845947265625, 399.7595520019531, 485.266845703125, 108.2409439086914, 503.64312744140625, 104.11259460449219, 269.4698486328125, 340.4289245605469, 158.63278198242188, 484.63818359375, -86.44789123535156, 242.1600341796875, 264.6595458984375, 389.11785888671875, 423.1269836425781, 688.46533203125, -178.3621063232422, -9.903366088867188, 565.5716552734375, -33.231361389160156, -93.4128646850586, 192.94700622558594, 205.67388916015625, 132.0683135986328, 729.54541015625, 137.89724731445312, 237.69338989257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000263.npy"} +{"epoch": 0.38619676945668135, "step": 264, "batch_size": 64, "mean": 151.44468688964844, "std": 212.54832458496094, "min": -258.09716796875, "p10": -79.34185409545896, "median": 127.4139633178711, "p90": 453.01604309082035, "max": 655.5096435546875, "pos_frac": 0.765625, "sample": [217.0185546875, 299.7891540527344, 47.3005256652832, 655.5096435546875, 87.16000366210938, -258.09716796875, -185.07168579101562, 406.9818420410156, 88.10015869140625, 482.40252685546875, -52.947914123535156, 31.870641708374023, 102.93650817871094, -141.36294555664062, 160.1658935546875, 194.70571899414062, 283.09588623046875, 441.33233642578125, 417.9170227050781, 308.87359619140625, 67.8692626953125, 247.13705444335938, 287.3631591796875, 211.33937072753906, 55.69788360595703, 34.74397277832031, -50.66510772705078, -14.617782592773438, -59.167747497558594, 214.3762664794922, -87.98789978027344, -4.420036315917969, 51.58991241455078, 616.7243041992188, 2.8388671875, 458.0233459472656, 140.23928833007812, 244.00631713867188, 21.177474975585938, 236.2073516845703, 58.134796142578125, 229.44920349121094, 120.32308959960938, 143.99635314941406, -14.159141540527344, 633.4546508789062, 402.97283935546875, -231.30810546875, 231.25143432617188, -166.74676513671875, 184.24562072753906, 11.416461944580078, 240.07138061523438, 501.03338623046875, -37.57939147949219, 134.5048370361328, 21.424537658691406, 58.745880126953125, -234.9173126220703, 509.2015075683594, 322.4529113769531, 262.3826904296875, -10.165939331054688, 62.11979675292969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000264.npy"} +{"epoch": 0.3876651982378855, "step": 265, "batch_size": 64, "mean": 147.303955078125, "std": 244.25196838378906, "min": -500.40045166015625, "p10": -98.5947494506836, "median": 124.05256652832031, "p90": 437.1080841064454, "max": 857.8463134765625, "pos_frac": 0.75, "sample": [107.44039154052734, 421.0201110839844, 327.9361572265625, 240.52513122558594, 68.41094207763672, 170.0934600830078, 348.8445739746094, 20.21955108642578, 640.1756591796875, 112.98931884765625, 250.64035034179688, -328.6501770019531, 23.78281021118164, -39.28580093383789, -146.91778564453125, 43.442081451416016, 378.94854736328125, -119.73912048339844, 96.3305892944336, -25.667999267578125, 268.94708251953125, 857.8463134765625, -500.40045166015625, 527.8598022460938, -92.42654418945312, 135.11581420898438, 212.998779296875, 478.8609313964844, -77.10874938964844, 737.4027709960938, -246.4737091064453, 300.0284729003906, 62.54441833496094, -65.41473388671875, 93.75970458984375, -58.2459716796875, 20.037017822265625, -49.95318603515625, 402.91790771484375, -101.23826599121094, 447.0091247558594, 95.2528076171875, 69.77791595458984, 406.4767150878906, -41.56867218017578, 180.7360382080078, 20.386260986328125, 170.40322875976562, -33.34864807128906, 286.842041015625, 106.11649322509766, 144.911865234375, 143.124755859375, 413.8201904296875, 153.84800720214844, 149.84519958496094, 232.1758270263672, 137.35928344726562, 50.23592758178711, -335.0928039550781, 444.0029296875, 93.92572021484375, 363.64990234375, 229.9663543701172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000265.npy"} +{"epoch": 0.3891336270190896, "step": 266, "batch_size": 64, "mean": 119.01298522949219, "std": 200.33860778808594, "min": -315.40142822265625, "p10": -81.63846359252928, "median": 77.05123519897461, "p90": 413.24994201660166, "max": 706.061767578125, "pos_frac": 0.71875, "sample": [-13.460708618164062, 552.5343017578125, 41.643470764160156, 124.38481903076172, 235.3240509033203, 139.9695587158203, -34.89381408691406, 272.4019470214844, 83.00564575195312, 456.90277099609375, 149.26609802246094, 305.59100341796875, -60.73219299316406, 262.6594543457031, -118.34245300292969, 53.282676696777344, 280.2220764160156, 91.52828979492188, 169.48081970214844, 56.774620056152344, 112.68818664550781, -273.80401611328125, -50.211952209472656, 373.49078369140625, 180.9661102294922, 391.3665466308594, 143.6636199951172, 6.494712829589844, 57.232032775878906, 422.6285400390625, 476.54742431640625, 276.77960205078125, 706.061767578125, 468.1796875, 226.86476135253906, 64.28585815429688, 72.78800964355469, 234.85845947265625, 39.252586364746094, 69.5346908569336, 72.15060424804688, 143.60772705078125, 64.0706558227539, 4.269201278686523, 172.52389526367188, -13.884075164794922, -315.40142822265625, -226.80230712890625, 268.7835693359375, 33.13536834716797, -156.66940307617188, -89.09351348876953, -40.20274353027344, -64.24334716796875, -60.643150329589844, -44.79096984863281, -22.616052627563477, -35.70165252685547, -170.14581298828125, 459.8547668457031, 231.2567138671875, 264.2103576660156, 14.637947082519531, 81.31446075439453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000266.npy"} +{"epoch": 0.39060205580029367, "step": 267, "batch_size": 64, "mean": 146.3773193359375, "std": 207.8628692626953, "min": -481.7513732910156, "p10": -42.570927810668934, "median": 122.60110092163086, "p90": 363.6195617675782, "max": 719.3955688476562, "pos_frac": 0.796875, "sample": [83.2144546508789, 172.93405151367188, 347.48089599609375, -27.449329376220703, 92.20390319824219, -19.44209098815918, 287.8208312988281, 185.9940643310547, 24.766693115234375, 21.205829620361328, -24.768585205078125, -52.566009521484375, 17.481536865234375, 370.5361328125, 174.78262329101562, 139.86883544921875, -54.38231658935547, 240.26669311523438, 494.00927734375, 719.3955688476562, 89.46290588378906, 97.95630645751953, 125.92668914794922, 22.145599365234375, -146.19618225097656, 86.83805084228516, 82.03256225585938, 105.90316009521484, 301.93902587890625, 216.8135223388672, 566.5145263671875, 406.6517639160156, 708.36376953125, 67.59872436523438, -21.30740737915039, 252.0589141845703, 15.867748260498047, -49.051612854003906, 12.327434539794922, 157.28225708007812, 282.8430480957031, 20.088333129882812, -6.128673553466797, 604.40283203125, -481.7513732910156, -4.560874938964844, -379.473388671875, 222.06768798828125, 199.68911743164062, 119.2755126953125, 231.55409240722656, 226.26754760742188, 98.66888427734375, 304.9640808105469, 111.98334503173828, 149.05975341796875, -49.95533752441406, 310.4744567871094, 55.52509307861328, 127.1763916015625, 341.3841247558594, 170.39080810546875, 294.7620544433594, 126.96024322509766], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000267.npy"} +{"epoch": 0.3920704845814978, "step": 268, "batch_size": 64, "mean": 141.71861267089844, "std": 205.74954223632812, "min": -337.403564453125, "p10": -52.47172927856444, "median": 133.72422790527344, "p90": 354.51913452148443, "max": 795.4550170898438, "pos_frac": 0.78125, "sample": [29.634376525878906, 450.04412841796875, 89.67896270751953, -40.026344299316406, 54.71220397949219, 70.22315216064453, 149.6796875, 75.1435546875, 361.946533203125, 112.52493286132812, 140.69308471679688, -316.90374755859375, 174.39102172851562, 45.646331787109375, 254.54571533203125, -222.1490478515625, 134.83511352539062, 62.55049133300781, -200.3541259765625, 132.61334228515625, 173.05035400390625, 795.4550170898438, 65.46925354003906, 156.70376586914062, 69.86326599121094, 257.22674560546875, 485.60003662109375, 542.4352416992188, -37.457210540771484, 337.18853759765625, -81.26432037353516, 68.42353820800781, -57.80546569824219, -16.76205825805664, 603.071533203125, 157.38548278808594, 207.33897399902344, 156.14956665039062, 189.4619140625, 212.75392150878906, 122.4189453125, 279.04107666015625, 8.438545227050781, -72.93110656738281, 304.580078125, 80.94232177734375, 179.3059539794922, 279.69482421875, -8.246826171875, 245.4617919921875, 196.4532928466797, 136.2325897216797, 713.39892578125, 214.44479370117188, 86.42208862304688, 20.949783325195312, 131.13369750976562, -0.00829315185546875, 324.88916015625, -14.45936393737793, 196.82333374023438, -337.403564453125, -36.17192459106445, 174.86407470703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000268.npy"} +{"epoch": 0.3935389133627019, "step": 269, "batch_size": 64, "mean": 170.71112060546875, "std": 220.3466033935547, "min": -427.46435546875, "p10": -92.44907760620116, "median": 172.338134765625, "p90": 438.0767791748047, "max": 650.135986328125, "pos_frac": 0.765625, "sample": [167.09507751464844, 282.3896179199219, 140.18853759765625, 316.83148193359375, -76.82140350341797, 240.85964965820312, 244.79632568359375, 177.58119201660156, -143.87570190429688, -47.37700653076172, 307.760986328125, -22.931259155273438, 435.3605041503906, 51.80792236328125, 351.432861328125, 294.10980224609375, 9.384910583496094, 192.5164337158203, -52.00675964355469, -427.46435546875, 272.9085998535156, 30.304996490478516, 204.79226684570312, -178.1083984375, -99.14665222167969, 415.5892333984375, 12.296394348144531, 625.4014892578125, 501.772216796875, 138.01654052734375, 158.968505859375, 44.97149658203125, 447.7701416015625, 290.3632507324219, -10.069992065429688, 336.48480224609375, 427.9581604003906, 87.2504653930664, 242.94326782226562, 23.62255859375, 6.297767639160156, 143.1212615966797, -191.96202087402344, 62.242767333984375, 132.1631317138672, 627.613525390625, 279.14312744140625, 507.7022705078125, 307.4327697753906, -23.135025024414062, 291.4845275878906, 222.76849365234375, 0.81341552734375, 650.135986328125, -137.34449768066406, 436.8512878417969, -28.97845458984375, 76.00121307373047, -13.883769989013672, 256.5050354003906, 416.63763427734375, 438.60198974609375, -158.0045928955078, 207.575927734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000269.npy"} +{"epoch": 0.39500734214390604, "step": 270, "batch_size": 64, "mean": 128.4014434814453, "std": 202.8406982421875, "min": -426.0492248535156, "p10": -67.9484634399414, "median": 118.8958854675293, "p90": 359.88875122070317, "max": 747.9391479492188, "pos_frac": 0.78125, "sample": [243.17919921875, 344.3734130859375, 447.70660400390625, -360.2355041503906, -426.0492248535156, -14.938217163085938, 49.188194274902344, 271.1808776855469, 106.3294448852539, 293.6099548339844, 340.10626220703125, -50.147308349609375, 140.16802978515625, 230.2902069091797, 17.322967529296875, 58.026302337646484, 94.7779769897461, 171.79525756835938, -155.63619995117188, 218.16812133789062, 263.7419738769531, 157.167236328125, -46.930931091308594, 201.40170288085938, 440.3800048828125, 272.7583923339844, 17.358747482299805, 121.24884796142578, 209.12649536132812, 111.52176666259766, -234.2527618408203, 363.2532958984375, 747.9391479492188, -82.77592468261719, -62.666900634765625, 277.60382080078125, 297.9437255859375, 39.96424102783203, 105.63774108886719, -2.2141761779785156, 589.0758056640625, -218.28640747070312, 177.46359252929688, 17.91799545288086, 137.28689575195312, 116.54292297363281, 30.600967407226562, 227.85000610351562, -70.21199035644531, 8.089527130126953, 224.72027587890625, 352.03814697265625, 44.65458679199219, 14.380277633666992, 30.439498901367188, 155.20492553710938, -24.764892578125, 21.50958251953125, -22.468360900878906, 364.55755615234375, 152.64004516601562, 153.735595703125, 65.45915222167969, 451.8331298828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000270.npy"} +{"epoch": 0.3964757709251101, "step": 271, "batch_size": 64, "mean": 162.08523559570312, "std": 193.21275329589844, "min": -214.08688354492188, "p10": -35.381995773315424, "median": 112.89587020874023, "p90": 429.64421386718766, "max": 752.1815795898438, "pos_frac": 0.78125, "sample": [752.1815795898438, 353.1717834472656, 93.98112487792969, -37.833980560302734, 273.73486328125, -4.175617218017578, 367.0689392089844, 388.5934143066406, 25.06244659423828, -73.05191802978516, 329.05731201171875, -138.50076293945312, 445.278564453125, -214.08688354492188, 91.10875701904297, -27.048606872558594, 89.07608032226562, -26.38372039794922, 128.82200622558594, -14.946029663085938, -87.04674530029297, -5.1058502197265625, 463.4547119140625, 352.5831298828125, 528.44775390625, 385.062744140625, 81.79488372802734, -70.58612060546875, 56.268287658691406, 143.69598388671875, 1.2852516174316406, 251.04383850097656, 173.38442993164062, -29.66069793701172, -63.539794921875, 141.7981719970703, 87.21945190429688, 0.9661865234375, 393.1640625, 101.0010986328125, -18.823265075683594, 527.8269653320312, 54.90290069580078, 7.665525436401367, 122.49998474121094, 17.588207244873047, 110.29739379882812, 160.07810974121094, 338.0165100097656, 217.47634887695312, 496.7137451171875, 303.513671875, 115.49434661865234, 27.197341918945312, 498.78082275390625, 48.08020782470703, 232.0469970703125, 335.8900451660156, 55.17413330078125, 200.8538360595703, 192.78677368164062, 283.8021240234375, 95.92916870117188, 243.3231658935547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000271.npy"} +{"epoch": 0.39794419970631423, "step": 272, "batch_size": 64, "mean": 200.57228088378906, "std": 191.79505920410156, "min": -204.8065643310547, "p10": -79.17268676757811, "median": 218.12490844726562, "p90": 465.3450103759766, "max": 596.7237548828125, "pos_frac": 0.828125, "sample": [279.579345703125, 207.20953369140625, -65.84748840332031, -94.12913513183594, -32.70359802246094, 383.0030517578125, 245.38221740722656, 444.4833984375, 467.73541259765625, 347.11065673828125, 310.64422607421875, 359.0860900878906, -99.33562469482422, 180.34768676757812, -96.81022644042969, 72.43209838867188, 104.74385833740234, 190.49586486816406, 218.27273559570312, 137.119873046875, -98.98223876953125, 247.0872802734375, 338.79193115234375, 221.5269775390625, 291.5406494140625, 109.01485443115234, 1.6570472717285156, 235.02120971679688, 126.8837661743164, 583.93310546875, 5.7223052978515625, 240.41851806640625, 216.24496459960938, 173.53726196289062, 90.70362854003906, 466.7221374511719, 494.1721496582031, -204.8065643310547, 208.73822021484375, 286.63812255859375, 596.7237548828125, 32.23876953125, 524.962890625, 154.18841552734375, 312.8409423828125, -72.84195709228516, 324.6572265625, 61.065433502197266, 239.79685974121094, 429.4060974121094, -42.22975158691406, 217.97708129882812, 9.806259155273438, -107.86811828613281, -81.88585662841797, 299.522705078125, 296.0207214355469, 54.44934844970703, 423.2080078125, 145.30177307128906, 222.37808227539062, 462.1317138671875, 512.7192993164062, 228.67050170898438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000272.npy"} +{"epoch": 0.39941262848751835, "step": 273, "batch_size": 64, "mean": 186.65919494628906, "std": 204.59596252441406, "min": -293.6380615234375, "p10": -68.78959274291991, "median": 187.54432678222656, "p90": 427.44396667480476, "max": 675.2109375, "pos_frac": 0.8125, "sample": [233.68927001953125, 331.45361328125, 119.91262817382812, 164.52847290039062, 529.181396484375, 477.61688232421875, 186.43801879882812, -98.79401397705078, 641.2687377929688, 15.169723510742188, 378.989990234375, 262.7405090332031, -76.48202514648438, 64.62139129638672, -34.8426513671875, 122.7324447631836, 17.518505096435547, 188.650634765625, 201.23297119140625, 360.83447265625, 190.65432739257812, 53.85853576660156, 92.56085205078125, 138.34161376953125, 115.55021667480469, 28.421791076660156, 382.06488037109375, 210.8140411376953, 164.63467407226562, 208.0858154296875, 329.300048828125, -17.540977478027344, 345.1745910644531, 240.243408203125, -27.702150344848633, 675.2109375, 544.4241333007812, 234.2504119873047, 191.58750915527344, 298.79180908203125, -200.44851684570312, -148.25064086914062, 272.15069580078125, -50.84058380126953, 260.03326416015625, 281.06512451171875, 409.81097412109375, 62.32539367675781, 130.3684844970703, -293.6380615234375, 629.4280395507812, 169.4090576171875, 72.18965148925781, -96.5645751953125, -24.9503173828125, 344.6962585449219, 38.87599182128906, 119.50782775878906, 326.1328125, -111.78349304199219, 332.2818908691406, 92.19641876220703, 413.6501770019531, 433.3555908203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000273.npy"} +{"epoch": 0.4008810572687225, "step": 274, "batch_size": 64, "mean": 144.20184326171875, "std": 197.9510040283203, "min": -245.71681213378906, "p10": -125.45073089599606, "median": 124.75010299682617, "p90": 445.13988342285165, "max": 581.8504638671875, "pos_frac": 0.765625, "sample": [46.21021270751953, 102.07820129394531, 33.59967803955078, -162.25074768066406, 230.67123413085938, 165.17550659179688, 228.80592346191406, -46.735389709472656, 129.83641052246094, -6.6439666748046875, -142.5484619140625, 471.7013854980469, 364.91900634765625, 257.1485290527344, 262.6143493652344, 182.2482452392578, 19.731712341308594, 482.00067138671875, -245.71681213378906, 322.650146484375, -148.49876403808594, 141.4463348388672, -25.905548095703125, 50.59225082397461, 341.5885009765625, 303.9765319824219, 455.8772888183594, -141.72747802734375, 104.1572494506836, 581.8504638671875, -42.48868179321289, 129.90542602539062, 308.15484619140625, 163.78366088867188, -5.856939315795898, 96.51191711425781, 463.5155944824219, -19.464385986328125, 124.84315490722656, 82.62257385253906, 297.65850830078125, 8.486007690429688, 101.54905700683594, 272.8818359375, 420.0859375, -97.86418914794922, 116.98292541503906, 30.64412498474121, 198.20330810546875, 552.159912109375, 278.85467529296875, 95.49943542480469, 0.7112197875976562, 147.15228271484375, -241.03421020507812, 326.97509765625, -137.20565795898438, -98.02256774902344, 551.7552490234375, 188.87478637695312, 109.34832763671875, 313.304931640625, 6.879669189453125, 124.65705108642578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000274.npy"} +{"epoch": 0.4023494860499266, "step": 275, "batch_size": 64, "mean": 150.89297485351562, "std": 190.32464599609375, "min": -261.3113098144531, "p10": -76.86675262451172, "median": 140.12873077392578, "p90": 405.2445068359376, "max": 550.8244018554688, "pos_frac": 0.78125, "sample": [213.5814666748047, 78.92457580566406, 301.6044616699219, 4.989635467529297, -24.1804141998291, -96.3594741821289, 384.1814880371094, 123.08189392089844, 505.6009826660156, -77.86078643798828, 245.90695190429688, 381.8282775878906, 550.8244018554688, 80.2236557006836, 149.26675415039062, 285.90338134765625, -42.59634780883789, 23.724853515625, 124.91643524169922, 194.7406463623047, -125.8725357055664, 198.38229370117188, 430.7664489746094, 27.301551818847656, 32.985877990722656, -222.7252197265625, 313.74884033203125, 168.78741455078125, 165.6300048828125, 74.25231170654297, -85.38184356689453, 39.442626953125, 314.63671875, 210.81695556640625, -16.868257522583008, 41.714935302734375, -203.312744140625, -48.030670166015625, -43.21173095703125, 45.37202453613281, 72.75827026367188, 322.1159362792969, -261.3113098144531, 177.09214782714844, 237.6337127685547, 414.2715148925781, 207.38064575195312, 545.9642944335938, 219.41473388671875, 485.6309814453125, 418.70941162109375, -35.74043273925781, 263.61761474609375, 377.27215576171875, 63.181068420410156, 322.6736145019531, 130.99070739746094, 338.3542785644531, -74.5473403930664, 224.7655487060547, 52.70476531982422, 338.45849609375, 66.508544921875, 22.51312255859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000275.npy"} +{"epoch": 0.40381791483113066, "step": 276, "batch_size": 64, "mean": 155.49386596679688, "std": 209.3193359375, "min": -262.2996520996094, "p10": -85.92391204833982, "median": 159.25896453857422, "p90": 427.3069091796876, "max": 849.3602905273438, "pos_frac": 0.78125, "sample": [106.3616714477539, 849.3602905273438, 172.19569396972656, 433.00848388671875, 489.64410400390625, -17.188745498657227, -15.87994384765625, 577.4456787109375, 35.36396789550781, 151.9250030517578, 140.02114868164062, 17.882850646972656, -27.574462890625, 248.58419799804688, 296.7237243652344, 265.1712341308594, 204.5966796875, -262.2996520996094, 138.58119201660156, 9.632312774658203, 36.9217643737793, 298.47955322265625, 206.85983276367188, 317.2898254394531, -101.18980407714844, -236.6690673828125, -22.946231842041016, 85.67909240722656, 212.27212524414062, 342.0636901855469, -50.303497314453125, 182.51416015625, 41.17894744873047, 166.59292602539062, 335.25604248046875, 281.432373046875, -171.9423828125, -212.3850555419922, -1.3281536102294922, 27.29153060913086, -153.3470916748047, 21.91998291015625, 58.08115768432617, 466.0732727050781, 83.75638580322266, 170.2998046875, 234.2605438232422, 213.94187927246094, 298.6978454589844, 54.28278350830078, 476.96234130859375, -19.135101318359375, 173.22848510742188, 279.2576904296875, 63.52001953125, 242.62684631347656, 213.734619140625, 167.8209991455078, 376.28411865234375, 88.92752075195312, -145.65280151367188, 583.8078002929688, 37.631874084472656, 414.00323486328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000276.npy"} +{"epoch": 0.4052863436123348, "step": 277, "batch_size": 64, "mean": 154.2389678955078, "std": 205.53924560546875, "min": -315.504638671875, "p10": -107.87050552368164, "median": 129.42224884033203, "p90": 439.11878967285156, "max": 564.7153930664062, "pos_frac": 0.765625, "sample": [8.147502899169922, 324.9115295410156, -153.29966735839844, 160.988525390625, 21.853084564208984, 295.19757080078125, 380.5067138671875, 157.16744995117188, -107.20075988769531, 216.6959228515625, 223.244140625, -36.44568634033203, 382.24853515625, -27.94281578063965, 204.91932678222656, 382.364013671875, 83.14856719970703, 115.60205078125, 206.464111328125, 308.6966552734375, -104.85906219482422, 372.34600830078125, 58.36033630371094, 70.3522720336914, -208.07720947265625, -108.15753936767578, 509.39459228515625, -11.37322998046875, -35.488128662109375, 123.98922729492188, -315.504638671875, 549.0494995117188, 143.18478393554688, 313.11871337890625, -195.0247802734375, -155.1966552734375, 564.7153930664062, 266.0943298339844, 131.12225341796875, -3.18121337890625, 239.61358642578125, 27.412811279296875, 503.5347900390625, 30.102436065673828, 12.136611938476562, 120.1995620727539, 185.01104736328125, 364.05596923828125, 436.58489990234375, 476.12542724609375, -158.43637084960938, 285.9549865722656, 76.17867279052734, 76.7932357788086, 12.797027587890625, -6.700557708740234, 235.8986358642578, 126.2231216430664, 440.2047424316406, 465.33599853515625, 127.72224426269531, 172.89846801757812, 103.98303985595703, 405.5314636230469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000277.npy"} +{"epoch": 0.4067547723935389, "step": 278, "batch_size": 64, "mean": 181.261474609375, "std": 205.91200256347656, "min": -151.93533325195312, "p10": -51.46418380737304, "median": 145.36105346679688, "p90": 515.4886749267579, "max": 696.2789916992188, "pos_frac": 0.78125, "sample": [-95.39935302734375, 351.68353271484375, -78.10314178466797, 75.90731811523438, 311.6964416503906, 339.11767578125, 229.61260986328125, 84.62434387207031, 175.31277465820312, 107.6679458618164, 259.1646423339844, 186.9251708984375, 31.36456871032715, -77.23001098632812, 158.12283325195312, 271.6478271484375, -7.4202728271484375, 323.9314880371094, 504.4234924316406, 18.504478454589844, 15.87635612487793, 641.213623046875, 415.013916015625, 204.92608642578125, 272.0005187988281, 293.6873779296875, 106.32926177978516, -24.382667541503906, 179.74386596679688, 525.1873168945312, 463.4955749511719, -58.33589172363281, 105.706298828125, 34.3826904296875, 558.7152099609375, 520.2308959960938, 34.95287322998047, -40.23834228515625, 82.00170135498047, 192.6048583984375, 83.18408966064453, 429.78021240234375, 162.6156463623047, 68.86746215820312, 41.23419189453125, 529.618408203125, 132.59927368164062, 245.9491729736328, -42.073368072509766, 596.4441528320312, 172.901123046875, 132.0432586669922, 40.40484619140625, -2.3398666381835938, 295.5818786621094, 285.08648681640625, -36.88660430908203, -41.726158142089844, 696.2789916992188, -151.93533325195312, 348.565673828125, 109.49154663085938, -134.12869262695312, -55.48881912231445], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000278.npy"} +{"epoch": 0.40822320117474303, "step": 279, "batch_size": 64, "mean": 182.81570434570312, "std": 236.7415771484375, "min": -355.23931884765625, "p10": -55.30280227661132, "median": 157.2572479248047, "p90": 476.8917053222657, "max": 912.4232177734375, "pos_frac": 0.75, "sample": [67.31641387939453, 78.19148254394531, 50.25835037231445, -20.85931396484375, -6.801727294921875, -40.315025329589844, 432.99755859375, 482.2302551269531, -13.191230773925781, -32.84462356567383, -14.772651672363281, 272.62335205078125, 189.083984375, 45.626800537109375, 531.2652587890625, 636.654541015625, 55.02265548706055, 173.98541259765625, 301.49810791015625, 226.16519165039062, 685.3805541992188, 100.3661117553711, 28.289892196655273, -303.1739501953125, 50.84107971191406, 98.5708999633789, 1.2982177734375, 183.26763916015625, 140.52908325195312, 543.0747680664062, 112.2452392578125, 67.04854583740234, 237.95968627929688, -22.527196884155273, 251.73956298828125, 422.01177978515625, 232.35513305664062, 912.4232177734375, 464.4350891113281, 314.093017578125, -3.1414031982421875, -61.72613525390625, -108.96891784667969, -355.23931884765625, 184.0442352294922, 443.5284118652344, 77.16900634765625, 261.14654541015625, 363.26214599609375, 136.65040588378906, 456.82073974609375, 313.3385925292969, 82.87198638916016, -136.1490478515625, 504.8700866699219, 223.47848510742188, 375.16162109375, 279.58343505859375, -120.787353515625, -163.52064514160156, 437.831787109375, 345.2977600097656, -12.080631256103516, 242.39976501464844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000279.npy"} +{"epoch": 0.40969162995594716, "step": 280, "batch_size": 64, "mean": 185.09500122070312, "std": 192.14739990234375, "min": -334.43365478515625, "p10": -39.85333023071287, "median": 166.95523071289062, "p90": 439.121517944336, "max": 570.8621215820312, "pos_frac": 0.796875, "sample": [130.9932403564453, 324.10675048828125, 134.06216430664062, 266.43603515625, -23.202957153320312, -46.98920440673828, 356.8194885253906, 448.50299072265625, 498.1693115234375, 46.146820068359375, 228.6993865966797, 21.034278869628906, -19.731189727783203, 349.4490966796875, 392.6232604980469, 387.62109375, 107.60631561279297, 266.36077880859375, 195.54022216796875, 290.5426330566406, 60.116031646728516, 516.7797241210938, 483.7615051269531, 198.6529083251953, 256.2946472167969, 88.93447875976562, 363.40325927734375, 365.8526611328125, -3.046844482421875, 226.83413696289062, 193.03363037109375, 153.42074584960938, 547.5820922851562, 169.31857299804688, -18.615692138671875, 150.01907348632812, -215.4002685546875, 369.81317138671875, 224.53660583496094, 103.32125091552734, 220.125244140625, 108.33348846435547, -128.1560516357422, 570.8621215820312, -79.54238891601562, -334.43365478515625, 126.35749053955078, 417.2314147949219, 132.91561889648438, 155.0498046875, 211.26268005371094, 464.44927978515625, -1.5050220489501953, 142.19290161132812, -3.431640625, -89.80626678466797, 157.40512084960938, 110.66642761230469, 164.59188842773438, 338.2298583984375, 305.44207763671875, -175.16448974609375, 72.55815124511719, 371.043701171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000280.npy"} +{"epoch": 0.4111600587371512, "step": 281, "batch_size": 64, "mean": 166.26089477539062, "std": 193.9694061279297, "min": -642.2554931640625, "p10": -24.963335800170892, "median": 154.92941284179688, "p90": 403.5678527832032, "max": 587.0740966796875, "pos_frac": 0.84375, "sample": [155.85211181640625, 294.0071105957031, 139.1232147216797, 222.53135681152344, 89.46678161621094, 237.72808837890625, 71.09661865234375, 199.10232543945312, 176.7400665283203, 83.8409652709961, 219.484375, 410.6501159667969, 255.70875549316406, 65.7350082397461, 259.4368896484375, -70.77099609375, 191.2861328125, 587.0740966796875, 48.46051025390625, 147.5603485107422, -2.464630126953125, 363.9188232421875, 248.95118713378906, 481.7768249511719, 182.40771484375, 274.7923278808594, 174.51744079589844, 290.37274169921875, -27.248950958251953, 563.6626586914062, 387.0425720214844, 104.24333190917969, 158.48028564453125, 443.7410888671875, 158.42755126953125, -642.2554931640625, 79.25505065917969, 137.89051818847656, 81.00457000732422, 21.972335815429688, 466.425537109375, 32.52830505371094, -19.630233764648438, 306.99285888671875, 152.15408325195312, 259.21502685546875, 34.337127685546875, 113.22566223144531, 541.4160766601562, 111.67078399658203, -181.57275390625, 137.27462768554688, -101.54750061035156, 110.05783081054688, -54.605987548828125, -3.7272567749023438, -49.856964111328125, 376.0059814453125, 222.0377960205078, 74.37508392333984, 154.0067138671875, 320.20550537109375, 1.9391231536865234, 373.16815185546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000281.npy"} +{"epoch": 0.41262848751835535, "step": 282, "batch_size": 64, "mean": 129.51254272460938, "std": 217.6875762939453, "min": -207.386962890625, "p10": -112.03567504882812, "median": 112.35026931762695, "p90": 350.2529571533204, "max": 1206.857421875, "pos_frac": 0.75, "sample": [444.9620666503906, 123.72760009765625, 67.196533203125, -207.386962890625, 199.23011779785156, 264.23529052734375, 89.54134368896484, 43.95283508300781, 119.43255615234375, 330.453369140625, 276.34283447265625, 175.8590087890625, 332.7782287597656, 95.1789779663086, -10.532150268554688, 181.86883544921875, 1206.857421875, 146.22918701171875, 203.36492919921875, 117.58807373046875, 154.86871337890625, 192.22015380859375, 29.411922454833984, -129.98342895507812, 549.6328735351562, 357.74212646484375, 107.11246490478516, 149.47047424316406, 152.6894989013672, 504.10394287109375, 266.16424560546875, 183.86907958984375, 323.1851806640625, -32.756500244140625, 26.721080780029297, 88.69642639160156, -173.92955017089844, -107.28592681884766, 71.68185424804688, -138.47567749023438, -70.47174072265625, 54.7691650390625, 141.6608123779297, 198.82864379882812, 261.11614990234375, -14.476646423339844, -71.72738647460938, 376.43048095703125, 207.0191650390625, 118.68472290039062, -47.65361785888672, -26.369400024414062, 24.738754272460938, 51.85971450805664, 99.56944274902344, -207.27999877929688, 314.962158203125, 25.213972091674805, -114.07128143310547, 380.6675109863281, -196.60784912109375, 43.627437591552734, -75.68714141845703, 37.98008728027344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000282.npy"} +{"epoch": 0.41409691629955947, "step": 283, "batch_size": 64, "mean": 202.7651824951172, "std": 210.1786651611328, "min": -447.37689208984375, "p10": -41.985166168212885, "median": 194.54068756103516, "p90": 490.7471893310547, "max": 648.0303955078125, "pos_frac": 0.84375, "sample": [-74.6917724609375, -90.3800277709961, 222.78684997558594, 38.98558807373047, 373.65643310546875, 321.3529968261719, -45.0838623046875, 229.76690673828125, 494.7007141113281, 41.55280303955078, 73.1493148803711, 166.87074279785156, -11.071956634521484, 71.03718566894531, 105.21723175048828, 253.32569885253906, -447.37689208984375, 74.00872802734375, 445.55242919921875, -69.44925689697266, 492.8999328613281, 471.24627685546875, 192.99476623535156, 322.62579345703125, 46.88105392456055, 144.48687744140625, -27.43223762512207, 468.86639404296875, 344.60113525390625, 498.81744384765625, 67.812255859375, 295.54364013671875, -154.91307067871094, 196.08660888671875, 453.24627685546875, 56.46897888183594, 115.74539184570312, -92.63877868652344, 84.3802490234375, 8.121885299682617, 540.3379516601562, 196.4084930419922, 312.5926513671875, 219.82659912109375, 303.3106384277344, 219.03781127929688, 95.87090301513672, 184.08847045898438, 427.47137451171875, 304.4902648925781, 596.2421875, 58.915184020996094, 393.4697265625, 143.46807861328125, 69.9686508178711, 275.478515625, 545.83447265625, 228.88514709472656, 360.5632629394531, 648.0303955078125, 485.72412109375, 129.641845703125, 112.31937408447266, -34.75487518310547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000283.npy"} +{"epoch": 0.4155653450807636, "step": 284, "batch_size": 64, "mean": 184.29434204101562, "std": 204.35250854492188, "min": -201.36456298828125, "p10": -40.71529731750488, "median": 152.04788970947266, "p90": 476.21762390136735, "max": 735.5867309570312, "pos_frac": 0.859375, "sample": [-88.96127319335938, 87.88667297363281, 333.7681884765625, 39.43780517578125, 96.36756134033203, 71.55252838134766, 352.7613525390625, 18.476280212402344, 264.3580017089844, 431.5013427734375, 400.13671875, -31.97854995727539, 495.3817443847656, 44.102500915527344, 290.9424743652344, 50.26955032348633, 37.024192810058594, 253.18878173828125, 300.85845947265625, 501.2771911621094, 48.782135009765625, 645.7393188476562, 143.56570434570312, 284.6976318359375, 131.49069213867188, 46.78242492675781, 160.5300750732422, 75.4066162109375, 311.35345458984375, -63.44203186035156, -5.240325927734375, 735.5867309570312, 186.45225524902344, -62.18560028076172, 225.6812744140625, 109.446533203125, -125.35172271728516, 77.10692596435547, 238.5928955078125, 731.4259033203125, 80.24284362792969, 184.68496704101562, 197.1002197265625, 10.197343826293945, -104.8599853515625, -201.36456298828125, 279.1253356933594, 333.4719543457031, 124.82145690917969, 233.77171325683594, 554.7088012695312, 184.1663818359375, 316.2073059082031, 227.9769744873047, 38.46839904785156, 7.362247467041016, -44.459617614746094, 8.434532165527344, 357.78143310546875, 247.01951599121094, 2.7637100219726562, 526.4295654296875, 16.64385223388672, 369.3704833984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000284.npy"} +{"epoch": 0.4170337738619677, "step": 285, "batch_size": 64, "mean": 164.75491333007812, "std": 209.03392028808594, "min": -592.2425537109375, "p10": -63.20351791381835, "median": 147.68311309814453, "p90": 399.98017578125007, "max": 639.3946533203125, "pos_frac": 0.84375, "sample": [387.67010498046875, 239.0552978515625, 45.813140869140625, 405.25592041015625, 143.03347778320312, 125.17694091796875, 140.30612182617188, 59.870399475097656, 140.85638427734375, 639.3946533203125, 317.9105224609375, 186.67454528808594, 362.9379577636719, 132.51171875, 146.8820343017578, 568.49072265625, 20.881317138671875, 140.41702270507812, 112.33486938476562, 259.4191589355469, 478.1498107910156, -284.771484375, 60.403587341308594, 310.0323181152344, 423.4209289550781, -27.80321502685547, -66.85165405273438, 85.43621826171875, 101.56170654296875, 64.85770416259766, 259.70989990234375, 186.68052673339844, -70.62674713134766, 78.50845336914062, 280.7833557128906, 314.4368896484375, 24.30237579345703, -592.2425537109375, 248.69839477539062, 18.901336669921875, 179.22366333007812, 16.466705322265625, 600.4150390625, 274.0744934082031, -54.691200256347656, 323.0060119628906, 283.44775390625, 133.38417053222656, 252.1119384765625, 384.1365661621094, 191.36862182617188, -186.6728973388672, 348.0182189941406, 206.28506469726562, -191.7210235595703, -31.167118072509766, 428.2838134765625, 143.87759399414062, 102.28719329833984, 218.6454315185547, -200.74542236328125, 325.8856201171875, 181.43038940429688, 148.48419189453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000285.npy"} +{"epoch": 0.4185022026431718, "step": 286, "batch_size": 64, "mean": 182.9866485595703, "std": 230.26596069335938, "min": -354.0040588378906, "p10": -96.95835037231444, "median": 167.04224395751953, "p90": 482.9579315185547, "max": 693.0340576171875, "pos_frac": 0.78125, "sample": [58.514793395996094, -69.60631561279297, 455.6512756347656, 138.05557250976562, 179.92828369140625, 170.91552734375, 35.474937438964844, 266.7664794921875, 358.72064208984375, -354.0040588378906, 303.8678283691406, 210.45578002929688, 1.4669647216796875, -80.15059661865234, 480.4163513183594, 235.25216674804688, 609.720458984375, 9.07855224609375, 230.57333374023438, 163.16896057128906, 484.04718017578125, 130.66647338867188, -265.2746887207031, 439.92462158203125, 314.43450927734375, -78.24237060546875, -60.76768112182617, 361.2073974609375, 365.7059020996094, 594.481201171875, 543.7164306640625, -232.61297607421875, 204.69236755371094, 63.336212158203125, -40.01347351074219, 25.511581420898438, 390.33172607421875, 149.1558380126953, 1.077484130859375, -118.69273376464844, 141.9596405029297, -102.58248138427734, 261.0958557128906, -90.28328704833984, 135.93917846679688, 437.65826416015625, 466.14630126953125, 143.41229248046875, -119.34060668945312, 557.9208984375, -61.24219512939453, 253.82321166992188, 86.97102355957031, 511.90155029296875, 151.7255859375, 310.7315979003906, 90.61251831054688, 242.4034423828125, 282.9039306640625, -99.819091796875, 693.0340576171875, 397.7079162597656, 251.21905517578125, 90.29496002197266], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000286.npy"} +{"epoch": 0.4199706314243759, "step": 287, "batch_size": 64, "mean": 149.98983764648438, "std": 203.4695587158203, "min": -223.41293334960938, "p10": -90.3440689086914, "median": 116.10505676269531, "p90": 453.5651092529297, "max": 678.56005859375, "pos_frac": 0.78125, "sample": [113.12724304199219, 4.087982177734375, -7.525064468383789, 63.11669158935547, 140.72682189941406, -34.097015380859375, 295.984619140625, 597.2901611328125, 175.60218811035156, 160.3397216796875, 506.4615478515625, 102.72215270996094, 231.17234802246094, 534.6473388671875, -82.76486206054688, -72.73748779296875, 9.456504821777344, 522.0578002929688, 115.238037109375, 342.7716064453125, 206.64932250976562, 6.8149871826171875, 106.39508819580078, -223.41293334960938, 678.56005859375, 98.60246276855469, 56.67778015136719, -13.0010986328125, -93.59230041503906, 55.72173309326172, 167.1700897216797, -50.654762268066406, -166.52899169921875, 150.3236541748047, 76.68928527832031, 205.8684844970703, 67.51338195800781, 272.0635986328125, 118.63172912597656, 345.59515380859375, -16.813051223754883, 283.62750244140625, 168.48873901367188, 93.70301818847656, 51.13099670410156, 127.99880981445312, 116.97207641601562, 23.535263061523438, 452.6417236328125, 435.4449462890625, -147.54798889160156, 54.07244110107422, 215.03355407714844, 36.683067321777344, 453.9608459472656, -217.70816040039062, -142.14039611816406, -104.44457244873047, 262.90771484375, 354.9320373535156, 511.29913330078125, 387.6556091308594, 167.49441528320312, 246.656494140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000287.npy"} +{"epoch": 0.42143906020558003, "step": 288, "batch_size": 64, "mean": 186.38465881347656, "std": 209.69747924804688, "min": -354.3421630859375, "p10": -4.829238891601558, "median": 182.98385620117188, "p90": 425.80621032714845, "max": 743.735595703125, "pos_frac": 0.875, "sample": [192.33926391601562, 177.29824829101562, 20.616031646728516, 211.9011688232422, 346.04107666015625, 417.19970703125, 0.2832164764404297, 743.735595703125, 27.30137825012207, 372.00518798828125, 292.3025207519531, 151.74417114257812, -131.53689575195312, 15.895660400390625, 99.50048828125, 157.7001495361328, 131.55699157714844, 428.1660461425781, 255.5001220703125, 370.5310363769531, 192.74954223632812, 286.2554931640625, 279.5377197265625, -0.55645751953125, 43.9926643371582, 197.66461181640625, 170.32415771484375, 48.960792541503906, 187.725341796875, 67.44999694824219, 206.40374755859375, 224.56478881835938, 604.0128784179688, 135.25070190429688, 420.2999267578125, 320.20880126953125, 209.66282653808594, -6.660430908203125, 200.663330078125, 52.62622833251953, -354.3421630859375, 618.377197265625, 110.62495422363281, 325.34393310546875, 67.77107238769531, 178.24237060546875, 57.00446319580078, 48.36955261230469, 487.50482177734375, 113.38005828857422, 247.07069396972656, 622.0855712890625, 215.52688598632812, -350.03033447265625, 93.17355346679688, 521.8764038085938, 312.04339599609375, 65.24354553222656, 62.54914855957031, 365.9961242675781, -29.74048614501953, 340.92633056640625, -145.01756286621094, -166.57847595214844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000288.npy"} +{"epoch": 0.42290748898678415, "step": 289, "batch_size": 64, "mean": 208.77468872070312, "std": 232.82252502441406, "min": -208.53173828125, "p10": -66.03944244384766, "median": 169.97967529296875, "p90": 546.2722106933595, "max": 698.450439453125, "pos_frac": 0.828125, "sample": [142.39395141601562, -66.19241333007812, 634.2886962890625, 661.1767578125, -158.47933959960938, 606.212646484375, 415.3751220703125, 295.18743896484375, 49.187408447265625, 555.64697265625, 495.89044189453125, 432.77667236328125, 293.4873046875, 524.3977661132812, 346.43133544921875, 307.8905944824219, 21.282882690429688, 698.450439453125, -72.4956283569336, 251.20083618164062, 107.33612060546875, 48.196044921875, 103.04147338867188, -35.162654876708984, -146.054931640625, 30.97652816772461, 296.1220703125, 460.6548767089844, 67.56143188476562, 135.5683135986328, 428.8138122558594, 648.6748046875, 340.09625244140625, -65.68251037597656, 64.03107452392578, 34.16267395019531, 615.0516967773438, 93.41780090332031, -90.37553405761719, 13.929555892944336, 101.63357543945312, 496.6717529296875, -64.05178833007812, 256.0641174316406, 206.43060302734375, 398.33074951171875, 430.16473388671875, -43.7994384765625, 139.6769561767578, -116.22982788085938, 163.54241943359375, 186.0211181640625, 79.03262329101562, 32.956939697265625, 507.96258544921875, 189.116455078125, 29.246328353881836, 36.76421356201172, 176.41693115234375, 193.21221923828125, 256.5273132324219, -208.53173828125, 275.628662109375, 54.323753356933594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000289.npy"} +{"epoch": 0.4243759177679883, "step": 290, "batch_size": 64, "mean": 210.26165771484375, "std": 259.14361572265625, "min": -354.5686950683594, "p10": -64.58390502929686, "median": 199.65428924560547, "p90": 576.3289428710938, "max": 889.0296630859375, "pos_frac": 0.8125, "sample": [546.8458251953125, 175.98712158203125, 131.28607177734375, 180.11862182617188, -354.5686950683594, 390.9812927246094, 443.57366943359375, -71.63302612304688, 37.94691467285156, 149.46983337402344, -225.7135009765625, 411.2516174316406, 231.632080078125, 11.810394287109375, 262.3612060546875, 213.95407104492188, 503.8355712890625, -26.911376953125, 247.14260864257812, 17.47586441040039, -1.6381072998046875, 45.09318542480469, -171.32989501953125, 136.92190551757812, 139.37344360351562, -288.1617126464844, 161.24362182617188, 317.9924011230469, 889.0296630859375, 710.5308837890625, 77.50483703613281, 246.4423370361328, 299.670166015625, 635.3829345703125, 243.23187255859375, 69.30680847167969, 278.3103942871094, 96.23927307128906, 264.3662109375, 186.81964111328125, 13.27301025390625, 238.16941833496094, 212.4889373779297, 335.7392578125, 48.87345886230469, 283.606201171875, -45.043418884277344, -48.135955810546875, 306.8215637207031, -127.05136108398438, 688.220703125, 479.00653076171875, 275.4493103027344, 564.1976318359375, 16.364452362060547, 619.0620727539062, 581.528076171875, 328.7440185546875, -45.84621047973633, 59.5133056640625, 71.35125732421875, 854.2652587890625, 239.28567504882812, -106.31367492675781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000290.npy"} +{"epoch": 0.42584434654919234, "step": 291, "batch_size": 64, "mean": 194.061279296875, "std": 219.4269561767578, "min": -420.1502990722656, "p10": -48.37581405639648, "median": 209.96350860595703, "p90": 455.67958068847656, "max": 649.0008544921875, "pos_frac": 0.796875, "sample": [207.32362365722656, 229.43077087402344, 2.232818603515625, 56.565589904785156, -15.4212646484375, 296.98126220703125, 633.4031982421875, 291.6384582519531, 444.12188720703125, 275.6263122558594, 61.90205383300781, 407.71319580078125, 306.61474609375, -51.780479431152344, 649.0008544921875, -29.31494140625, 455.14947509765625, 235.3310089111328, -8.973886489868164, 240.6527557373047, 472.382568359375, 5.2590484619140625, 288.8214416503906, 85.61611938476562, 279.88311767578125, 390.17919921875, 151.78814697265625, 200.43016052246094, 399.2900085449219, 31.810985565185547, 340.2141418457031, 98.57459259033203, 338.86212158203125, -207.0968780517578, 68.75027465820312, -145.53488159179688, -34.965843200683594, 53.802024841308594, -420.1502990722656, 582.9412841796875, 348.86700439453125, 455.9067687988281, 398.042724609375, 212.6033935546875, -104.93801879882812, -40.43159484863281, -62.90046691894531, 354.59674072265625, 58.623191833496094, 158.02316284179688, 100.67033386230469, 168.21484375, 349.71221923828125, 312.441650390625, 5.455780029296875, 234.43960571289062, 426.1877746582031, 79.08641052246094, 528.4735717773438, -29.218528747558594, -128.03866577148438, 389.5113525390625, 533.37744140625, 2.160318374633789], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000291.npy"} +{"epoch": 0.42731277533039647, "step": 292, "batch_size": 64, "mean": 198.60556030273438, "std": 245.7430877685547, "min": -579.1243286132812, "p10": -59.19863319396973, "median": 196.73492431640625, "p90": 513.1909454345704, "max": 739.297607421875, "pos_frac": 0.8125, "sample": [242.88531494140625, 54.469425201416016, 16.447097778320312, 194.3402099609375, 319.5202941894531, 199.129638671875, 119.4654541015625, 116.6861801147461, 201.37115478515625, -579.1243286132812, 706.955322265625, 62.75940704345703, 586.7393798828125, 112.17237854003906, 301.0663757324219, -60.14982604980469, -257.3016357421875, 427.959716796875, 60.31616973876953, 292.5909118652344, -23.36040496826172, 737.1025390625, 404.6327819824219, 234.16720581054688, 455.5290222167969, 517.454345703125, 265.5361022949219, 462.5252685546875, 158.79925537109375, 121.17251586914062, -117.08935546875, 265.1585693359375, 9.431099891662598, 493.097900390625, -12.814750671386719, 139.5801544189453, -99.04656982421875, 186.7609100341797, 135.0249786376953, 120.26731872558594, 41.919097900390625, 519.4186401367188, 223.12277221679688, 184.68702697753906, 206.55499267578125, 208.06915283203125, 416.9014587402344, 345.31842041015625, 739.297607421875, 0.5813579559326172, 170.87820434570312, 295.95391845703125, 247.9300994873047, -220.90501403808594, 235.07102966308594, -165.53622436523438, 707.8135375976562, -17.412975311279297, -56.979183197021484, 503.2430114746094, 212.26417541503906, 233.48666381835938, 124.94844055175781, -18.097780227661133], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000292.npy"} +{"epoch": 0.4287812041116006, "step": 293, "batch_size": 64, "mean": 153.2054443359375, "std": 234.1806640625, "min": -246.8534393310547, "p10": -129.60037994384766, "median": 120.40057754516602, "p90": 434.9908477783203, "max": 902.8529052734375, "pos_frac": 0.734375, "sample": [38.926334381103516, 63.489776611328125, 20.157384872436523, 49.14556884765625, 218.4618682861328, 196.0983428955078, 470.8417663574219, 160.6977081298828, 433.45355224609375, 39.78398132324219, 435.6496887207031, -68.68647766113281, 209.98419189453125, 13.621326446533203, 902.8529052734375, -244.63946533203125, 287.3022766113281, 188.881591796875, 52.013397216796875, 280.1263427734375, 358.5346984863281, -152.93399047851562, 245.82876586914062, -99.69157409667969, 43.96185302734375, 99.32958221435547, -75.81095123291016, 134.97787475585938, -5.3986663818359375, 494.910400390625, 418.55877685546875, 299.1649169921875, 314.16937255859375, 423.6194152832031, 312.8778381347656, -130.30714416503906, 46.29908752441406, 158.3191680908203, 190.36282348632812, -14.559133529663086, -62.14611053466797, 78.7432632446289, -1.322998046875, 251.25131225585938, 272.62017822265625, 362.0232849121094, 213.89959716796875, 347.435302734375, 104.17759704589844, -240.04791259765625, 21.82049560546875, -246.8534393310547, -161.76756286621094, 470.06256103515625, 801.6060791015625, 105.82328033447266, -127.95126342773438, 146.6575927734375, 60.45224380493164, -80.17796325683594, -135.02182006835938, 365.37896728515625, 512.983642578125, -64.87327575683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000293.npy"} +{"epoch": 0.4302496328928047, "step": 294, "batch_size": 64, "mean": 171.36770629882812, "std": 229.19219970703125, "min": -266.6346130371094, "p10": -93.35507049560542, "median": 97.51404571533203, "p90": 496.63154907226567, "max": 819.360595703125, "pos_frac": 0.875, "sample": [31.89685821533203, 53.5526123046875, 309.477783203125, 419.390625, 89.23187255859375, 59.4432487487793, -151.50582885742188, 138.23196411132812, 5.8878631591796875, 72.03260040283203, 17.083267211914062, 42.740814208984375, -48.223445892333984, 548.1871948242188, 50.08496856689453, 280.4488830566406, 194.5053253173828, 416.05133056640625, 55.72722244262695, 173.8677520751953, 680.2418823242188, -152.7259063720703, 261.498779296875, 90.93292236328125, 629.8847045898438, 418.5059509277344, 309.38037109375, 161.7010040283203, 178.58425903320312, -112.69719696044922, 240.51168823242188, 292.991943359375, 302.1706237792969, 819.360595703125, -199.66522216796875, -188.5087127685547, 643.6541137695312, 191.2325897216797, 396.3216857910156, 19.911293029785156, -266.6346130371094, -184.7757568359375, 249.634521484375, 307.8531799316406, 550.0950317382812, 108.78414916992188, 14.549625396728516, 160.75904846191406, 489.8067626953125, 10.33995246887207, 348.3963317871094, 365.9394836425781, 18.49138641357422, 92.38139343261719, 44.07456588745117, 38.0899658203125, 42.05561447143555, 61.840370178222656, 18.410289764404297, 102.64669799804688, 499.55645751953125, 76.38351440429688, 28.020782470703125, 49.43408966064453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000294.npy"} +{"epoch": 0.43171806167400884, "step": 295, "batch_size": 64, "mean": 185.25486755371094, "std": 242.42413330078125, "min": -324.50128173828125, "p10": -123.36470413208005, "median": 164.7293930053711, "p90": 516.8847656250001, "max": 863.7744140625, "pos_frac": 0.78125, "sample": [-214.09979248046875, -6.604658126831055, 549.5991821289062, 12.459510803222656, -324.50128173828125, 217.05712890625, -151.57188415527344, 317.0579528808594, 298.1814270019531, -255.56280517578125, 329.272705078125, -31.412429809570312, 136.5672607421875, 387.34637451171875, 409.9358215332031, 281.9281311035156, 254.2909698486328, 281.8444519042969, -102.8331527709961, 87.22118377685547, 330.0186767578125, -14.251302719116211, 143.53604125976562, 364.1827087402344, 8.073448181152344, 149.772216796875, 527.608154296875, -132.1639404296875, 731.0679931640625, 160.43228149414062, 395.5450744628906, 19.304580688476562, 29.335506439208984, -148.18572998046875, 441.5980224609375, 548.2429809570312, 55.711387634277344, -95.25385284423828, -202.5797882080078, 91.61634826660156, 230.7464599609375, 408.3434143066406, 371.10919189453125, 117.84089660644531, -47.15862274169922, 75.65643310546875, 240.46632385253906, 65.54151153564453, 198.96456909179688, 526.6043701171875, 342.2798767089844, 275.73712158203125, 18.374420166015625, 863.7744140625, 494.2056884765625, 398.5539855957031, 144.66317749023438, 184.41824340820312, 192.7453155517578, 169.02650451660156, 49.48278045654297, 104.52290344238281, 612.150634765625, -61.52526092529297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000295.npy"} +{"epoch": 0.4331864904552129, "step": 296, "batch_size": 64, "mean": 134.98883056640625, "std": 265.3215637207031, "min": -634.85205078125, "p10": -226.33961486816403, "median": 143.46627807617188, "p90": 503.3848236083985, "max": 765.5859985351562, "pos_frac": 0.734375, "sample": [117.96688079833984, 88.84159088134766, 153.17578125, 733.58544921875, -119.72787475585938, 548.59765625, -256.2787170410156, -83.26371765136719, -56.40465545654297, -634.85205078125, 102.78250122070312, 200.33270263671875, -306.01422119140625, 205.2355194091797, 164.94639587402344, -262.61236572265625, 153.6178436279297, 326.8265380859375, 609.7467041015625, 411.39361572265625, 220.82647705078125, 243.967041015625, -19.085365295410156, -80.22735595703125, 141.85418701171875, 355.5354309082031, 62.735443115234375, -241.0922393798828, 11.1640625, -191.9168243408203, 597.2313842773438, 15.341384887695312, 275.9630126953125, 51.417137145996094, 202.73019409179688, 508.3046569824219, 168.06129455566406, 98.77444458007812, -473.5124206542969, 242.64520263671875, -280.3735046386719, 298.1536560058594, -45.590797424316406, -60.13043212890625, 509.64984130859375, 94.65758514404297, 116.34862518310547, 194.0883026123047, 145.078369140625, 169.12783813476562, 127.46985626220703, 45.92505645751953, -4.600196838378906, 765.5859985351562, 235.49790954589844, 226.67019653320312, 89.24817657470703, 67.68242645263672, 491.90521240234375, -74.17018127441406, 247.21165466308594, 216.16580200195312, 405.53662109375, 369.5343322753906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000296.npy"} +{"epoch": 0.434654919236417, "step": 297, "batch_size": 64, "mean": 162.16717529296875, "std": 196.54209899902344, "min": -253.93936157226562, "p10": -47.10204238891601, "median": 118.12560272216797, "p90": 415.98563537597664, "max": 624.1141357421875, "pos_frac": 0.8125, "sample": [136.47802734375, 241.7744598388672, 387.4781188964844, 517.7882080078125, 86.53721618652344, 11.731744766235352, 333.328857421875, 209.48529052734375, -135.23545837402344, -56.8204231262207, 70.90541076660156, 150.75064086914062, 232.54241943359375, 435.81707763671875, 391.6480712890625, -31.93560218811035, 78.72955322265625, 19.361724853515625, -45.8370361328125, -13.115928649902344, 317.08941650390625, -150.96241760253906, 88.57914733886719, 35.21080017089844, 514.5238037109375, 89.45523071289062, 303.92083740234375, 77.79022216796875, 237.32650756835938, 333.3007507324219, 174.06610107421875, 98.09213256835938, 402.495361328125, 213.04713439941406, 397.7518310546875, 26.01424789428711, 111.06767272949219, 4.046577453613281, 523.9304809570312, 272.2588806152344, 7.9745025634765625, 189.6099853515625, -238.44805908203125, 332.36962890625, 63.86560821533203, -55.13109588623047, 125.18353271484375, 73.411376953125, 600.5977783203125, -45.13459777832031, 134.4708251953125, 421.7671813964844, 244.29718017578125, -15.7222900390625, -253.93936157226562, 313.06195068359375, 2.9939651489257812, 82.15718078613281, 192.69223022460938, 624.1141357421875, 109.81890869140625, 373.5382080078125, -47.644187927246094, 52.37727355957031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000297.npy"} +{"epoch": 0.43612334801762115, "step": 298, "batch_size": 64, "mean": 211.24917602539062, "std": 310.3638916015625, "min": -584.5660400390625, "p10": -115.59244537353516, "median": 172.8660659790039, "p90": 588.031982421875, "max": 1438.5341796875, "pos_frac": 0.71875, "sample": [121.97547149658203, -163.08450317382812, 22.34268569946289, -584.5660400390625, 55.556976318359375, -112.364013671875, -5.237958908081055, 677.4823608398438, 253.33737182617188, 5.67926025390625, 192.82186889648438, 471.197998046875, -61.15437316894531, -106.33824157714844, -164.89486694335938, 64.65401458740234, 206.73556518554688, 131.6046905517578, 113.9217300415039, 256.2752685546875, 827.7232666015625, -8.96270751953125, 380.26715087890625, -164.50656127929688, 643.6704711914062, 274.29815673828125, -116.97605895996094, 559.4232788085938, 590.5628662109375, 721.7755126953125, -122.5556869506836, -122.910888671875, -7.480247497558594, 7.9484100341796875, -33.195213317871094, 582.1265869140625, 677.7754516601562, 133.9435272216797, 429.2286376953125, 252.56939697265625, 8.702634811401367, -10.431549072265625, 1438.5341796875, 256.9423828125, 581.9523315429688, 228.39505004882812, 415.993896484375, 450.03363037109375, -12.333187103271484, 108.8190689086914, 209.75283813476562, 433.902099609375, 329.41033935546875, 137.40103149414062, 550.2103271484375, 161.79893493652344, -13.497255325317383, 370.6382141113281, -69.25361633300781, 227.71047973632812, 183.93319702148438, 234.58351135253906, 60.69371795654297, 355.38427734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000298.npy"} +{"epoch": 0.43759177679882527, "step": 299, "batch_size": 64, "mean": 180.61741638183594, "std": 256.47125244140625, "min": -391.25189208984375, "p10": -99.99365539550782, "median": 137.14777374267578, "p90": 546.297412109375, "max": 855.2594604492188, "pos_frac": 0.765625, "sample": [-112.67792510986328, -260.3570861816406, -391.25189208984375, 373.0341796875, 49.70587158203125, -25.45706558227539, 361.92584228515625, 132.86151123046875, 77.89514923095703, 100.02942657470703, 589.0488891601562, 545.3995971679688, 743.1130981445312, -100.35286712646484, 165.3114471435547, 727.3954467773438, -202.12933349609375, 351.2002258300781, 138.56405639648438, 242.0629425048828, -99.1554946899414, 341.72833251953125, 145.40786743164062, 119.01287841796875, 451.3374328613281, 117.1573486328125, 855.2594604492188, -2.083049774169922, 214.50833129882812, 20.98098373413086, -15.029245376586914, 113.59748840332031, -148.473876953125, 43.82826232910156, 197.3015899658203, 546.6821899414062, 135.7314910888672, 152.72999572753906, -36.645782470703125, 172.6922607421875, 52.782493591308594, 318.1766662597656, 280.95184326171875, -45.51883316040039, 174.67575073242188, -89.16632843017578, -213.6946563720703, 493.62139892578125, 34.151123046875, 777.3037109375, -64.6152114868164, 69.8005142211914, 289.0296936035156, 339.552490234375, 259.4590759277344, 326.63720703125, 97.02763366699219, 611.4343872070312, 254.90582275390625, 132.76853942871094, 28.69207763671875, 350.4756774902344, 10.166877746582031, 239.00738525390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000299.npy"} +{"epoch": 0.4390602055800294, "step": 300, "batch_size": 64, "mean": 212.70004272460938, "std": 294.4864196777344, "min": -427.9167175292969, "p10": -224.82164916992184, "median": 195.10107421875, "p90": 607.0065490722657, "max": 789.9986572265625, "pos_frac": 0.75, "sample": [317.71331787109375, 282.475341796875, 148.66683959960938, 381.001953125, 661.90283203125, -379.9697265625, -194.321533203125, 633.2086791992188, 407.94677734375, 8.230770111083984, -28.364553451538086, 339.2453918457031, 136.95091247558594, 590.181640625, 277.2880554199219, 405.0324401855469, 117.12422180175781, 476.94049072265625, 49.32841491699219, 65.96298217773438, -44.9029541015625, 199.49252319335938, 270.1372985839844, -31.36725616455078, 495.6966247558594, 365.033935546875, -128.12747192382812, 113.55653381347656, 159.0178985595703, 262.64776611328125, 73.65144348144531, 768.527587890625, 271.1935729980469, -427.9167175292969, 540.0347290039062, -308.5068054199219, 564.5454711914062, 355.0837707519531, 474.66552734375, 177.128173828125, 89.64936065673828, 460.5545654296875, 614.2172241210938, 323.8268127441406, -41.81771469116211, 356.2980041503906, -95.37409973144531, -245.06375122070312, 735.3736572265625, 430.9858093261719, 423.36456298828125, -271.01409912109375, 167.93118286132812, -262.9469299316406, -1.2879905700683594, 148.89108276367188, 508.7871398925781, 75.75457763671875, 629.3966064453125, 789.9986572265625, -237.89312744140625, 18.494495391845703, 190.70962524414062, -42.16938781738281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000300.npy"} +{"epoch": 0.44052863436123346, "step": 301, "batch_size": 64, "mean": 214.18943786621094, "std": 274.3539123535156, "min": -210.6104736328125, "p10": -111.19889221191404, "median": 141.83805084228516, "p90": 600.5178649902344, "max": 988.8233032226562, "pos_frac": 0.8125, "sample": [221.3214569091797, 314.9335632324219, -42.352943420410156, 322.530029296875, 425.2071838378906, 305.0572814941406, 818.0784912109375, 112.52989196777344, 533.2681274414062, -11.419233322143555, 902.0341796875, 313.70501708984375, 52.772193908691406, 272.05291748046875, -151.29002380371094, 136.11427307128906, 99.15804290771484, 667.4117431640625, 104.64302062988281, 358.3997802734375, 819.2960205078125, 82.41325378417969, -126.41300201416016, 86.00614929199219, 452.4609069824219, 52.78094482421875, 64.7541732788086, 68.42347717285156, 256.82421875, 379.8088073730469, 267.5745849609375, 267.050048828125, 58.898189544677734, 218.34164428710938, 26.36069679260254, 56.67303466796875, 201.1118927001953, -177.2594757080078, 11.102535247802734, 13.5179443359375, 128.01712036132812, 2.890371322631836, 248.13882446289062, -136.13278198242188, -29.433456420898438, 988.8233032226562, 243.1692657470703, 197.92691040039062, 748.6214599609375, 78.45501708984375, 574.5596313476562, 147.56182861328125, 348.9267578125, 525.3150024414062, 413.285400390625, 55.543270111083984, -119.20379638671875, -192.09642028808594, 611.642822265625, -210.6104736328125, 105.89302062988281, -92.52078247070312, -48.77645492553711, 284.2477722167969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000301.npy"} +{"epoch": 0.4419970631424376, "step": 302, "batch_size": 64, "mean": 176.7947235107422, "std": 236.75076293945312, "min": -241.3558349609375, "p10": -99.24397201538085, "median": 156.7672348022461, "p90": 500.6409790039064, "max": 894.9427490234375, "pos_frac": 0.765625, "sample": [34.67662048339844, 434.0556640625, 709.3601684570312, -56.504573822021484, 212.10430908203125, -38.79127502441406, 227.31239318847656, 8.9288330078125, 177.6006317138672, 32.912384033203125, 217.82406616210938, 29.710678100585938, 76.81060028076172, 78.8180923461914, 187.3287353515625, 109.70345306396484, 359.257080078125, -6.234668731689453, 444.93524169921875, 367.9473571777344, 227.9392852783203, 177.66250610351562, 188.00546264648438, 243.305908203125, -70.72221374511719, -102.62065887451172, 388.12060546875, 555.7684326171875, 93.49993133544922, -230.39207458496094, 219.04489135742188, -44.512550354003906, 146.21588134765625, -91.36503601074219, -108.11598205566406, 524.0819091796875, 894.9427490234375, 519.9550170898438, 167.31858825683594, -113.31072998046875, -28.22320556640625, 455.57489013671875, -241.3558349609375, 249.60552978515625, 424.5025634765625, 53.734458923339844, 117.26487731933594, 655.9307250976562, 227.75070190429688, 313.9814453125, 38.54523468017578, 227.2892608642578, -181.18069458007812, 285.63037109375, 73.27696228027344, 268.28314208984375, -58.09779357910156, 403.4012145996094, 69.73722839355469, 654.9857788085938, -145.87615966796875, 43.10661697387695, 118.34174346923828, 96.07572937011719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000302.npy"} +{"epoch": 0.4434654919236417, "step": 303, "batch_size": 64, "mean": 159.26882934570312, "std": 241.89088439941406, "min": -420.1774597167969, "p10": -134.55278778076172, "median": 129.32454681396484, "p90": 522.2392761230469, "max": 678.1196899414062, "pos_frac": 0.703125, "sample": [304.3101806640625, -4.299350738525391, 28.07733154296875, 606.8524780273438, -136.4636993408203, 377.7178039550781, 438.56817626953125, 678.1196899414062, -18.8658390045166, -105.43869018554688, 365.43096923828125, 202.65615844726562, 350.9358215332031, -99.41043090820312, 90.29549407958984, 166.2948455810547, 93.27116394042969, 13.87755012512207, -80.0738754272461, 415.73809814453125, -420.1774597167969, -58.20649337768555, -130.093994140625, 312.7012023925781, 437.92718505859375, 562.1929931640625, -141.4000701904297, 333.74560546875, -253.21066284179688, 124.52717590332031, -15.977630615234375, 322.874267578125, 623.0018310546875, 224.16908264160156, 449.17822265625, 178.5828857421875, -166.97116088867188, 111.40985107421875, 100.46109771728516, 613.8239135742188, 531.93896484375, 52.8973388671875, 195.20175170898438, 516.2587280273438, 134.12191772460938, 212.99093627929688, 259.33563232421875, 316.1209716796875, 149.43972778320312, -41.349327087402344, 94.87075805664062, -37.568153381347656, 213.3030242919922, 20.888111114501953, 87.14129638671875, 524.8023681640625, -142.6361083984375, -40.85597229003906, 11.272708892822266, 22.683902740478516, 248.7133026123047, -4.5606689453125, -192.16812133789062, 164.21051025390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000303.npy"} +{"epoch": 0.44493392070484583, "step": 304, "batch_size": 64, "mean": 182.71343994140625, "std": 157.5171661376953, "min": -239.23509216308594, "p10": -0.39480590820311523, "median": 180.743896484375, "p90": 385.81872558593756, "max": 623.9747924804688, "pos_frac": 0.890625, "sample": [31.852279663085938, 242.26121520996094, 275.2595520019531, 126.41002655029297, -8.835357666015625, -14.853225708007812, -4.431732177734375, 49.25468444824219, 191.80972290039062, 108.178466796875, 282.8825378417969, 317.01983642578125, 179.46514892578125, 354.47845458984375, 52.47727966308594, 272.9046630859375, 293.7393493652344, 293.64434814453125, 59.78667449951172, 33.83401870727539, 156.55398559570312, 391.99468994140625, 56.95347595214844, 69.74752807617188, 280.7771301269531, 86.783447265625, 279.6969299316406, 79.04954528808594, 371.40814208984375, 108.44329833984375, 128.65768432617188, 125.12240600585938, 623.9747924804688, 132.13449096679688, -50.982643127441406, -239.23509216308594, 229.77069091796875, 9.024688720703125, 29.696823120117188, 248.43539428710938, -58.906097412109375, 462.0069580078125, 106.81968688964844, 66.2377700805664, 453.4449462890625, 159.7874755859375, 12.483871459960938, 70.91622924804688, 224.53521728515625, 342.5035705566406, 258.0605163574219, -66.37077331542969, 223.4501190185547, 421.8305969238281, 393.06329345703125, 55.11381530761719, 266.3936462402344, 182.02264404296875, 407.2601318359375, 363.6565856933594, 333.7994384765625, 194.47640991210938, 322.216552734375, 243.71212768554688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000304.npy"} +{"epoch": 0.44640234948604995, "step": 305, "batch_size": 64, "mean": 163.28309631347656, "std": 236.73658752441406, "min": -322.0887451171875, "p10": -84.03926086425778, "median": 129.17516708374023, "p90": 458.1063873291016, "max": 1054.94189453125, "pos_frac": 0.734375, "sample": [400.0547790527344, 277.9049072265625, 113.23655700683594, 206.5447235107422, 189.6446990966797, 697.1471557617188, 259.94573974609375, 28.25581169128418, 257.44757080078125, 500.8643798828125, 171.6455841064453, 82.40293884277344, 79.58565521240234, -61.290008544921875, -93.7889404296875, -14.730842590332031, 317.846923828125, 447.1036682128906, 89.18329620361328, 68.59395599365234, 1054.94189453125, 349.7861328125, 23.234371185302734, 262.57489013671875, 292.44720458984375, -38.9000244140625, -6.780237197875977, 233.37840270996094, 241.67771911621094, -100.92880249023438, -42.29046630859375, 274.33514404296875, -5.0294342041015625, 328.0452575683594, 130.5120849609375, 146.47705078125, 537.5423583984375, 315.80316162109375, -25.659177780151367, 157.39654541015625, 127.83824920654297, -13.798118591308594, 102.48094177246094, 93.48140716552734, 585.8301391601562, 560.9436645507812, 224.85606384277344, -45.26654052734375, -217.92153930664062, 155.03887939453125, -296.3973083496094, 462.82183837890625, 415.17431640625, 65.0090560913086, -95.03427124023438, 31.1485595703125, -115.28324127197266, 43.4166259765625, 273.8667297363281, -322.0887451171875, 49.513160705566406, 256.19500732421875, -55.30207824707031, 17.432437896728516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000305.npy"} +{"epoch": 0.447870778267254, "step": 306, "batch_size": 64, "mean": 158.99729919433594, "std": 197.92811584472656, "min": -350.03228759765625, "p10": -33.15047264099119, "median": 98.2620849609375, "p90": 415.8634063720703, "max": 893.9873046875, "pos_frac": 0.8125, "sample": [19.892963409423828, 15.165760040283203, 87.93447875976562, 1.37677001953125, 34.0355224609375, -65.61146545410156, 341.2049255371094, 8.599723815917969, 96.99920654296875, -8.879554748535156, 237.67100524902344, 449.9154357910156, 180.6060791015625, 412.59490966796875, 69.10653686523438, 85.96031951904297, 9.208011627197266, 469.0981140136719, 71.1196517944336, 50.18580627441406, 87.06719207763672, -59.96953582763672, -8.774032592773438, 432.5631408691406, 240.22970581054688, 327.20587158203125, -10.58890151977539, -6.665290832519531, 417.2641906738281, 105.84349060058594, 81.3819580078125, 144.29220581054688, 260.20928955078125, 326.12799072265625, 50.1400146484375, 145.4049530029297, 99.52496337890625, -89.30815887451172, 22.721569061279297, 327.8367919921875, -350.03228759765625, 324.89013671875, 91.09586334228516, -42.81971740722656, 51.52711486816406, 263.0102233886719, -45.868873596191406, 113.6034927368164, 535.4180908203125, 519.1979370117188, 149.29751586914062, 306.258056640625, 226.9849090576172, -86.02997589111328, 383.02691650390625, -8.140308380126953, 52.890995025634766, 40.22130584716797, 275.15631103515625, 117.81851196289062, 893.9873046875, 210.96884155273438, 315.87017822265625, 378.8025817871094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000306.npy"} +{"epoch": 0.44933920704845814, "step": 307, "batch_size": 64, "mean": 162.99288940429688, "std": 214.6385040283203, "min": -428.0800476074219, "p10": -91.87994918823242, "median": 124.09592056274414, "p90": 466.37028503417974, "max": 772.4826049804688, "pos_frac": 0.796875, "sample": [413.80609130859375, 126.21607208251953, 131.78933715820312, 220.00650024414062, 180.41299438476562, 158.3192596435547, -121.93231201171875, 70.82290649414062, -81.13673400878906, 561.29248046875, 118.59085845947266, 13.447822570800781, 72.89329528808594, -150.7296142578125, 450.38433837890625, -428.0800476074219, 496.7172546386719, -167.33912658691406, 232.24411010742188, -81.95464324951172, 239.8687744140625, 277.8047180175781, 87.12657928466797, 169.17788696289062, 481.82122802734375, 253.76553344726562, 38.2673454284668, 121.97576904296875, 77.14434814453125, 498.8141174316406, 85.71408081054688, 473.2214050292969, 70.18732452392578, 100.95750427246094, -96.13365173339844, 107.26669311523438, -76.88546752929688, 47.87867736816406, 433.07763671875, 397.42547607421875, -47.20970916748047, 383.2994689941406, 68.28514862060547, 303.4508361816406, 205.29637145996094, 62.43873596191406, 356.3717041015625, 255.33944702148438, -15.862524032592773, 72.48692321777344, 211.83168029785156, 15.321281433105469, 356.79510498046875, 562.3297119140625, 284.4975280761719, 772.4826049804688, 134.2327880859375, -97.58502197265625, 223.16856384277344, 113.28518676757812, -39.71488952636719, 313.2087707519531, 79.60005187988281, -146.05111694335938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000307.npy"} +{"epoch": 0.45080763582966227, "step": 308, "batch_size": 64, "mean": 186.27633666992188, "std": 192.6704864501953, "min": -194.6070098876953, "p10": -50.02389736175535, "median": 170.47956085205078, "p90": 458.3158172607422, "max": 549.701904296875, "pos_frac": 0.8125, "sample": [164.04110717773438, 449.00994873046875, -194.6070098876953, 238.33676147460938, 31.80682373046875, 512.5392456054688, -135.75177001953125, 6.383548736572266, 69.47930908203125, 52.50642395019531, 410.58843994140625, -8.28475570678711, 223.79705810546875, 93.98117065429688, 386.9210205078125, 481.8877868652344, -128.31727600097656, 169.9519805908203, 144.21896362304688, 14.280342102050781, 266.32220458984375, 512.874755859375, 294.87750244140625, 442.03424072265625, -97.81842041015625, 297.73858642578125, 123.55728149414062, 95.77925109863281, 121.18861389160156, -58.70549011230469, 257.11773681640625, 122.3065414428711, 171.00714111328125, 223.1925811767578, 278.2738037109375, 462.3040466308594, 504.25146484375, 214.22268676757812, -117.52455139160156, 66.52027893066406, 78.91175842285156, 353.99652099609375, 329.1504211425781, 151.42117309570312, 185.2294158935547, 549.701904296875, 228.94967651367188, 55.77418518066406, -24.70923614501953, 398.18988037109375, 537.00146484375, 281.2861633300781, 160.17843627929688, -10.627683639526367, -29.766847610473633, 49.56048583984375, 356.1224670410156, 376.5442199707031, 269.7379150390625, -21.96685028076172, 398.2511291503906, 218.84320068359375, -147.54893493652344, 15.165733337402344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000308.npy"} +{"epoch": 0.4522760646108664, "step": 309, "batch_size": 64, "mean": 181.35653686523438, "std": 233.16058349609375, "min": -680.8449096679688, "p10": -96.4515380859375, "median": 182.12516021728516, "p90": 464.7881072998048, "max": 746.4739990234375, "pos_frac": 0.796875, "sample": [192.26107788085938, 77.80183410644531, 118.4080581665039, -117.76284790039062, 296.5398254394531, 175.87664794921875, 136.82411193847656, -17.057668685913086, 530.7652587890625, 61.4202880859375, 263.857177734375, 449.2525634765625, 111.5064926147461, 505.0340270996094, 65.66370391845703, 40.71831512451172, -91.60346984863281, 296.260009765625, 471.4461975097656, 299.9236145019531, 256.11517333984375, -680.8449096679688, 228.79901123046875, -128.38389587402344, 79.33206176757812, 160.65890502929688, 248.16744995117188, 292.15606689453125, 17.640975952148438, -3.538389205932617, -119.92316436767578, 632.0823974609375, 39.5240478515625, 1.5698699951171875, 138.67721557617188, 440.8614501953125, 746.4739990234375, 418.5617370605469, 128.04840087890625, -23.820785522460938, -46.68368148803711, 71.3046646118164, 65.22053527832031, 422.87664794921875, 298.905029296875, 299.78265380859375, 505.89349365234375, 410.312744140625, 301.25341796875, -26.351566314697266, 410.2513122558594, -172.3872528076172, 217.28903198242188, 298.8131408691406, -119.6441650390625, 62.367191314697266, 547.2381591796875, 394.05511474609375, -98.52928161621094, 198.87579345703125, 188.37367248535156, 21.18130874633789, 387.6904602050781, 229.43641662597656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000309.npy"} +{"epoch": 0.45374449339207046, "step": 310, "batch_size": 64, "mean": 179.5365447998047, "std": 186.35250854492188, "min": -238.20074462890625, "p10": -43.69660034179687, "median": 169.74169921875, "p90": 411.90269775390635, "max": 634.8976440429688, "pos_frac": 0.828125, "sample": [-46.63601303100586, 170.38568115234375, 340.2966003417969, -50.081932067871094, 92.08216857910156, 313.88372802734375, 58.17323684692383, 41.42438507080078, 33.15321350097656, 212.73452758789062, -198.95877075195312, 393.0118408203125, 459.0502014160156, -36.83797073364258, 76.84284973144531, 312.1357727050781, 212.13417053222656, 246.7114715576172, 46.0037841796875, 200.36639404296875, -13.695331573486328, 110.45121765136719, 352.95086669921875, 84.14419555664062, 165.3050537109375, 540.899658203125, 336.98193359375, 37.573307037353516, 18.039857864379883, 315.494140625, -238.20074462890625, 634.8976440429688, 0.6010856628417969, 150.2023468017578, 553.0578002929688, 184.25811767578125, 257.8673095703125, -22.99543571472168, -61.31641387939453, -28.02918243408203, 71.38333129882812, 159.270263671875, 327.3167724609375, 248.54530334472656, 107.05390167236328, 294.73760986328125, 419.998779296875, -114.42493438720703, 352.5291748046875, 254.34677124023438, 355.3558044433594, -53.03620147705078, 330.2608947753906, 183.6138153076172, 63.37480163574219, 69.56916046142578, 92.63470458984375, 510.080322265625, 495.6155090332031, 236.64344787597656, 77.69091796875, 377.936767578125, 206.38156127929688, 169.09771728515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000310.npy"} +{"epoch": 0.4552129221732746, "step": 311, "batch_size": 64, "mean": 195.67971801757812, "std": 252.9629669189453, "min": -252.5065155029297, "p10": -106.75041503906246, "median": 154.24077606201172, "p90": 567.5149536132814, "max": 886.8372802734375, "pos_frac": 0.734375, "sample": [477.02703857421875, 377.3390808105469, 392.7413024902344, -8.673942565917969, -159.3258819580078, 534.3909912109375, -123.71543884277344, 232.5424346923828, 154.987060546875, 577.2904052734375, 268.8434143066406, 76.685302734375, 339.2510986328125, 716.6187744140625, 318.96527099609375, 115.35037994384766, 496.660888671875, 139.17369079589844, -38.960975646972656, 597.384521484375, 193.6949920654297, 223.5870361328125, 522.223388671875, 236.47557067871094, 50.96953582763672, 164.0123748779297, -51.38795471191406, -181.57498168945312, 187.54293823242188, 123.64434051513672, -69.04086303710938, 299.7776184082031, -136.65728759765625, 134.6296844482422, 265.90667724609375, -122.91165161132812, -6.155799865722656, 886.8372802734375, -19.411102294921875, 262.2821044921875, 51.688026428222656, 49.04499053955078, 215.74465942382812, 54.15264892578125, 608.2066650390625, 422.0917053222656, 28.141803741455078, -43.65557861328125, -24.46389389038086, 100.15327453613281, -22.426471710205078, 544.70556640625, 151.55206298828125, 3.0470809936523438, -252.5065155029297, 459.08404541015625, 153.49449157714844, 223.51693725585938, 14.293724060058594, 259.5645751953125, 626.8216552734375, -176.9297332763672, -45.47759246826172, 674.6377563476562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000311.npy"} +{"epoch": 0.4566813509544787, "step": 312, "batch_size": 64, "mean": 183.03475952148438, "std": 254.16796875, "min": -316.9760437011719, "p10": -84.70376663208006, "median": 127.37588882446289, "p90": 487.84434814453124, "max": 1046.13330078125, "pos_frac": 0.8125, "sample": [118.89085388183594, 1046.13330078125, 535.4944458007812, 182.50228881835938, 107.61698913574219, 49.99066925048828, -89.26264190673828, 102.93397521972656, 120.70011138916016, 336.81427001953125, -275.92987060546875, 397.6422424316406, 958.0731201171875, 353.5318603515625, 250.80682373046875, 289.08905029296875, -202.8370361328125, 242.20367431640625, 403.19970703125, 154.44482421875, 518.0602416992188, 87.74708557128906, 138.6531219482422, 252.88180541992188, 79.45269775390625, 36.16191101074219, 32.779136657714844, 739.53271484375, 347.7415466308594, 489.60552978515625, 123.74978637695312, 428.2049865722656, 176.39254760742188, 97.68000793457031, -121.84490966796875, 99.03944396972656, 168.1685333251953, 483.73492431640625, -33.06944274902344, 57.4864616394043, 279.6922607421875, 123.80577850341797, -47.030059814453125, 247.7726287841797, 177.56903076171875, 17.118499755859375, -36.339569091796875, 642.150634765625, -74.06639099121094, 10.550085067749023, 182.1996612548828, 119.73362731933594, -157.86367797851562, 10.2388916015625, -90.45723724365234, 130.9459991455078, 150.5576171875, -0.515594482421875, -316.9760437011719, 393.9548034667969, 256.2677307128906, 19.087905883789062, 43.9561653137207, 347.67535400390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000312.npy"} +{"epoch": 0.4581497797356828, "step": 313, "batch_size": 64, "mean": 189.80377197265625, "std": 257.02191162109375, "min": -448.249755859375, "p10": -102.16852722167962, "median": 182.32271575927734, "p90": 510.21256713867194, "max": 887.564697265625, "pos_frac": 0.796875, "sample": [76.31976318359375, 292.8656311035156, 118.08229064941406, 304.941162109375, -141.4256591796875, -8.957595825195312, 495.1204833984375, 178.41650390625, 372.14923095703125, -47.4661979675293, 16.883560180664062, -168.75440979003906, 454.48858642578125, 146.8878936767578, 20.834230422973633, 239.46092224121094, 516.6806030273438, 197.5662078857422, 95.50232696533203, 211.1004638671875, 59.194007873535156, 336.9914855957031, -422.2420959472656, -157.02809143066406, 191.9110107421875, 581.3397827148438, 277.0716552734375, 73.81944274902344, 187.48223876953125, 887.564697265625, 336.529296875, 151.63372802734375, 35.07958984375, 186.2289276123047, -221.86947631835938, 144.55258178710938, -29.727399826049805, -448.249755859375, 419.4637145996094, 219.5740509033203, -14.897056579589844, 244.63555908203125, 57.6143684387207, 340.59906005859375, 317.4919738769531, 127.36016082763672, -34.897178649902344, 345.6910400390625, 296.22650146484375, -125.61238098144531, 435.24993896484375, 110.61068725585938, 151.43914794921875, 17.11359405517578, 214.9268798828125, 585.2310791015625, 623.834716796875, -9.334030151367188, 4.685386657714844, 79.30758666992188, 291.1219787597656, 350.2532958984375, 744.2491455078125, 814.5241088867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000313.npy"} +{"epoch": 0.45961820851688695, "step": 314, "batch_size": 64, "mean": 176.21929931640625, "std": 278.32574462890625, "min": -378.21734619140625, "p10": -161.98898773193358, "median": 134.08779525756836, "p90": 493.9214111328125, "max": 903.6690063476562, "pos_frac": 0.71875, "sample": [768.0145263671875, -276.85980224609375, 112.35454559326172, 42.07299041748047, 32.580780029296875, 257.62908935546875, 38.271881103515625, 351.6828918457031, 82.44602966308594, -135.1967010498047, 366.4335021972656, 13.904220581054688, 249.59759521484375, 101.84288024902344, 190.13259887695312, -378.21734619140625, -178.37474060058594, 37.07112121582031, 653.15234375, 779.2003784179688, 30.4876708984375, -25.84440040588379, -172.74185180664062, 58.86556625366211, 195.4986572265625, -165.66453552246094, -13.700241088867188, -140.9130096435547, -32.369773864746094, 426.6202392578125, 482.96710205078125, 14.9329833984375, 258.498046875, 494.62310791015625, 302.2585754394531, 174.31045532226562, -130.9000244140625, 155.821044921875, 47.38824462890625, 805.0208129882812, 426.039306640625, 332.26654052734375, 903.6690063476562, 492.28411865234375, -93.78610229492188, 353.89508056640625, -158.29812622070312, 450.86383056640625, -24.402679443359375, 232.69467163085938, 352.88311767578125, 40.08634948730469, 293.86138916015625, 356.3077392578125, 463.3469543457031, 532.4640502929688, 86.57267761230469, 272.6033630371094, 240.75364685058594, -46.77448272705078, -163.57078552246094, -43.53178024291992, 325.8720703125, -220.9628143310547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000314.npy"} +{"epoch": 0.461086637298091, "step": 315, "batch_size": 64, "mean": 157.57626342773438, "std": 219.61166381835938, "min": -205.828857421875, "p10": -85.47158355712891, "median": 110.84260559082031, "p90": 502.2987823486329, "max": 739.72119140625, "pos_frac": 0.734375, "sample": [556.0833740234375, -65.42273712158203, 288.6806640625, 8.27349853515625, -63.604183197021484, 50.83769226074219, 64.30699157714844, 113.44154357910156, 280.53558349609375, -37.969573974609375, 280.3326416015625, 26.583986282348633, -65.89610290527344, 265.84735107421875, 86.81407928466797, 548.7103881835938, 314.45306396484375, 170.77520751953125, -145.73025512695312, 351.5758361816406, 215.2080078125, 517.091552734375, 543.7828369140625, 60.35565185546875, -132.63604736328125, 197.155517578125, 261.38165283203125, 174.3347625732422, 407.2236022949219, 158.5758514404297, 432.6559143066406, 72.17689514160156, 78.77655029296875, 108.24366760253906, 182.03871154785156, 310.02581787109375, -109.33016967773438, -32.0667724609375, 240.08091735839844, 14.526336669921875, 33.9686164855957, -83.59457397460938, 739.72119140625, 55.756134033203125, 219.43875122070312, -6.047157287597656, -179.14968872070312, 6.207841873168945, -86.27601623535156, 610.2633056640625, 73.34133911132812, 260.64959716796875, 636.116455078125, 135.168212890625, 149.08921813964844, -124.61419677734375, 286.4572448730469, -30.81216049194336, 406.1697082519531, 467.7823181152344, -205.828857421875, -61.50956726074219, 63.652286529541016, -9.298927307128906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000315.npy"} +{"epoch": 0.46255506607929514, "step": 316, "batch_size": 64, "mean": 234.00823974609375, "std": 253.26156616210938, "min": -249.42880249023438, "p10": -5.3934284210205, "median": 161.11019134521484, "p90": 599.8680297851563, "max": 953.4509887695312, "pos_frac": 0.890625, "sample": [330.65399169921875, 126.16007995605469, 496.6202392578125, 64.07371520996094, 45.908348083496094, 930.8297119140625, 148.41973876953125, 692.9951782226562, 480.9593505859375, 651.9796752929688, 389.141357421875, 338.4876708984375, 40.08734130859375, 27.385196685791016, 28.46808624267578, 213.37318420410156, 66.73484802246094, 108.95332336425781, 1.4512596130371094, 434.055908203125, 200.43917846679688, 27.239749908447266, 609.572998046875, 457.6888427734375, 293.8634338378906, 127.87871551513672, 68.00299072265625, 605.9739379882812, 153.7977752685547, 519.26904296875, 7.57020378112793, 157.46893310546875, 45.41107177734375, 164.75144958496094, 207.21194458007812, 350.048095703125, 416.466796875, 55.737525939941406, 383.220458984375, 24.70447540283203, 585.6209106445312, 334.48968505859375, 41.46893310546875, 390.9344177246094, 186.23023986816406, 147.56430053710938, -233.8511505126953, 22.084789276123047, 953.4509887695312, 128.27098083496094, 783.04736328125, -249.42880249023438, 193.94725036621094, 204.0638427734375, 368.7958984375, -8.326866149902344, -20.665245056152344, -21.671279907226562, -45.12848663330078, 134.88259887695312, 235.47781372070312, -106.1695785522461, 120.7308349609375, 337.652099609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000316.npy"} +{"epoch": 0.46402349486049926, "step": 317, "batch_size": 64, "mean": 247.40829467773438, "std": 237.35400390625, "min": -155.39695739746094, "p10": -49.207481384277344, "median": 221.45252990722656, "p90": 585.6850280761719, "max": 704.5596923828125, "pos_frac": 0.796875, "sample": [358.93572998046875, 38.918678283691406, 96.56397247314453, 464.7784729003906, 470.8497619628906, 77.91439819335938, 449.00762939453125, 375.2138671875, -154.07321166992188, 585.7653198242188, 704.5596923828125, 144.68833923339844, 225.38595581054688, -16.700927734375, 138.29891967773438, 420.0428466796875, 176.18116760253906, -19.10326385498047, 99.1728286743164, 425.173583984375, -46.6116943359375, 346.45001220703125, 374.42413330078125, 521.9319458007812, 41.61216354370117, 539.9659423828125, -20.34756088256836, 104.76262664794922, 663.1993408203125, 406.6878662109375, 627.3464965820312, 454.2076416015625, 95.07261657714844, 602.257568359375, 432.7744140625, -57.61360168457031, 283.8833923339844, 98.21998596191406, 265.3254089355469, 116.94760131835938, 217.51910400390625, 547.717041015625, -29.40418815612793, -50.31996154785156, 662.5891723632812, 212.63543701171875, 585.4976806640625, 335.4384765625, 270.8809814453125, -80.25594329833984, 145.58665466308594, 87.01099395751953, 265.4127197265625, -29.29521942138672, -144.68630981445312, 136.791259765625, 148.91055297851562, -124.89141082763672, 651.428466796875, 408.92218017578125, 100.55377960205078, 405.3900146484375, -155.39695739746094, 354.0262145996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000317.npy"} +{"epoch": 0.4654919236417034, "step": 318, "batch_size": 64, "mean": 185.17645263671875, "std": 237.19442749023438, "min": -343.9800720214844, "p10": -70.54890975952148, "median": 146.36261749267578, "p90": 525.7545227050783, "max": 840.9328002929688, "pos_frac": 0.8125, "sample": [62.73853302001953, -23.393722534179688, 811.581787109375, 88.53070068359375, 251.4974365234375, 121.56343078613281, -57.549461364746094, 191.13851928710938, 130.14154052734375, 676.5836181640625, 13.704195022583008, 111.308349609375, 336.2889404296875, 557.1399536132812, 620.1986694335938, 218.63731384277344, 6.5337677001953125, 140.1768035888672, -103.25774383544922, -50.15784454345703, 589.8583374023438, 66.57622528076172, -113.29124450683594, 486.62164306640625, 840.9328002929688, 305.8353271484375, -29.91756820678711, -114.33790588378906, 214.3293914794922, 297.9775695800781, 88.28457641601562, 321.9084777832031, 53.17017364501953, 396.1171569824219, 214.35105895996094, 140.64407348632812, 542.5257568359375, 32.51080322265625, -19.012908935546875, 432.4804382324219, 414.08782958984375, 141.64871215820312, 10.668170928955078, -106.74417877197266, 347.62432861328125, 185.78457641601562, 105.45372772216797, 212.6168212890625, 151.07652282714844, -76.12010192871094, -335.697509765625, 47.746253967285156, 222.366455078125, 206.67510986328125, 368.1853942871094, 371.3782958984375, 232.57760620117188, 159.0518798828125, 56.09101867675781, 274.5690002441406, 136.04290771484375, 200.50677490234375, -343.9800720214844, 18.713279724121094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000318.npy"} +{"epoch": 0.4669603524229075, "step": 319, "batch_size": 64, "mean": 220.9802703857422, "std": 247.9208526611328, "min": -310.4207763671875, "p10": -108.69849700927735, "median": 221.92739868164062, "p90": 542.6328002929688, "max": 986.8610229492188, "pos_frac": 0.8125, "sample": [293.454345703125, 159.0634002685547, 306.29486083984375, 257.2603454589844, 40.00965881347656, 251.908447265625, 359.7613830566406, 511.44024658203125, 236.50729370117188, 84.18103790283203, -109.12783813476562, -216.93069458007812, 334.66424560546875, 115.38090515136719, 307.5240783691406, 139.09764099121094, 53.558815002441406, 251.2245635986328, 257.9903869628906, 180.63311767578125, 185.20249938964844, 396.8950500488281, 217.10760498046875, 232.27923583984375, 328.24920654296875, 20.52142333984375, 368.2442626953125, 794.3787841796875, 413.8382873535156, 353.64422607421875, -133.57720947265625, 162.20819091796875, 178.0954132080078, 530.86328125, -246.64016723632812, 384.7618408203125, -310.4207763671875, -97.9541015625, -133.8709716796875, 395.6573486328125, 556.1326904296875, 439.5705871582031, 592.6989135742188, -1.4154033660888672, 177.25502014160156, 43.34764099121094, 547.6768798828125, 618.9849853515625, 270.3029479980469, 588.8283081054688, 986.8610229492188, -107.69670104980469, 154.615966796875, 421.575439453125, -29.117481231689453, 37.92138671875, -174.57070922851562, -45.11023712158203, 362.5239562988281, 157.35816955566406, 121.79132080078125, 193.12811279296875, 226.7471923828125, 149.94715881347656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000319.npy"} +{"epoch": 0.4684287812041116, "step": 320, "batch_size": 64, "mean": 240.2877960205078, "std": 259.0086364746094, "min": -290.53857421875, "p10": -66.37993240356445, "median": 222.9927215576172, "p90": 578.1055725097658, "max": 1003.077392578125, "pos_frac": 0.8125, "sample": [153.32179260253906, 190.2967987060547, 870.534912109375, 119.51783752441406, 269.91412353515625, -99.4038314819336, 180.46881103515625, -290.53857421875, 810.8246459960938, 201.6070098876953, 442.5947265625, 348.1700134277344, -36.93687438964844, 356.6315612792969, 221.95816040039062, 538.2990112304688, 611.4115600585938, 379.6877136230469, 363.52777099609375, -142.841552734375, 365.33746337890625, 365.5406494140625, -38.358360290527344, 253.8314208984375, 72.15259552001953, 0.37609100341796875, 303.99029541015625, 285.8558654785156, -62.31834411621094, 595.16552734375, -25.860397338867188, 244.51754760742188, -68.12061309814453, 146.21884155273438, 448.18267822265625, 108.4854736328125, 160.773681640625, 216.20712280273438, 1003.077392578125, 364.4312438964844, 380.6422119140625, 90.74359893798828, 102.21047973632812, -60.190704345703125, 659.0121459960938, 60.101097106933594, 400.98724365234375, 209.51087951660156, 0.3621387481689453, 16.567031860351562, 363.4917907714844, 369.80804443359375, 265.59515380859375, 444.18572998046875, 485.41241455078125, 632.2578125, 529.3140869140625, 9.985563278198242, -90.91989135742188, -71.2099838256836, 48.64849853515625, 224.02728271484375, 253.28018188476562, -173.93548583984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000320.npy"} +{"epoch": 0.4698972099853157, "step": 321, "batch_size": 64, "mean": 204.66717529296875, "std": 275.3786315917969, "min": -403.501953125, "p10": -72.7759552001953, "median": 183.17898559570312, "p90": 564.9298583984377, "max": 1024.70458984375, "pos_frac": 0.765625, "sample": [683.3513793945312, 119.2746810913086, 294.96820068359375, 262.3550720214844, -41.81322479248047, 274.127685546875, 324.8550720214844, 269.725830078125, 171.17620849609375, 502.7108154296875, 441.2209777832031, 365.5379638671875, 107.85263061523438, 40.47676467895508, -239.47337341308594, 298.16815185546875, 440.85546875, 225.11135864257812, 172.84091186523438, 579.9376220703125, 802.97314453125, 161.89474487304688, -403.501953125, 324.0243835449219, 196.3305206298828, 299.5929870605469, 730.6976928710938, -319.00390625, -128.6186981201172, 138.11073303222656, 67.39064025878906, 529.9117431640625, -40.720184326171875, 362.74188232421875, 185.01467895507812, 401.9207458496094, 241.9503631591797, -299.15997314453125, 1024.70458984375, 47.51996612548828, -59.382110595703125, 66.77050018310547, 22.104516983032227, 112.89662170410156, -31.886619567871094, 152.1680450439453, -78.51617431640625, -26.165699005126953, 253.93453979492188, 263.33135986328125, 664.645751953125, 676.4437255859375, -7.971218109130859, -318.0367431640625, 181.8668212890625, 180.45663452148438, 145.717529296875, 150.10462951660156, -20.461027145385742, 466.69989013671875, 184.49114990234375, 286.6522521972656, 272.44378662109375, -56.642730712890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000321.npy"} +{"epoch": 0.4713656387665198, "step": 322, "batch_size": 64, "mean": 226.37049865722656, "std": 241.35641479492188, "min": -311.0309143066406, "p10": -55.31768341064453, "median": 247.78831481933594, "p90": 517.6384399414063, "max": 828.6724853515625, "pos_frac": 0.765625, "sample": [-186.1815185546875, 822.2559814453125, 346.1003723144531, 828.6724853515625, 90.18206024169922, -70.43307495117188, 522.3855590820312, 382.5658264160156, 317.96856689453125, -78.82754516601562, -57.86711120605469, -24.024070739746094, 337.60638427734375, 274.292724609375, 310.66259765625, 362.24969482421875, -6.785392761230469, 241.056640625, -21.849037170410156, -116.31442260742188, 604.5067138671875, 565.617919921875, 237.43807983398438, 568.4808959960938, -49.3690185546875, 261.4618225097656, 349.4473876953125, 506.56182861328125, 46.493263244628906, 476.46124267578125, -31.265775680541992, 215.3662109375, -13.566570281982422, 375.7733154296875, -6.0302734375, 20.632579803466797, 259.140625, 185.8727569580078, 12.938907623291016, 325.724365234375, 462.02685546875, 415.7691650390625, 413.95269775390625, -114.7860336303711, 478.48779296875, 135.61268615722656, 283.87060546875, -30.639690399169922, 249.97647094726562, 42.77830505371094, 74.50305938720703, 666.2825317382812, 195.39303588867188, 310.7602844238281, 371.4504699707031, 392.53607177734375, 97.65899658203125, 82.24939727783203, 58.30311584472656, -311.0309143066406, 9.963836669921875, 352.7374267578125, 418.8516540527344, 245.60015869140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000322.npy"} +{"epoch": 0.47283406754772395, "step": 323, "batch_size": 64, "mean": 190.0548095703125, "std": 247.40451049804688, "min": -478.317138671875, "p10": -73.42815551757812, "median": 177.86404418945312, "p90": 540.1785522460938, "max": 804.090576171875, "pos_frac": 0.8125, "sample": [419.9819030761719, 268.57965087890625, 550.8311767578125, -478.317138671875, 572.2843627929688, -330.37188720703125, 560.0254516601562, 804.090576171875, 19.741413116455078, 69.73229217529297, 285.7994079589844, 53.04541015625, 180.93423461914062, 136.67457580566406, 15.320213317871094, 318.8656005859375, 96.3198013305664, 299.5260925292969, -68.32178497314453, 639.7701416015625, 5.590507507324219, 97.09178161621094, 453.21014404296875, -75.6166000366211, -92.87715148925781, 395.1554260253906, 161.2142791748047, 152.04464721679688, 377.3531494140625, 27.582000732421875, 545.650146484375, 287.7725830078125, 402.83099365234375, -22.647932052612305, 311.3936767578125, 22.286170959472656, -64.92683410644531, 174.79385375976562, 281.6689453125, 227.38311767578125, 308.03936767578125, 35.95965576171875, 245.507568359375, 527.4114990234375, -329.4935302734375, 173.75779724121094, 312.82171630859375, 585.6963500976562, 390.5191650390625, -61.741111755371094, 236.86111450195312, 526.6721801757812, -86.45342254638672, 275.9896545410156, -274.93890380859375, 225.57815551757812, 128.68084716796875, 265.8896179199219, 83.7546615600586, 282.2542724609375, 47.75102996826172, -2.6339454650878906, 136.24197387695312, 47.91845703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000323.npy"} +{"epoch": 0.47430249632892807, "step": 324, "batch_size": 64, "mean": 183.44676208496094, "std": 277.77154541015625, "min": -428.9640808105469, "p10": -118.3505661010742, "median": 152.69564819335938, "p90": 510.24022521972677, "max": 957.6033935546875, "pos_frac": 0.765625, "sample": [-210.86807250976562, 661.2820434570312, -61.82513427734375, 162.9856719970703, 33.881507873535156, 174.733642578125, 240.15020751953125, 314.1515808105469, 143.7482147216797, 191.560791015625, 816.9892578125, -94.80575561523438, 158.46005249023438, 92.44688415527344, 23.516674041748047, 693.47802734375, -234.41087341308594, -322.309814453125, 342.9140625, 249.50567626953125, 24.620681762695312, 141.81195068359375, -127.67169189453125, 201.88275146484375, 331.95977783203125, 856.3279418945312, 448.42022705078125, -78.15602111816406, 84.44198608398438, 423.9747619628906, 460.3378601074219, 121.92744445800781, 74.86824798583984, 93.02396392822266, 284.6026306152344, 327.2620544433594, 105.75984191894531, 319.8751220703125, 202.90097045898438, 0.5331325531005859, 416.6932067871094, -428.9640808105469, -13.575019836425781, 390.4256896972656, 531.626953125, 368.8822021484375, 148.03334045410156, -304.3565979003906, -133.44775390625, 95.9260025024414, 233.87551879882812, 48.18450164794922, 157.3579559326172, -52.441158294677734, 91.65632629394531, 66.55154418945312, 423.1199645996094, -96.60127258300781, -29.626407623291016, 957.6033935546875, 693.5258178710938, 208.73265075683594, -31.25518798828125, 324.376708984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000324.npy"} +{"epoch": 0.47577092511013214, "step": 325, "batch_size": 64, "mean": 157.63201904296875, "std": 214.16908264160156, "min": -358.1204833984375, "p10": -71.23831977844236, "median": 141.68650817871094, "p90": 457.13458557128916, "max": 796.6693115234375, "pos_frac": 0.828125, "sample": [-348.6066589355469, -358.1204833984375, 262.14117431640625, -26.33391571044922, 330.415283203125, 285.792236328125, 126.0806655883789, -87.24609375, 170.65621948242188, 287.3238525390625, 329.09576416015625, 486.23699951171875, 302.39501953125, 155.2794952392578, 186.57289123535156, 127.8382339477539, 43.13910675048828, 87.11952209472656, 47.31947326660156, 234.80442810058594, -133.34701538085938, 247.38543701171875, 283.6258239746094, 501.2336120605469, 23.642635345458984, 69.35967254638672, 118.81288146972656, 87.72246551513672, 121.49519348144531, 23.25865936279297, 488.9944763183594, -14.001480102539062, 796.6693115234375, 21.59012222290039, 518.2757568359375, 43.16551971435547, 465.00439453125, -49.54945755004883, 243.35153198242188, 225.5790557861328, -80.5335464477539, 172.14739990234375, 373.4538269042969, -39.87945556640625, 218.733642578125, 288.7169189453125, 144.03314208984375, 218.1207275390625, 120.97201538085938, 139.33987426757812, 312.36297607421875, -302.112060546875, 23.780208587646484, 659.9786376953125, 24.28057861328125, 19.530006408691406, 438.7716979980469, 30.476388931274414, -109.55010986328125, 246.18707275390625, 158.1874237060547, 41.88057327270508, 59.45310974121094, 205.94577026367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000325.npy"} +{"epoch": 0.47723935389133626, "step": 326, "batch_size": 64, "mean": 154.7425537109375, "std": 185.08230590820312, "min": -370.7458190917969, "p10": -35.13610076904296, "median": 134.89266967773438, "p90": 374.76379394531256, "max": 622.933837890625, "pos_frac": 0.796875, "sample": [-4.8787841796875, -12.988643646240234, 110.23500061035156, 379.5883483886719, 139.7750701904297, -10.688232421875, 139.64114379882812, 130.14419555664062, 314.97552490234375, -199.72872924804688, -111.42218017578125, -370.7458190917969, 519.5944213867188, 98.85033416748047, -80.89998626708984, 164.3375244140625, 174.4683837890625, 80.14857482910156, 273.63232421875, 252.84237670898438, 21.471038818359375, 112.81924438476562, 237.6820068359375, 232.801025390625, -27.6998291015625, 146.8149871826172, 622.933837890625, 190.4734344482422, 98.00386047363281, 110.99613952636719, 341.0379638671875, 202.64202880859375, 247.25999450683594, 120.1751708984375, 191.58441162109375, -19.4984130859375, 47.54498291015625, 363.5065002441406, -38.42325210571289, 109.11215209960938, 116.41337585449219, 15.406982421875, 99.28846740722656, 168.204345703125, 27.44134521484375, 108.75727844238281, 285.6934814453125, 332.7947692871094, 220.5515899658203, 225.8643035888672, -38.32307434082031, 419.5797424316406, 617.703125, 76.22212982177734, 328.8656005859375, 156.61904907226562, 499.8414306640625, 313.8556213378906, -1.1342411041259766, -194.9139862060547, 424.1837158203125, 314.32598876953125, 21.201953887939453, 66.96248626708984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000326.npy"} +{"epoch": 0.4787077826725404, "step": 327, "batch_size": 64, "mean": 219.50949096679688, "std": 220.30361938476562, "min": -259.93951416015625, "p10": -88.53612670898434, "median": 242.6549835205078, "p90": 503.66260681152346, "max": 724.9379272460938, "pos_frac": 0.828125, "sample": [234.39013671875, 203.6343536376953, 445.65301513671875, 219.52659606933594, 212.1580352783203, 330.5289306640625, 44.431617736816406, 248.49609375, 329.2371520996094, 155.57139587402344, 357.1671142578125, 330.8643798828125, 119.25703430175781, 333.07257080078125, -106.2027359008789, 392.5955810546875, 3.0414466857910156, 491.8763122558594, 338.9151306152344, 290.76080322265625, -242.32205200195312, 608.1571044921875, 504.452880859375, 394.78662109375, 18.278966903686523, 520.7158203125, 286.89361572265625, -126.94148254394531, 232.07952880859375, 296.3932189941406, 383.3774719238281, -130.15005493164062, -26.112625122070312, 141.34571838378906, 293.66119384765625, 172.17771911621094, 287.431884765625, 261.65460205078125, 249.74197387695312, 181.41259765625, -5.8489837646484375, 101.30068969726562, -61.281158447265625, 470.268310546875, 236.81387329101562, -100.21682739257812, 41.06917190551758, 343.32745361328125, 297.4220275878906, 60.146575927734375, 582.9537963867188, 91.83869934082031, 724.9379272460938, -259.93951416015625, 501.8186340332031, -37.668212890625, 67.75386047363281, 579.8435668945312, 69.22465515136719, 418.26007080078125, 264.98687744140625, 514.5975341796875, 102.20282745361328, -237.21395874023438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000327.npy"} +{"epoch": 0.4801762114537445, "step": 328, "batch_size": 64, "mean": 196.95599365234375, "std": 243.94415283203125, "min": -380.948974609375, "p10": -59.63810844421387, "median": 162.02967834472656, "p90": 502.1053100585939, "max": 911.7060546875, "pos_frac": 0.75, "sample": [180.97952270507812, -130.64315795898438, 178.00869750976562, 156.32571411132812, 13.432373046875, 378.8526611328125, 343.1452941894531, 123.55448913574219, 39.73124694824219, -380.948974609375, 112.49304962158203, 414.2932434082031, 462.86444091796875, 374.06201171875, 252.09605407714844, 433.44403076171875, -24.85417938232422, -150.3915252685547, -58.94183349609375, 214.80499267578125, -45.354915618896484, -31.745697021484375, 311.1563720703125, 911.7060546875, 218.70416259765625, 276.9925537109375, 389.5688781738281, 156.59893798828125, 663.8065185546875, 367.0074768066406, -73.66650390625, 191.70474243164062, 292.4734802246094, -59.9365119934082, 63.92498779296875, 131.59988403320312, 310.8701477050781, 400.50714111328125, 159.25778198242188, 127.13670349121094, -9.044023513793945, 137.57745361328125, -101.76751708984375, 191.26031494140625, -15.701896667480469, 470.2608642578125, 107.5684814453125, 247.8949432373047, 250.75732421875, 164.80157470703125, 543.248779296875, 515.7529296875, 40.19648742675781, -15.242706298828125, -16.450206756591797, 682.2476806640625, 798.3515625, 67.86215209960938, 639.2640991210938, -10.98019027709961, 13.510627746582031, 305.248046875, 31.78538703918457, -127.8382797241211], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000328.npy"} +{"epoch": 0.48164464023494863, "step": 329, "batch_size": 64, "mean": 215.82797241210938, "std": 268.51751708984375, "min": -665.3629150390625, "p10": -53.10569152832031, "median": 209.48905181884766, "p90": 558.4554260253907, "max": 931.0707397460938, "pos_frac": 0.796875, "sample": [186.23880004882812, 403.8968505859375, 519.7359619140625, 304.05181884765625, 349.9959411621094, -67.99321746826172, 479.2383728027344, -6.255027770996094, 483.098876953125, 235.89315795898438, 58.72955322265625, 372.1159362792969, 60.481239318847656, -49.61094665527344, 66.01875305175781, 154.18630981445312, 233.52455139160156, 209.87802124023438, 28.20915412902832, 761.3773193359375, 90.25530242919922, 92.35269165039062, 172.55018615722656, 175.34671020507812, -665.3629150390625, 307.6755065917969, 38.54149627685547, 931.0707397460938, 613.2645263671875, -44.43959045410156, 290.2068176269531, -27.397212982177734, 355.4056701660156, 147.3945770263672, 257.640869140625, 398.2454528808594, 567.9192504882812, 327.66363525390625, 735.06884765625, 226.6058349609375, 31.439640045166016, -49.28700637817383, -141.41868591308594, 536.3731689453125, 455.1905822753906, -156.230712890625, 304.7721252441406, -118.87513732910156, 371.2033386230469, 168.5750274658203, 232.49124145507812, -54.60343933105469, 209.10008239746094, 262.5513610839844, 81.03278350830078, 48.67076873779297, 231.17222595214844, 441.572509765625, 695.5701904296875, -33.35357666015625, -278.8992614746094, 167.0413818359375, 598.8903198242188, 37.19092559814453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000329.npy"} +{"epoch": 0.4831130690161527, "step": 330, "batch_size": 64, "mean": 225.97341918945312, "std": 255.16845703125, "min": -300.02813720703125, "p10": -84.93160705566405, "median": 233.19058227539062, "p90": 508.0591064453125, "max": 889.980712890625, "pos_frac": 0.828125, "sample": [473.21533203125, -34.8037109375, -150.3995361328125, -91.9307861328125, 323.6458740234375, 510.05133056640625, 161.83314514160156, 207.69357299804688, -63.53241729736328, 107.92980194091797, -120.95191955566406, 133.3959197998047, 179.73171997070312, 417.00457763671875, 503.41058349609375, 45.769142150878906, 282.8362731933594, 275.6679382324219, 254.38064575195312, 424.3050231933594, 1.1580638885498047, 102.41847229003906, 338.4024658203125, 79.72218322753906, 173.6884765625, 347.8099365234375, -300.02813720703125, 319.00311279296875, 228.89227294921875, 237.4888916015625, 398.4638366699219, 10.633831024169922, -283.24542236328125, 238.27696228027344, 534.23291015625, 192.8701171875, 46.11070251464844, 782.5248413085938, 34.932899475097656, 154.34898376464844, -14.849647521972656, 447.40875244140625, 133.03729248046875, 889.980712890625, -236.92198181152344, 809.125732421875, 689.8173217773438, 347.8549499511719, 375.42938232421875, 73.57452392578125, 324.91387939453125, 374.10302734375, 157.43606567382812, 700.0819702148438, 3.8208541870117188, 298.8936767578125, 250.74014282226562, 445.43115234375, -167.69223022460938, 428.93426513671875, 108.48590087890625, 324.31353759765625, 290.0228271484375, -68.60018920898438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000330.npy"} +{"epoch": 0.4845814977973568, "step": 331, "batch_size": 64, "mean": 226.6082763671875, "std": 190.3015594482422, "min": -156.85577392578125, "p10": 18.27304611206057, "median": 199.54584503173828, "p90": 426.6102630615236, "max": 818.7810668945312, "pos_frac": 0.90625, "sample": [162.4906005859375, 743.9522705078125, 268.01837158203125, -27.52154541015625, 287.1692810058594, 366.7225341796875, 94.09030151367188, 43.79722595214844, 223.43406677246094, 154.89263916015625, 732.092529296875, 272.9354553222656, 818.7810668945312, 8.109687805175781, 178.7772979736328, 354.68353271484375, 575.6387939453125, 108.35737609863281, 148.69180297851562, 203.57876586914062, 443.8098449707031, 386.4779052734375, 467.12933349609375, -33.84138107299805, 108.03665161132812, 153.1386260986328, 192.57725524902344, 224.38119506835938, 255.37734985351562, -49.0325927734375, 262.9227600097656, -57.6031379699707, 273.205078125, 185.98687744140625, 42.56554412841797, 179.57827758789062, -156.85577392578125, 197.76763916015625, 332.01239013671875, 576.7354125976562, 231.88308715820312, 201.3240509033203, 107.86432647705078, 356.523681640625, 78.15389251708984, 86.67462158203125, 319.277099609375, 165.62139892578125, 295.38995361328125, 93.77726745605469, -119.26898193359375, 278.9136962890625, 125.84742736816406, 99.78758239746094, 344.2547302246094, 330.6400146484375, 315.82550048828125, 98.70240020751953, 191.96348571777344, 381.58099365234375, 168.52894592285156, 257.6112365722656, 347.004150390625, 41.987548828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000331.npy"} +{"epoch": 0.48604992657856094, "step": 332, "batch_size": 64, "mean": 164.1973876953125, "std": 232.5335235595703, "min": -199.33279418945312, "p10": -102.41101531982422, "median": 116.62026596069336, "p90": 442.1667419433594, "max": 982.8802490234375, "pos_frac": 0.75, "sample": [717.8234252929688, 163.17791748046875, 144.8344268798828, 320.14947509765625, 31.237762451171875, -16.341110229492188, 53.53321838378906, 442.16851806640625, 422.22015380859375, 188.41163635253906, -21.016494750976562, 75.64672088623047, -101.11785888671875, 982.8802490234375, 193.2176513671875, 106.08663177490234, 127.1612548828125, 141.2176971435547, 123.1855239868164, 2.569601058959961, 126.14232635498047, 95.33000183105469, 427.5773620605469, -102.96522521972656, 348.53399658203125, 39.06306838989258, -99.22006225585938, 109.94622039794922, 603.9467163085938, 234.15463256835938, 163.53665161132812, 94.5826187133789, 289.59149169921875, -39.93987274169922, -40.94538116455078, 82.03980255126953, 509.56005859375, 519.2034912109375, -108.43630981445312, 257.55194091796875, 133.8019256591797, -187.63357543945312, 287.95379638671875, 92.35499572753906, 328.9390563964844, 697.3301391601562, 110.05500793457031, 442.16259765625, -97.13360595703125, 72.28115844726562, -199.33279418945312, 249.9700927734375, 366.2914123535156, -113.45635986328125, 346.9661865234375, 13.22153091430664, -112.82319641113281, -7.214282989501953, -117.58442687988281, 163.62521362304688, 304.06219482421875, 100.53096771240234, 108.15336608886719, -80.18798828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000332.npy"} +{"epoch": 0.48751835535976507, "step": 333, "batch_size": 64, "mean": 215.58282470703125, "std": 300.0898132324219, "min": -337.0198669433594, "p10": -177.33856353759765, "median": 193.65960693359375, "p90": 599.5364868164064, "max": 929.36767578125, "pos_frac": 0.703125, "sample": [-33.68925476074219, 298.2928771972656, 929.36767578125, -8.201156616210938, 79.56375885009766, 36.26103210449219, 277.96478271484375, 240.8251495361328, 70.8004150390625, 36.434085845947266, 790.8388061523438, -165.24095153808594, 177.80706787109375, -233.14559936523438, 567.4407348632812, -228.824951171875, 668.4959716796875, -99.88336181640625, 654.1898193359375, 399.8901062011719, -12.92447280883789, 309.62017822265625, -52.39384460449219, 355.1957092285156, 498.41107177734375, 538.61865234375, -182.52325439453125, 117.8238525390625, 18.033203125, 425.4029541015625, 144.52999877929688, -5.89286994934082, -79.61370849609375, 494.4514465332031, 613.2918090820312, 399.42791748046875, -259.44659423828125, 10.08218002319336, 503.50445556640625, 423.4761047363281, 453.76666259765625, -216.5332794189453, -15.9755859375, 514.2869262695312, 477.90191650390625, -48.714698791503906, 197.3439483642578, 161.45680236816406, 760.192626953125, 278.4499816894531, 504.15887451171875, 267.4483337402344, 566.6177368164062, 195.595703125, 191.7235107421875, -137.5911407470703, 449.21099853515625, -4.725612640380859, 340.75567626953125, -305.43707275390625, 659.4378051757812, -337.0198669433594, 53.28126525878906, 73.40736389160156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000333.npy"} +{"epoch": 0.4889867841409692, "step": 334, "batch_size": 64, "mean": 194.11178588867188, "std": 307.4593505859375, "min": -578.9366455078125, "p10": -146.26842117309567, "median": 157.10147857666016, "p90": 661.5992492675783, "max": 892.7734375, "pos_frac": 0.796875, "sample": [620.5595703125, 57.55615234375, -178.71786499023438, 499.435791015625, 239.52947998046875, 41.909088134765625, -95.24535369873047, 58.97667694091797, 251.840576171875, 29.619308471679688, 213.36212158203125, -4.251106262207031, 357.6494445800781, 194.22042846679688, -157.6125946044922, 203.09312438964844, 49.67073059082031, 446.52960205078125, -578.9366455078125, 693.3045654296875, 51.467044830322266, 218.3515167236328, -25.212142944335938, -294.1171875, 362.90582275390625, 270.4514465332031, 503.95928955078125, -414.0010986328125, -220.4944610595703, 8.750808715820312, 34.588958740234375, -85.65866088867188, 131.3273162841797, 74.52581787109375, 806.5382690429688, 892.7734375, 291.737060546875, 32.31317901611328, 58.805233001708984, 511.3416748046875, 56.694602966308594, 287.58856201171875, 527.4496459960938, 381.30780029296875, -90.21577453613281, 61.41476821899414, 799.8345947265625, 62.694793701171875, 741.322998046875, 523.1889038085938, -119.7986831665039, -366.09674072265625, 41.399658203125, 294.3121643066406, 199.10121154785156, 679.1876831054688, 513.607666015625, 97.62019348144531, 44.76866149902344, 358.4842529296875, 67.74215698242188, 705.744140625, 220.07945251464844, 182.87564086914062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000334.npy"} +{"epoch": 0.49045521292217326, "step": 335, "batch_size": 64, "mean": 148.0127410888672, "std": 286.202880859375, "min": -302.2250061035156, "p10": -221.52425537109374, "median": 109.42106628417969, "p90": 459.7585937500001, "max": 1027.07275390625, "pos_frac": 0.625, "sample": [186.97705078125, 415.9536437988281, 440.8411865234375, -12.784271240234375, 178.52928161621094, -82.7123794555664, -37.256614685058594, 440.880126953125, 62.562381744384766, 54.35478973388672, -247.58834838867188, -19.830520629882812, 302.921142578125, 260.5849609375, -42.61327362060547, 678.1712036132812, -33.017860412597656, 84.78034210205078, -294.4288330078125, 228.20559692382812, -136.99822998046875, 57.58802795410156, 222.6114501953125, 102.89889526367188, 742.2294921875, 607.6022338867188, 406.0062255859375, 283.056396484375, 46.19374084472656, 667.3953857421875, 1027.07275390625, -302.2250061035156, 135.28762817382812, -295.1396789550781, 403.0246276855469, -31.780954360961914, 436.81005859375, -2.4405345916748047, 70.60049438476562, 594.1134643554688, -89.12091064453125, 334.2425842285156, -210.13790893554688, 127.62744140625, 248.80545043945312, -96.82711029052734, 236.29356384277344, -24.42572021484375, -280.5238952636719, -117.09136962890625, 115.9432373046875, -164.3841094970703, 428.75347900390625, -263.54248046875, -210.54864501953125, 382.2738952636719, 269.74566650390625, 467.849365234375, -21.112106323242188, -226.22808837890625, 328.84381103515625, 351.4996032714844, 218.8150634765625, 67.62826538085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000335.npy"} +{"epoch": 0.4919236417033774, "step": 336, "batch_size": 64, "mean": 249.61618041992188, "std": 359.6513366699219, "min": -687.2728271484375, "p10": -103.95736541748046, "median": 254.58018493652344, "p90": 763.5552795410157, "max": 1204.8065185546875, "pos_frac": 0.84375, "sample": [319.9119873046875, 69.411865234375, 934.35302734375, -33.60596466064453, 428.92364501953125, 280.35369873046875, 280.7238464355469, -128.79910278320312, -75.00625610351562, 891.8880615234375, 309.20306396484375, 350.0997314453125, 143.60989379882812, 483.4533996582031, 1204.8065185546875, 22.901945114135742, 816.7301025390625, 312.0234375, 272.67999267578125, 424.31060791015625, -100.74568176269531, 57.32939147949219, 107.73210144042969, 250.0164794921875, 125.7578125, 423.4588317871094, 118.10604858398438, 98.32028198242188, 771.9212036132812, 281.74066162109375, 206.16004943847656, 64.96888732910156, 50.48585510253906, 259.1438903808594, 108.20204162597656, 82.09857177734375, -468.3065185546875, 742.599609375, -105.33380126953125, -265.9677734375, 744.0347900390625, 18.51776885986328, 873.4383544921875, 67.63758850097656, 454.58538818359375, 71.93850708007812, -687.2728271484375, 77.7859878540039, 327.575927734375, 326.504150390625, 124.04872131347656, 118.56620025634766, 174.2976531982422, 22.14628028869629, 273.2557067871094, 512.1853637695312, 550.9138793945312, 1116.803955078125, -388.1047668457031, 353.34393310546875, -489.0634765625, 373.0149841308594, 299.2814025878906, 544.3384399414062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000336.npy"} +{"epoch": 0.4933920704845815, "step": 337, "batch_size": 64, "mean": 250.1221160888672, "std": 284.1232604980469, "min": -287.51446533203125, "p10": -83.99329490661619, "median": 219.81403350830078, "p90": 636.7614990234376, "max": 1083.242919921875, "pos_frac": 0.828125, "sample": [957.31298828125, 257.1723937988281, -213.63441467285156, 112.67501068115234, 612.1195678710938, 518.5068359375, -17.840431213378906, 290.39593505859375, -13.580066680908203, 470.45977783203125, 4.465339660644531, 253.5657958984375, 294.28033447265625, 412.8682861328125, 463.1806335449219, -130.69039916992188, 217.43572998046875, 160.7623291015625, 29.036148071289062, 536.3838500976562, 221.47352600097656, 218.154541015625, 27.8282527923584, 647.3223266601562, 164.47874450683594, -97.2095947265625, 1083.242919921875, 88.4368896484375, -201.73114013671875, -195.63821411132812, 235.85693359375, 190.85206604003906, 266.83746337890625, -216.16134643554688, 207.2075653076172, 90.24620056152344, 482.3889465332031, -15.989334106445312, 297.6954345703125, 390.1864013671875, 42.59553146362305, 717.2039184570312, 83.7589111328125, 193.13015747070312, -287.51446533203125, 134.43466186523438, 858.3563842773438, 524.131591796875, 361.5011901855469, 66.30982208251953, 185.15182495117188, 93.30965423583984, 708.8681640625, 370.638427734375, 168.2113494873047, 717.0797729492188, 407.94964599609375, 226.89276123046875, 33.185447692871094, 360.1070556640625, 296.0467834472656, 338.710205078125, -53.1552619934082, 360.5586242675781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000337.npy"} +{"epoch": 0.4948604992657856, "step": 338, "batch_size": 64, "mean": 233.94757080078125, "std": 240.927001953125, "min": -252.11795043945312, "p10": -18.493199157714834, "median": 188.75465393066406, "p90": 562.4945983886719, "max": 847.487548828125, "pos_frac": 0.84375, "sample": [60.38990783691406, 564.6362915039062, 87.36482238769531, -252.11795043945312, 220.38424682617188, 174.6722869873047, -151.14138793945312, 226.7339324951172, 655.4385375976562, 257.2611389160156, 139.70433044433594, 85.66810607910156, -126.637939453125, 488.5351867675781, 95.60104370117188, 343.1292724609375, 26.821624755859375, 508.9426574707031, 427.2889099121094, 510.4498596191406, 713.980224609375, -22.69452667236328, 223.3609619140625, 233.66468811035156, 257.488037109375, -3.0830230712890625, 441.90777587890625, -23.22325897216797, -8.690101623535156, -2.9499130249023438, 234.4188232421875, 568.9586181640625, 510.9698486328125, 86.04386901855469, 224.76025390625, 415.3898620605469, 159.6983642578125, 44.86643600463867, 339.1860656738281, 150.49159240722656, 378.15435791015625, 61.0272216796875, 89.61105346679688, 101.59066772460938, 514.537353515625, 41.476104736328125, 178.51602172851562, 122.9559555053711, 375.282470703125, 171.16517639160156, 20.07720947265625, 38.724308013916016, 547.7216796875, 847.487548828125, -76.88426971435547, 283.38214111328125, 557.497314453125, 198.9932861328125, 360.75799560546875, 172.27493286132812, 667.285400390625, -246.01040649414062, 66.3265609741211, 613.0252075195312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000338.npy"} +{"epoch": 0.49632892804698975, "step": 339, "batch_size": 64, "mean": 231.79090881347656, "std": 257.9338073730469, "min": -293.88653564453125, "p10": -44.42213745117187, "median": 221.4570770263672, "p90": 624.6512756347659, "max": 892.3178100585938, "pos_frac": 0.8125, "sample": [11.370941162109375, -152.832275390625, -83.3443603515625, -44.90814971923828, 654.4990234375, 68.71022033691406, -3.5443267822265625, 138.50125122070312, 355.3064880371094, 44.87757110595703, 892.3178100585938, 39.74928283691406, 98.1234130859375, 183.028564453125, 326.70794677734375, 377.30169677734375, 22.513193130493164, 8.708335876464844, 77.9069595336914, 579.29931640625, 280.30560302734375, -25.989768981933594, 284.3866271972656, -18.951637268066406, 292.3904113769531, 121.46934509277344, 705.0706787109375, 27.509429931640625, 396.9317626953125, 644.0878295898438, -177.95318603515625, 264.67510986328125, -293.88653564453125, 158.25796508789062, 493.26031494140625, 363.05682373046875, 178.0311279296875, -43.288108825683594, 255.81832885742188, 507.6590881347656, 155.72364807128906, 262.0166320800781, 479.82525634765625, 539.97314453125, 111.45093536376953, 652.5258178710938, -153.99090576171875, 661.995849609375, 326.0237731933594, -211.40098571777344, 227.97250366210938, 209.94964599609375, 38.65861511230469, 214.941650390625, 45.66079330444336, 248.2643280029297, 240.32237243652344, -39.265464782714844, 515.7213134765625, 365.742919921875, 537.0433349609375, 718.5989990234375, 399.08795166015625, 280.6419372558594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000339.npy"} +{"epoch": 0.4977973568281938, "step": 340, "batch_size": 64, "mean": 198.86572265625, "std": 288.56158447265625, "min": -484.27764892578125, "p10": -103.57010421752928, "median": 182.1568374633789, "p90": 602.4258056640626, "max": 1030.6243896484375, "pos_frac": 0.78125, "sample": [179.428955078125, 254.37213134765625, 92.6952896118164, 228.1295166015625, 232.04075622558594, 534.1060180664062, 646.384765625, -484.27764892578125, 258.35980224609375, -44.119590759277344, 354.306884765625, 1030.6243896484375, 95.0223388671875, -288.5621337890625, 616.232666015625, -82.05916595458984, 287.5509338378906, 121.3757095336914, -37.60724639892578, 91.05810546875, 198.0865936279297, -224.87835693359375, -395.0338439941406, 576.7935180664062, -184.11192321777344, 93.90300750732422, 37.208580017089844, 448.4706115722656, -42.58612060546875, 314.07696533203125, 545.9421997070312, 4.0361480712890625, 869.0151977539062, 613.4110717773438, 13.82771110534668, 147.35592651367188, 125.92913818359375, 261.93865966796875, 184.8847198486328, 206.92831420898438, 227.03207397460938, 131.47390747070312, 213.6929931640625, -8.07400894165039, -112.78907775878906, 269.6142272949219, 289.65673828125, -77.20662689208984, 523.9381713867188, 1.1229095458984375, 130.93429565429688, 439.5491027832031, 654.8216552734375, 683.4801025390625, 374.53582763671875, 254.83499145507812, 157.19876098632812, 36.195804595947266, 384.6108703613281, 157.74969482421875, -302.6820068359375, 396.25677490234375, -53.44004821777344, 74.63819122314453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000340.npy"} +{"epoch": 0.49926578560939794, "step": 341, "batch_size": 64, "mean": 226.50100708007812, "std": 268.1539001464844, "min": -285.8695373535156, "p10": -83.95732269287109, "median": 210.98403930664062, "p90": 550.7614196777345, "max": 1119.803466796875, "pos_frac": 0.796875, "sample": [1119.803466796875, -84.4918212890625, -93.99238586425781, -149.68466186523438, 333.27239990234375, 525.9932861328125, 730.152099609375, 483.4325866699219, 248.35755920410156, 171.60409545898438, -35.225250244140625, 166.45486450195312, 71.95391845703125, 267.6871032714844, 17.13868522644043, -47.30961608886719, -82.71015930175781, 376.7877197265625, 41.43437957763672, 291.54718017578125, -162.96743774414062, 388.9029541015625, 432.63623046875, 233.79983520507812, 75.89989471435547, 595.9016723632812, 128.99066162109375, 227.80252075195312, -72.03752136230469, -56.09086608886719, 100.62562561035156, 455.759033203125, 613.9605712890625, 300.38531494140625, 283.3509521484375, 437.3780517578125, 101.91468811035156, 7.733240127563477, -106.90250396728516, 445.09869384765625, 533.1348266601562, 315.8748779296875, 51.5931396484375, 73.97940826416016, -260.82403564453125, 569.4329223632812, 129.0904083251953, 229.19761657714844, -285.8695373535156, 351.0676574707031, 194.16555786132812, 284.1647644042969, 460.9935302734375, -76.9893569946289, 808.2080688476562, 35.49272155761719, 347.96502685546875, 479.42083740234375, 440.5791015625, 54.58828353881836, 150.41629028320312, 107.28899383544922, 558.315673828125, 160.43014526367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000341.npy"} +{"epoch": 0.5007342143906021, "step": 342, "batch_size": 64, "mean": 201.77847290039062, "std": 280.6956481933594, "min": -582.7496337890625, "p10": -100.31032409667968, "median": 171.16423797607422, "p90": 498.5778961181642, "max": 1105.904541015625, "pos_frac": 0.78125, "sample": [742.4360961914062, -582.7496337890625, -238.49893188476562, 516.822998046875, 54.690128326416016, 44.34752655029297, 152.82815551757812, 369.64044189453125, 313.58612060546875, 349.5036926269531, 149.2448272705078, 94.56135559082031, 193.9039764404297, 192.98204040527344, 513.2181396484375, 313.4605712890625, 70.22236633300781, 463.7770080566406, -185.68130493164062, -36.56913757324219, 445.98388671875, -84.8009262084961, 61.663909912109375, -94.8093490600586, -76.1138687133789, 355.84771728515625, 919.211181640625, 383.6207275390625, 142.90403747558594, -209.9601287841797, -71.17237854003906, 1105.904541015625, 217.15609741210938, 185.04652404785156, 13.975540161132812, -30.117393493652344, 577.4082641601562, 444.7290344238281, 626.864501953125, 174.71974182128906, -24.220136642456055, 173.7373504638672, 343.5736999511719, 25.298980712890625, 464.4173278808594, 401.9107360839844, 156.6707763671875, 350.2077331542969, 111.67137145996094, 439.21185302734375, 446.97186279296875, 88.28131866455078, -213.07492065429688, 79.21250915527344, 418.2903137207031, -131.51699829101562, 168.59112548828125, -102.66788482666016, 192.62924194335938, 104.57392883300781, 208.57937622070312, 403.5238342285156, 138.64764404296875, 89.51290130615234], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000342.npy"} +{"epoch": 0.5022026431718062, "step": 343, "batch_size": 64, "mean": 240.35006713867188, "std": 223.0820770263672, "min": -354.916259765625, "p10": 7.4985576629638695, "median": 212.0894317626953, "p90": 517.5120178222656, "max": 825.4210815429688, "pos_frac": 0.921875, "sample": [361.18341064453125, 211.33462524414062, 125.96861267089844, 153.121337890625, 250.82003784179688, 9.8076171875, 364.22930908203125, 349.1842041015625, 205.32525634765625, 747.5287475585938, 59.60292053222656, -180.7626953125, 226.18829345703125, 67.21395874023438, 194.66290283203125, 73.53948974609375, 77.92916870117188, 227.28025817871094, 251.14065551757812, 696.6226196289062, 189.53211975097656, 301.86761474609375, 76.98890686035156, 320.8920593261719, 393.302978515625, 299.21234130859375, 115.34011840820312, 99.05783081054688, 825.4210815429688, 173.6863250732422, 4.478157043457031, 91.02081298828125, 329.7221984863281, 118.62017822265625, 271.3111877441406, 342.5719909667969, 251.58346557617188, 95.66041564941406, 6.508960723876953, 464.5174865722656, -354.916259765625, 328.5371398925781, 138.89712524414062, 433.8299865722656, 341.0452880859375, 754.8187866210938, 445.70062255859375, 324.0660095214844, 149.42324829101562, 327.9637756347656, 183.96292114257812, 388.8661804199219, 52.86846923828125, -98.2017822265625, 509.82427978515625, 520.8067626953125, 183.6547088623047, 165.42294311523438, 173.75527954101562, 212.84423828125, -51.043128967285156, 610.7227783203125, 587.013671875, -190.67459106445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000343.npy"} +{"epoch": 0.5036710719530103, "step": 344, "batch_size": 64, "mean": 206.81173706054688, "std": 281.90093994140625, "min": -270.578125, "p10": -49.918048095703114, "median": 176.6980438232422, "p90": 561.5502197265625, "max": 1470.255126953125, "pos_frac": 0.78125, "sample": [49.49113464355469, -73.6865463256836, -5.9020233154296875, -12.425834655761719, 53.176395416259766, 73.29074096679688, 199.2345733642578, 88.88129425048828, 79.35868835449219, 293.462646484375, -11.649351119995117, 836.0181274414062, 82.85503387451172, 261.07269287109375, 138.35899353027344, 194.10360717773438, 177.54554748535156, -270.578125, -233.07839965820312, 282.3045349121094, 212.21534729003906, 53.60133361816406, 175.8505401611328, 0.3858833312988281, 232.39727783203125, 606.536865234375, 584.4889526367188, 395.2531433105469, 93.05648040771484, -38.87420654296875, 353.5586242675781, 241.1011962890625, 271.68048095703125, 91.16268920898438, 179.66278076171875, -18.818450927734375, 366.2840576171875, -54.651123046875, 304.70025634765625, -6.98967170715332, 534.1259765625, 280.7088317871094, 201.82664489746094, 12.330070495605469, 92.5707015991211, 1470.255126953125, 443.45947265625, 291.8017883300781, 12.428361892700195, 638.6590576171875, -21.858375549316406, 572.4127197265625, 163.65419006347656, 565.7294921875, 73.71177673339844, -212.76380920410156, -147.81402587890625, 452.9810791015625, 551.798583984375, -206.48995971679688, 279.1020812988281, 390.1280822753906, 38.72889709472656, 514.0283203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000344.npy"} +{"epoch": 0.5051395007342144, "step": 345, "batch_size": 64, "mean": 233.68710327148438, "std": 273.42999267578125, "min": -423.1139221191406, "p10": -55.48074645996093, "median": 199.5896224975586, "p90": 616.8588562011722, "max": 939.6617431640625, "pos_frac": 0.8125, "sample": [-15.325874328613281, 650.5565185546875, 91.54428100585938, 382.26348876953125, 143.5431365966797, -207.28964233398438, 724.77685546875, -169.31687927246094, 164.01934814453125, 445.9998779296875, -22.732961654663086, 320.3037109375, 118.54058837890625, 411.6398010253906, 543.0505981445312, 129.86135864257812, -307.0761413574219, 333.32501220703125, 18.91238021850586, 74.69697570800781, 426.6326599121094, 406.50787353515625, 73.37528991699219, 368.3485107421875, 250.31463623046875, 537.666015625, 116.44878387451172, 305.91204833984375, 93.80972290039062, 194.95033264160156, 29.580108642578125, 822.322998046875, 411.32012939453125, 74.05308532714844, 97.88265991210938, 438.5781555175781, 433.23516845703125, 447.80902099609375, 363.7345275878906, 236.9794464111328, 417.56964111328125, 939.6617431640625, 44.75735092163086, 141.44189453125, 407.4793701171875, 295.1803894042969, -2.742443084716797, 752.683349609375, 204.22891235351562, 474.229736328125, -49.23888397216797, -136.15023803710938, 206.41574096679688, 648.490966796875, -10.77479362487793, -200.33126831054688, -58.15583038330078, 148.86758422851562, 659.790771484375, 216.18704223632812, 176.8895263671875, 127.85519409179688, -423.1139221191406, 13.998825073242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000345.npy"} +{"epoch": 0.5066079295154186, "step": 346, "batch_size": 64, "mean": 220.168212890625, "std": 256.4474792480469, "min": -561.2949829101562, "p10": -89.96243438720703, "median": 242.04862213134766, "p90": 584.5332519531253, "max": 754.9864501953125, "pos_frac": 0.8125, "sample": [284.787353515625, 137.81556701660156, 36.99303436279297, 337.70599365234375, 631.2586669921875, 527.5384521484375, 689.10107421875, 228.83309936523438, 137.03477478027344, 740.8250122070312, -561.2949829101562, 111.28355407714844, 403.97442626953125, 85.08761596679688, 326.9662780761719, 639.819091796875, -12.812225341796875, 318.1630859375, 754.9864501953125, 59.98675537109375, 332.48541259765625, 181.16897583007812, -144.08450317382812, 400.81243896484375, 308.9714660644531, 354.671630859375, 331.1775817871094, 262.65411376953125, 398.0744323730469, 45.656402587890625, 13.158323287963867, -87.61641693115234, 213.50660705566406, 61.10923767089844, 608.9595947265625, 179.14305114746094, 255.26414489746094, 124.78152465820312, 301.6272888183594, -133.19749450683594, -44.31372833251953, 613.920166015625, 322.7279357910156, 516.484130859375, 173.3845977783203, 356.84625244140625, 384.839111328125, 348.5670166015625, 19.059429168701172, -88.32989501953125, 273.3573303222656, -90.66209411621094, 80.56893920898438, -195.08868408203125, 95.63975524902344, 514.8493041992188, 135.31460571289062, 151.3653564453125, 438.7681579589844, -237.90756225585938, -16.222381591796875, -171.18612670898438, 308.7441101074219, 313.66290283203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000346.npy"} +{"epoch": 0.5080763582966226, "step": 347, "batch_size": 64, "mean": 220.22601318359375, "std": 285.9312438964844, "min": -334.8613586425781, "p10": -112.49094085693359, "median": 185.0916290283203, "p90": 636.6517822265627, "max": 909.8409423828125, "pos_frac": 0.765625, "sample": [-334.8613586425781, 283.5430908203125, 468.85882568359375, 344.5904235839844, 88.33440399169922, -34.22107696533203, 225.81271362304688, 284.06964111328125, 676.0675048828125, 76.97411346435547, 425.43963623046875, 34.76025390625, 168.9235382080078, -80.75286865234375, 423.5916748046875, -313.80224609375, -291.3729553222656, -114.80776977539062, 770.3792724609375, 99.27410125732422, 527.850830078125, 37.06631851196289, 230.4564208984375, 340.61895751953125, -208.4757080078125, 526.052001953125, 598.613037109375, 297.84967041015625, 16.558692932128906, 652.9541015625, 314.62615966796875, 671.7171020507812, 346.0341796875, -65.74930572509766, 206.99966430664062, -97.64745330810547, 503.6646728515625, 147.14572143554688, 146.45465087890625, -107.08500671386719, 165.96559143066406, 285.2790222167969, 87.68785858154297, 337.56951904296875, 159.04446411132812, 102.85049438476562, 117.58404541015625, -4.825202941894531, 909.8409423828125, 84.74005889892578, 378.19769287109375, 654.2498168945312, 201.2597198486328, 815.1336669921875, -97.0971450805664, 92.18122100830078, 343.3182067871094, -229.2155303955078, 416.6074523925781, -3.8075408935546875, 465.2385559082031, -164.48472595214844, 130.89317321777344, 559.748291015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000347.npy"} +{"epoch": 0.5095447870778267, "step": 348, "batch_size": 64, "mean": 181.44418334960938, "std": 260.2834777832031, "min": -331.294189453125, "p10": -133.92335433959957, "median": 199.37931060791016, "p90": 433.4000854492188, "max": 1103.7850341796875, "pos_frac": 0.765625, "sample": [332.5848693847656, 78.01512908935547, 178.91259765625, 426.3167724609375, 361.0412292480469, -78.0357437133789, 87.8730239868164, 339.0834045410156, 403.21441650390625, -331.294189453125, 114.56111145019531, 65.55914306640625, 605.9849853515625, 308.4425354003906, 135.65821838378906, 288.7143859863281, 13.478199005126953, 297.3711853027344, 135.64886474609375, 81.2010726928711, 315.1932678222656, 306.02313232421875, -106.14986419677734, 1103.7850341796875, 414.2528076171875, 271.5618591308594, -92.80986785888672, 227.54910278320312, -32.063323974609375, 73.47338104248047, -98.12684631347656, -145.82627868652344, -177.38815307617188, 848.2289428710938, 573.71923828125, 329.15118408203125, 308.2784423828125, 230.6233367919922, 230.3726806640625, 3.6806468963623047, -280.01904296875, 390.1525573730469, -32.97724151611328, 319.7829284667969, 269.6520080566406, 233.86346435546875, -189.000244140625, 12.193826675415039, 545.8564453125, 281.2030029296875, 95.63309478759766, -91.12400817871094, 436.435791015625, -245.77203369140625, 335.5711364746094, 219.8460235595703, 81.62853240966797, 27.980411529541016, 28.534393310546875, 544.5438232421875, 95.9805908203125, 298.3619689941406, -146.84779357910156, -46.911319732666016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000348.npy"} +{"epoch": 0.5110132158590308, "step": 349, "batch_size": 64, "mean": 213.40219116210938, "std": 302.7085266113281, "min": -494.3605041503906, "p10": -93.46201019287109, "median": 146.0917510986328, "p90": 586.9040405273438, "max": 1200.22998046875, "pos_frac": 0.765625, "sample": [134.12612915039062, -366.5832214355469, 480.9944152832031, 128.5244903564453, 100.28599548339844, 112.30532836914062, 471.19329833984375, -91.83202362060547, 243.98812866210938, 409.3927917480469, 152.50555419921875, 527.4595947265625, 154.1434783935547, -40.868736267089844, 47.67974090576172, 65.81851959228516, -26.341650009155273, 30.43682098388672, 233.7414093017578, -217.66397094726562, 493.36151123046875, 345.1891174316406, 376.1065673828125, 18.803937911987305, 333.6448669433594, -69.22962951660156, 429.19244384765625, -94.16057586669922, 99.21733856201172, 166.66940307617188, 816.2769165039062, 15.709892272949219, -20.309185028076172, 519.7001953125, 183.6260986328125, -494.3605041503906, -120.59984588623047, 107.17606353759766, 583.7913818359375, 44.79776382446289, 735.2403564453125, -4.839263916015625, -250.61770629882812, -42.21443176269531, 588.238037109375, 614.96435546875, 850.2982177734375, 192.34605407714844, 600.9570922851562, 139.67794799804688, 437.9051513671875, 490.715576171875, 4.058540344238281, 317.474365234375, 290.50958251953125, 282.8770446777344, -143.4583740234375, 63.50630187988281, -18.824512481689453, 469.5083923339844, 400.542724609375, 80.44549560546875, 74.29010009765625, 1200.22998046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000349.npy"} +{"epoch": 0.5124816446402349, "step": 350, "batch_size": 64, "mean": 206.09213256835938, "std": 287.3081970214844, "min": -540.822265625, "p10": -188.86669769287107, "median": 192.16341400146484, "p90": 505.84445800781265, "max": 909.2736206054688, "pos_frac": 0.828125, "sample": [814.4567260742188, -360.9170227050781, 123.34186553955078, 385.4510803222656, 330.8413391113281, 184.03131103515625, 403.6324157714844, 89.1085205078125, -142.6424102783203, 114.59727478027344, 94.63778686523438, 448.400634765625, -389.7562255859375, 167.11500549316406, 196.56655883789062, 12.955413818359375, 724.730224609375, 433.38543701171875, 909.2736206054688, 290.6800842285156, 179.76840209960938, 295.5799865722656, -31.6729793548584, 461.8616027832031, 215.934326171875, 799.9005737304688, 104.45120239257812, -245.03228759765625, 127.39270782470703, 57.18219757080078, 523.896240234375, 116.778564453125, 153.06015014648438, 180.09414672851562, 359.3236999511719, 305.24200439453125, 118.06771087646484, 221.31394958496094, -200.71347045898438, 674.09033203125, 187.76026916503906, 463.7236328125, 323.788330078125, -206.89376831054688, 126.84729766845703, 242.5400848388672, 600.8248901367188, 423.2947082519531, 212.84518432617188, 299.4982604980469, -540.822265625, 430.43511962890625, 143.14662170410156, 242.62872314453125, 59.500213623046875, 206.56973266601562, -383.35498046875, -45.805572509765625, 23.743309020996094, 187.65762329101562, 432.535888671875, 210.879638671875, -161.22422790527344, 463.369140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000350.npy"} +{"epoch": 0.5139500734214391, "step": 351, "batch_size": 64, "mean": 253.93798828125, "std": 324.66754150390625, "min": -422.4456787109375, "p10": -114.25879592895501, "median": 232.38806915283203, "p90": 719.6615234375, "max": 1229.544677734375, "pos_frac": 0.78125, "sample": [103.017333984375, 293.2396240234375, 504.19378662109375, 1089.55029296875, 1229.544677734375, 141.89674377441406, 398.6943359375, 112.15336608886719, 278.597412109375, 310.80487060546875, 66.4312744140625, 242.218505859375, 734.8220825195312, 842.0382690429688, 385.22271728515625, 342.8908996582031, 509.93768310546875, 582.5311889648438, 707.587890625, 1.1016483306884766, -205.28366088867188, 375.868896484375, 126.75228881835938, 246.7840576171875, 156.80264282226562, 336.62017822265625, 561.4234008789062, 238.6744842529297, -422.4456787109375, -49.75190734863281, -324.7501525878906, 132.60772705078125, -140.6925811767578, 121.11846160888672, -39.14862060546875, 887.9114990234375, 27.886810302734375, 121.93685913085938, 232.330078125, 15.871482849121094, -145.77395629882812, 349.2059326171875, 15.906269073486328, 724.8359375, 110.11129760742188, -11.558191299438477, 215.2702178955078, -29.690338134765625, -30.37750244140625, -154.28709411621094, -52.57996368408203, 660.142333984375, 195.48390197753906, 382.69378662109375, 747.385009765625, -169.1682586669922, 282.1267395019531, 634.38232421875, 277.19830322265625, 293.8182067871094, 138.64686584472656, 355.87762451171875, 232.44606018066406, -47.05449676513672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000351.npy"} +{"epoch": 0.5154185022026432, "step": 352, "batch_size": 64, "mean": 158.47000122070312, "std": 285.0350341796875, "min": -622.8960571289062, "p10": -160.8986633300781, "median": 129.08557891845703, "p90": 505.80632019042974, "max": 1030.3193359375, "pos_frac": 0.71875, "sample": [409.06793212890625, 363.63470458984375, 154.99215698242188, 509.7094421386719, -315.4535827636719, 50.38005065917969, 66.07331085205078, 421.260009765625, 43.75132751464844, 239.3072052001953, 736.712158203125, 464.8265380859375, -46.10923385620117, 114.75811004638672, 242.96617126464844, -44.47099304199219, -112.8817138671875, 1030.3193359375, 275.7903137207031, -146.39874267578125, 176.5078125, 719.9412841796875, 137.2115936279297, 429.23773193359375, 43.59101867675781, -88.73596954345703, -127.68441772460938, -265.7576904296875, 3.9751663208007812, 99.02316284179688, 136.00167846679688, 496.69903564453125, 16.865888595581055, 17.24530792236328, 571.1426391601562, 130.1224365234375, -622.8960571289062, 58.61651611328125, 195.7205810546875, 353.14129638671875, 260.11090087890625, 121.23593139648438, 180.6060028076172, -176.03302001953125, -85.76408386230469, -19.080596923828125, -252.32333374023438, 571.885986328125, 484.98553466796875, -172.0721893310547, 262.21490478515625, -84.0338363647461, -167.1129150390625, 428.76800537109375, 252.02658081054688, 192.31903076171875, 441.956787109375, -87.38481903076172, 606.7493896484375, 128.04872131347656, 5.552947998046875, -31.359054565429688, 228.65740966796875, 113.9217529296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000352.npy"} +{"epoch": 0.5168869309838473, "step": 353, "batch_size": 64, "mean": 203.07354736328125, "std": 289.1540832519531, "min": -402.90374755859375, "p10": -85.00876846313476, "median": 160.27713012695312, "p90": 636.3913330078126, "max": 871.6627197265625, "pos_frac": 0.703125, "sample": [395.15557861328125, 405.2440490722656, 74.7791519165039, 468.53448486328125, 188.04428100585938, 415.9888000488281, 38.95130920410156, -124.86300659179688, 611.3255004882812, 183.673095703125, 287.7742004394531, -80.50702667236328, 249.64369201660156, 552.1220703125, 29.726806640625, 337.58392333984375, -143.0163116455078, -27.86517333984375, 611.1573486328125, 662.9164428710938, 157.66725158691406, 273.7257385253906, -63.49989700317383, 89.03460693359375, -402.90374755859375, 430.16546630859375, -205.62445068359375, -86.04413604736328, -80.16693878173828, 293.00701904296875, -43.99772644042969, 155.956298828125, 344.1090393066406, 317.01422119140625, 11.8267822265625, -22.47249984741211, 11.977508544921875, -157.23052978515625, 831.5740966796875, 1.5961170196533203, -24.23883056640625, 655.5252075195312, -65.8809585571289, 45.65174102783203, 112.19014739990234, 324.29693603515625, 331.76678466796875, 162.8870086669922, -46.784393310546875, 643.9130859375, 280.3671569824219, -82.59291076660156, 618.840576171875, 368.1818542480469, 871.6627197265625, 812.4207763671875, -232.35121154785156, 367.4639892578125, 669.187255859375, -80.96633911132812, 59.57817840576172, 27.640850067138672, -59.475433349609375, 245.34005737304688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000353.npy"} +{"epoch": 0.5183553597650514, "step": 354, "batch_size": 64, "mean": 228.19166564941406, "std": 238.4373016357422, "min": -197.09796142578125, "p10": -64.34891357421873, "median": 198.8369369506836, "p90": 523.5001373291017, "max": 958.309326171875, "pos_frac": 0.796875, "sample": [347.88775634765625, 535.8257446289062, 203.03819274902344, 40.0179443359375, 459.1271057128906, 194.63568115234375, 659.7177734375, -82.38710021972656, 276.3802795410156, -91.52934265136719, -75.57917022705078, -197.09796142578125, 243.98155212402344, 958.309326171875, 471.2592468261719, 142.86679077148438, -27.693870544433594, 344.4136962890625, 409.1319580078125, 312.8858947753906, 22.79749298095703, 384.2773742675781, 169.35189819335938, 155.26052856445312, -144.74783325195312, 106.70318603515625, 376.0868225097656, 194.19381713867188, 90.18517303466797, 401.1225891113281, 551.9136352539062, -79.63084411621094, 301.36907958984375, 267.4885559082031, 205.68661499023438, 203.86024475097656, -132.322021484375, 476.1046447753906, 156.1763916015625, 114.34822082519531, 494.7403869628906, -38.144981384277344, 342.8132019042969, 186.15264892578125, -16.91857147216797, 305.0018310546875, 78.1694564819336, 685.9358520507812, 383.4161376953125, -15.823272705078125, 10.882055282592773, 418.0732421875, 352.4788818359375, 152.2964324951172, -6.277595520019531, 788.1542358398438, 27.815208435058594, 384.50189208984375, 385.1470642089844, -18.224441528320312, 543.5726318359375, 123.742431640625, 86.63668823242188, 4.707118988037109], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000354.npy"} +{"epoch": 0.5198237885462555, "step": 355, "batch_size": 64, "mean": 224.4081268310547, "std": 240.5799102783203, "min": -375.7923583984375, "p10": -50.24597434997557, "median": 223.81344604492188, "p90": 522.1337951660156, "max": 960.360107421875, "pos_frac": 0.796875, "sample": [118.22297668457031, 512.7081298828125, 477.7782897949219, 513.1898803710938, 31.380882263183594, 270.39959716796875, 237.53704833984375, 960.360107421875, -375.7923583984375, 280.57928466796875, 217.21530151367188, 563.2998046875, -3.9139480590820312, -17.79011344909668, 521.0565795898438, 594.3554077148438, -69.83946228027344, 582.4213256835938, 177.94703674316406, 93.02741241455078, 397.13348388671875, 205.21218872070312, -57.27558898925781, -33.84354019165039, 308.2038269042969, 491.01861572265625, 496.68792724609375, 306.0874938964844, 150.5708770751953, -3.9816741943359375, -23.635494232177734, 249.11807250976562, 87.63810729980469, -329.4914855957031, 241.90478515625, -173.6977081298828, 351.9434814453125, 527.6295166015625, 343.3914794921875, 613.1338500976562, 62.001869201660156, 233.5142822265625, 110.94145202636719, 167.90997314453125, 157.1820068359375, 280.9444580078125, 64.55475616455078, 344.6852722167969, 90.81077575683594, 217.4481201171875, 522.595458984375, 443.4739990234375, 353.0263671875, 226.03961181640625, 111.51475524902344, -29.71417236328125, 151.0362091064453, -145.19403076171875, 221.5872802734375, -77.51530456542969, 84.0655517578125, 328.898193359375, 352.7450866699219, 257.6763916015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000355.npy"} +{"epoch": 0.5212922173274597, "step": 356, "batch_size": 64, "mean": 125.88236999511719, "std": 306.4451599121094, "min": -634.1937255859375, "p10": -226.50109405517577, "median": 92.38515090942383, "p90": 511.1202667236329, "max": 1004.815673828125, "pos_frac": 0.65625, "sample": [197.76852416992188, -8.567298889160156, 323.4600524902344, 91.0346908569336, -13.171527862548828, -90.31205749511719, -98.02752685546875, 175.01773071289062, -158.29966735839844, 249.6455078125, 340.88916015625, 0.9572944641113281, 740.8629760742188, 492.1963195800781, -115.43556213378906, 613.1182250976562, -152.62559509277344, -214.1513214111328, -634.1937255859375, -231.79385375976562, 74.6906967163086, 787.009033203125, -104.96452331542969, 122.1043701171875, 345.707275390625, 519.2305297851562, -46.664695739746094, 381.5221252441406, -283.1169738769531, 55.307395935058594, -300.09356689453125, 694.8455200195312, 93.73561096191406, -279.1387023925781, 397.13580322265625, 110.89222717285156, 272.59808349609375, 381.0640869140625, -181.97213745117188, 49.25044250488281, -71.26887512207031, -105.3867416381836, 194.316650390625, 238.6597900390625, 8.282707214355469, 250.7373046875, 334.20208740234375, -206.7229461669922, 717.4090576171875, 250.80899047851562, 170.544189453125, 26.940460205078125, -334.7940673828125, -20.66516876220703, 195.76788330078125, 125.11881256103516, 76.60386657714844, 278.1892395019531, -394.367919921875, 1004.815673828125, 323.3502197265625, 314.21185302734375, 11.504814147949219, 70.69800567626953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000356.npy"} +{"epoch": 0.5227606461086637, "step": 357, "batch_size": 64, "mean": 200.40087890625, "std": 252.05979919433594, "min": -565.578369140625, "p10": -61.99346542358398, "median": 179.6533203125, "p90": 581.4231079101563, "max": 783.540771484375, "pos_frac": 0.78125, "sample": [396.306396484375, 573.94775390625, 67.89662170410156, 622.4180908203125, 551.8095703125, 13.276412963867188, 33.61604309082031, 397.8192138671875, 115.41567993164062, 584.6268310546875, 256.5135498046875, 94.16020965576172, -121.43720245361328, 600.5704345703125, -57.4580078125, 164.12448120117188, 195.18215942382812, 207.28884887695312, 302.6717529296875, -86.68501281738281, 668.69287109375, -21.850440979003906, 333.9043884277344, -54.84925842285156, -565.578369140625, 61.73906326293945, 294.00628662109375, 131.81060791015625, -63.937232971191406, 595.2318725585938, 278.22247314453125, -158.13134765625, 498.0428161621094, -164.19091796875, 357.92095947265625, 16.192150115966797, 355.07763671875, 783.540771484375, 299.1900634765625, 255.35427856445312, 17.272186279296875, -27.19376564025879, 292.01513671875, 568.6630859375, 56.32752227783203, 66.22335815429688, 34.06260681152344, 239.05374145507812, -0.16943740844726562, 203.01025390625, -53.61572265625, 263.55279541015625, 161.88916015625, 9.271896362304688, 271.1770324707031, 334.1602783203125, 95.6203842163086, 338.8797912597656, -92.77700805664062, 285.27178955078125, 107.27772521972656, 691.37646484375, -8.10382080078125, 159.95889282226562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000357.npy"} +{"epoch": 0.5242290748898678, "step": 358, "batch_size": 64, "mean": 188.8855743408203, "std": 260.2104187011719, "min": -409.231201171875, "p10": -64.18499221801757, "median": 120.90529251098633, "p90": 531.0541687011721, "max": 932.255859375, "pos_frac": 0.765625, "sample": [74.88504028320312, 665.661376953125, 25.542068481445312, 278.5182189941406, 249.6669158935547, 305.8791809082031, 70.47697448730469, 293.8421630859375, 124.47802734375, 47.6842041015625, 20.2816162109375, -66.48358154296875, -88.83578491210938, -26.636001586914062, 3.8823070526123047, 789.6392822265625, -58.821617126464844, 555.0004272460938, 65.02334594726562, 100.91693115234375, -6.398397445678711, -35.93778991699219, 18.18115234375, 34.76300811767578, 147.2484130859375, -409.231201171875, 322.79620361328125, -149.4500732421875, 932.255859375, 406.02203369140625, 440.0394287109375, 167.3052978515625, 299.44659423828125, -12.026100158691406, 584.41455078125, 202.65426635742188, -15.629142761230469, 97.60302734375, -94.20360565185547, 330.4375915527344, 317.128662109375, 407.0651550292969, -217.97103881835938, 325.0396423339844, 399.4010925292969, -55.304412841796875, 406.9628601074219, 747.6143798828125, 17.120899200439453, 123.04515075683594, 378.81683349609375, 173.52120971679688, 78.9383544921875, -34.242042541503906, 475.1795654296875, 880.0791625976562, 76.52355194091797, 105.3866195678711, 210.4205780029297, -75.52297973632812, 199.8083038330078, 118.76543426513672, 222.36436462402344, 117.64303588867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000358.npy"} +{"epoch": 0.5256975036710719, "step": 359, "batch_size": 64, "mean": 224.70556640625, "std": 293.3180847167969, "min": -411.05682373046875, "p10": -120.74913940429687, "median": 167.94786834716797, "p90": 657.4714538574221, "max": 1010.2642822265625, "pos_frac": 0.75, "sample": [-411.05682373046875, 225.88125610351562, 188.24920654296875, 423.0404052734375, -148.62680053710938, 697.437744140625, -208.92755126953125, 837.2565307617188, 120.79674530029297, -50.69136047363281, -122.82362365722656, -32.396583557128906, 392.3507385253906, 306.86383056640625, -214.8529052734375, 128.5017852783203, 390.27532958984375, -53.477691650390625, 463.3099365234375, -202.82376098632812, 244.650146484375, 225.88235473632812, 489.83465576171875, 52.345664978027344, 55.48738479614258, 169.0489044189453, -42.09880828857422, 566.3650512695312, 114.68994140625, 66.0663070678711, 471.80194091796875, 279.3280944824219, 1010.2642822265625, 448.01812744140625, -115.90867614746094, -1.642965316772461, 166.84683227539062, 101.93183135986328, -252.21063232421875, 727.5781860351562, 163.150146484375, 504.6936340332031, 276.92889404296875, 424.97271728515625, -36.251312255859375, 161.76724243164062, 516.8883056640625, 259.4737548828125, 153.16970825195312, 720.3143920898438, 69.57910919189453, -36.84629821777344, -66.20695495605469, 308.672607421875, 204.7183380126953, 691.0513305664062, 41.427734375, 205.1520538330078, 764.3436279296875, 559.4943237304688, 93.3360366821289, 579.118408203125, 150.60362243652344, 165.04080200195312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000359.npy"} +{"epoch": 0.527165932452276, "step": 360, "batch_size": 64, "mean": 160.10589599609375, "std": 201.30638122558594, "min": -157.98800659179688, "p10": -84.15243225097655, "median": 123.66600036621094, "p90": 430.6164764404297, "max": 602.581298828125, "pos_frac": 0.765625, "sample": [72.60397338867188, 487.18450927734375, 16.674278259277344, 146.1524658203125, -19.821741104125977, 349.68597412109375, 38.828712463378906, 175.6046142578125, 252.25576782226562, 83.11592102050781, 160.30917358398438, 516.372314453125, -136.00169372558594, 327.0738830566406, 602.581298828125, 114.91389465332031, 41.32950973510742, -44.33103942871094, 274.61712646484375, 116.94497680664062, 351.2546691894531, 197.109130859375, -62.178916931152344, 229.64378356933594, 9.282711029052734, 323.7039794921875, 409.8365478515625, 0.7383880615234375, 0.7033271789550781, 423.6487121582031, 519.1878051757812, 310.08038330078125, 540.116455078125, -68.4967041015625, 42.957847595214844, 95.99461364746094, 218.66009521484375, 144.97128295898438, 401.04840087890625, -66.13055419921875, 74.17849731445312, 562.817138671875, -157.98800659179688, -90.86203002929688, -18.295501708984375, 131.9481201171875, -131.15914916992188, 130.38702392578125, -54.44062042236328, -128.1388397216797, 13.397003173828125, 282.02117919921875, 30.057775497436523, 402.9385681152344, 349.7068786621094, 290.70477294921875, 236.2711944580078, -105.2356948852539, 71.29864501953125, 433.6026611328125, 86.61592102050781, -96.2167739868164, -19.075050354003906, 354.0176696777344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000360.npy"} +{"epoch": 0.5286343612334802, "step": 361, "batch_size": 64, "mean": 163.7840576171875, "std": 259.81756591796875, "min": -471.3898010253906, "p10": -132.01415939331054, "median": 175.52256774902344, "p90": 490.68079223632816, "max": 899.111083984375, "pos_frac": 0.703125, "sample": [36.563682556152344, 208.66366577148438, 48.1236686706543, 332.52716064453125, -59.46521759033203, -290.030029296875, -85.34845733642578, 178.84568786621094, 558.21923828125, 142.8969268798828, 193.26492309570312, 258.2210388183594, -471.3898010253906, 423.88299560546875, -49.69306945800781, -147.07066345214844, 199.87088012695312, 120.61988830566406, -299.96533203125, 19.549619674682617, 213.19403076171875, -106.61199188232422, -62.859375, -47.107845306396484, 197.7414093017578, 626.912109375, 300.8667907714844, 416.84991455078125, 155.57916259765625, 387.8201904296875, -78.78150939941406, -93.76766967773438, 240.06581115722656, 142.05184936523438, 421.4284362792969, 468.8055419921875, -200.96360778808594, 899.111083984375, 669.4783935546875, 486.24713134765625, 226.93040466308594, 190.93174743652344, 547.2060546875, 402.3061218261719, 172.19944763183594, 169.33486938476562, 76.37664031982422, 202.88754272460938, -78.34915161132812, -91.92574310302734, 29.77985382080078, -58.17837905883789, 531.3639526367188, -2.6739501953125, 347.60693359375, 323.815673828125, 153.05099487304688, -142.9008026123047, 264.32684326171875, 75.25846862792969, 180.946044921875, -262.7623596191406, 492.5809326171875, 377.72149658203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000361.npy"} +{"epoch": 0.5301027900146843, "step": 362, "batch_size": 64, "mean": 270.9095153808594, "std": 272.2356262207031, "min": -126.67770385742188, "p10": -32.193933105468744, "median": 213.44778442382812, "p90": 629.2233215332031, "max": 1315.2091064453125, "pos_frac": 0.84375, "sample": [98.49105834960938, 108.13796997070312, 478.90435791015625, 582.78125, 330.0921630859375, -29.695152282714844, 1315.2091064453125, 267.40313720703125, 135.19808959960938, 606.608154296875, 532.8388671875, 102.34581756591797, 17.149085998535156, 90.61795043945312, 243.23472595214844, 495.0994567871094, 143.04991149902344, -33.26483917236328, 33.797607421875, 359.6214599609375, 528.747802734375, 115.09646606445312, 16.150854110717773, -19.71331787109375, -45.82695007324219, -15.314477920532227, 135.40451049804688, 163.6787567138672, -83.55225372314453, 683.1929931640625, 611.2517700195312, 108.291748046875, -49.054473876953125, 277.3326110839844, 227.87965393066406, 198.128173828125, 683.5980224609375, -46.503753662109375, 201.13597106933594, 636.9254150390625, 225.7595977783203, 107.58038330078125, 707.014892578125, 432.4812316894531, 124.30228424072266, 122.91971588134766, 375.5538330078125, 186.22198486328125, 64.53773498535156, 747.1544189453125, 236.89706420898438, 816.3407592773438, 496.5503234863281, 378.4884033203125, 256.1536865234375, 492.6634216308594, 492.522705078125, -66.10220336914062, 121.0592269897461, 381.0418395996094, 275.66656494140625, 262.6658020019531, 22.943756103515625, -126.67770385742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000362.npy"} +{"epoch": 0.5315712187958884, "step": 363, "batch_size": 64, "mean": 276.02301025390625, "std": 277.0833740234375, "min": -296.2618408203125, "p10": -17.953380584716797, "median": 262.46885681152344, "p90": 637.0526855468751, "max": 1318.193359375, "pos_frac": 0.828125, "sample": [-296.2618408203125, 325.308837890625, 556.087646484375, 239.40878295898438, 72.33436584472656, 225.65057373046875, 263.606689453125, 298.280517578125, 248.5569610595703, 123.57486724853516, 397.0899658203125, 243.76121520996094, 295.3213806152344, 65.3964614868164, 335.5811767578125, 828.3553466796875, -30.96587562561035, 600.638916015625, 60.51087951660156, 740.62841796875, 484.86981201171875, 77.14110565185547, 316.4898376464844, 387.0837707519531, 272.3815612792969, 410.17608642578125, 1318.193359375, 610.1077270507812, -43.1146240234375, 590.2783813476562, 216.46324157714844, 224.01649475097656, 19.079559326171875, 263.5269470214844, 449.1882019042969, 53.08118438720703, 218.0887908935547, -42.769989013671875, 648.6005249023438, 486.4639892578125, 393.7021179199219, -170.70040893554688, 41.41827392578125, 452.5832824707031, -9.612401962280273, 374.17816162109375, 705.8418579101562, -16.9312744140625, 261.4107666015625, 389.3465576171875, -8.447792053222656, 132.5080108642578, -97.71175384521484, 387.313232421875, 269.7020263671875, 24.866004943847656, -5.575660705566406, 220.361572265625, 27.369197845458984, 27.15363311767578, 709.7304077148438, 695.2568359375, -18.39142608642578, 327.88946533203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000363.npy"} +{"epoch": 0.5330396475770925, "step": 364, "batch_size": 64, "mean": 206.69784545898438, "std": 280.54583740234375, "min": -531.979736328125, "p10": -79.00487670898436, "median": 192.91400146484375, "p90": 561.0714416503906, "max": 913.1270141601562, "pos_frac": 0.796875, "sample": [428.3495788574219, 251.06996154785156, -235.44664001464844, 35.55663299560547, -531.979736328125, 561.4849853515625, -302.2320861816406, 324.4101867675781, 224.67141723632812, 913.1270141601562, 178.03868103027344, 682.5673828125, 425.2515869140625, 330.28076171875, -53.779624938964844, 72.23728942871094, 56.707000732421875, -269.46978759765625, 307.99188232421875, -63.450469970703125, -98.16595458984375, 417.9654235839844, 223.5526123046875, 448.2863464355469, 326.0611572265625, 24.74907112121582, 9.16299057006836, 505.4217529296875, 207.78932189941406, 251.69549560546875, -26.921249389648438, 668.1683349609375, 133.87118530273438, 4.898956298828125, 137.025390625, 62.1846923828125, 470.53607177734375, 367.88067626953125, 288.35491943359375, -85.67105102539062, 86.06014251708984, 587.715576171875, 635.7139282226562, 135.83103942871094, 268.44140625, -46.26288604736328, -15.014841079711914, 353.80181884765625, 560.1065063476562, 378.0027160644531, 54.7588005065918, 159.83116149902344, 6.715299606323242, 151.3306884765625, 907.6602783203125, 72.46795654296875, 346.093994140625, 287.698974609375, -2.770792007446289, 261.5341796875, 74.34408569335938, 165.4578857421875, 485.03704833984375, -358.1249084472656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000364.npy"} +{"epoch": 0.5345080763582967, "step": 365, "batch_size": 64, "mean": 218.00540161132812, "std": 263.7609558105469, "min": -199.95213317871094, "p10": -73.59536743164061, "median": 140.25416564941406, "p90": 554.889196777344, "max": 1264.002685546875, "pos_frac": 0.859375, "sample": [179.11703491210938, 106.63468170166016, 118.30010986328125, 136.53077697753906, 132.70343017578125, 137.3994140625, 335.6293029785156, 137.001953125, 242.26699829101562, 102.31210327148438, 317.54071044921875, 506.77581787109375, -81.80325317382812, 215.88070678710938, 51.38478088378906, -54.443634033203125, 84.98979187011719, 771.3493041992188, 582.9484252929688, 326.2999572753906, 107.53919219970703, 56.11042022705078, 74.29500579833984, 18.7869873046875, 186.86007690429688, -166.82525634765625, 100.12031555175781, 1.5705299377441406, 180.69024658203125, 134.62646484375, -140.4829559326172, -45.326900482177734, 111.0181655883789, 39.38961410522461, 403.53125, 664.8314819335938, 391.1079406738281, 63.47169876098633, -118.48281860351562, 454.58636474609375, 284.2005615234375, -168.8353271484375, 243.6294708251953, 36.18976593017578, 84.65508270263672, 335.6429443359375, 267.03668212890625, 232.06317138671875, 322.8642883300781, -141.386962890625, 87.19007110595703, 413.6668395996094, 43.292991638183594, 287.3382873535156, 238.0939483642578, 253.91796875, 1264.002685546875, 575.5092163085938, 501.8890380859375, -199.95213317871094, 785.204345703125, 765.99462890625, 143.10891723632812, 430.7931823730469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000365.npy"} +{"epoch": 0.5359765051395007, "step": 366, "batch_size": 64, "mean": 217.76443481445312, "std": 283.3239440917969, "min": -324.861328125, "p10": -123.26521835327148, "median": 192.16505432128906, "p90": 608.4205017089844, "max": 952.6790161132812, "pos_frac": 0.78125, "sample": [-119.03008270263672, 222.9935302734375, 303.7890319824219, 104.31594848632812, -317.4493103027344, 35.111881256103516, -92.21904754638672, 76.772216796875, -80.81814575195312, 451.5819091796875, 94.69235229492188, -225.24105834960938, 480.471923828125, 659.1760864257812, 186.45565795898438, -80.66932678222656, 340.3964538574219, 22.28091812133789, 99.82823944091797, -115.68641662597656, 349.4653015136719, 572.4686889648438, 374.02423095703125, 20.863372802734375, 620.8023681640625, 952.6790161132812, 346.0774841308594, 97.5761947631836, 595.391845703125, 215.10009765625, 161.498779296875, -35.892578125, -180.88116455078125, 614.0042114257812, 363.27862548828125, 393.70330810546875, 839.0698852539062, 353.07806396484375, 197.87445068359375, 136.78749084472656, 242.5475616455078, 138.31024169921875, 71.89141845703125, -149.891357421875, 514.4498291015625, -146.4241180419922, 308.15545654296875, 55.11406326293945, 523.240478515625, 584.0706787109375, 385.25616455078125, 293.3099670410156, 638.2122802734375, 330.35968017578125, 755.7872314453125, 233.55984497070312, -125.08027648925781, 41.45729064941406, -118.67129516601562, 130.60977172851562, 125.68419647216797, 100.5832748413086, 295.5299072265625, -324.861328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000366.npy"} +{"epoch": 0.5374449339207048, "step": 367, "batch_size": 64, "mean": 183.06765747070312, "std": 227.944580078125, "min": -496.8251647949219, "p10": -60.19221038818359, "median": 155.29256439208984, "p90": 498.3701629638672, "max": 757.6092529296875, "pos_frac": 0.796875, "sample": [100.58868408203125, 169.6249237060547, 503.63397216796875, 69.74288940429688, 615.3511962890625, 6.485410690307617, 372.3145446777344, 159.42425537109375, 545.4365844726562, 19.952369689941406, 28.482666015625, -0.08938407897949219, -64.00433349609375, 498.2732849121094, 457.25909423828125, 310.74267578125, 423.17584228515625, 127.61412048339844, 251.27696228027344, -61.16001892089844, 237.16372680664062, 378.4901428222656, -64.97608184814453, 18.623252868652344, 311.4878845214844, 274.54278564453125, 757.6092529296875, -48.73512268066406, 515.2332153320312, 107.295654296875, 422.67144775390625, 187.6526641845703, -16.75136947631836, 328.208740234375, 63.81416320800781, 203.22482299804688, 300.57427978515625, 285.8527526855469, 367.5028076171875, 82.61767578125, 189.10731506347656, -224.26092529296875, -166.1964569091797, -57.933990478515625, -19.0430908203125, 29.023283004760742, 73.20498657226562, 368.6918640136719, 151.16087341308594, 81.95220947265625, -196.15225219726562, -496.8251647949219, 109.49060821533203, 125.05714416503906, 268.498291015625, 366.6778564453125, 404.83953857421875, 314.4071044921875, 510.38385009765625, 16.253005981445312, 133.5771942138672, -34.846092224121094, 498.41168212890625, 24.622833251953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000367.npy"} +{"epoch": 0.5389133627019089, "step": 368, "batch_size": 64, "mean": 287.25018310546875, "std": 220.2408905029297, "min": -212.74026489257812, "p10": 1.4839202880859546, "median": 306.0147247314453, "p90": 556.0627563476563, "max": 1054.4293212890625, "pos_frac": 0.890625, "sample": [316.7446594238281, 326.3857116699219, 222.967041015625, 484.5563049316406, 390.0106201171875, 570.5964965820312, 163.81048583984375, 47.58531188964844, 559.35595703125, 173.33718872070312, -25.441707611083984, 426.9725341796875, 470.67242431640625, 74.89556884765625, 298.5332336425781, 395.4399719238281, 313.4962158203125, 225.68663024902344, -38.693885803222656, 416.2152404785156, 209.84280395507812, 291.4240417480469, 550.8099365234375, 335.0853576660156, 372.9167175292969, 116.31402587890625, 445.59344482421875, -5.625865936279297, 22.412891387939453, 18.073421478271484, 167.0222625732422, 435.83734130859375, 397.8533020019531, 352.6956787109375, 348.3077392578125, 590.1222534179688, 396.9013671875, 61.606475830078125, 179.1294708251953, -38.540122985839844, 321.7934265136719, 512.017333984375, 420.6696472167969, 580.2923583984375, 155.85118103027344, 295.9757995605469, 667.0343017578125, 20.890426635742188, -212.74026489257812, 115.14227294921875, 107.81922149658203, 513.0592651367188, 449.9888000488281, 483.3740539550781, 352.7123107910156, 32.164459228515625, 228.07949829101562, 1054.4293212890625, 215.280029296875, 254.54812622070312, 283.0296936035156, -23.771041870117188, -62.85129928588867, 558.31396484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000368.npy"} +{"epoch": 0.540381791483113, "step": 369, "batch_size": 64, "mean": 153.03839111328125, "std": 271.9935302734375, "min": -629.9818725585938, "p10": -121.50907745361327, "median": 134.21212005615234, "p90": 480.5529174804688, "max": 853.2477416992188, "pos_frac": 0.765625, "sample": [-216.3404541015625, -101.85841369628906, 853.2477416992188, -31.0222225189209, -252.38609313964844, 31.226438522338867, -80.93291473388672, 288.77740478515625, 18.83327865600586, 686.554443359375, 214.49813842773438, -78.94699096679688, 148.97994995117188, 306.96636962890625, 283.89898681640625, 796.2073364257812, 472.5619201660156, 162.90191650390625, 37.55424880981445, 400.5149841308594, 305.0142822265625, 111.86601257324219, 104.39340209960938, 238.97894287109375, 228.96875, 176.7203369140625, -464.74896240234375, 224.31353759765625, 181.89639282226562, 114.90904998779297, 263.048583984375, 383.9266357421875, 104.49651336669922, 106.60572814941406, 173.27890014648438, 27.97447967529297, 345.1593017578125, 92.21868133544922, -124.63179016113281, 364.76129150390625, 483.9776306152344, -186.1616668701172, 706.0120849609375, 48.19700622558594, 487.5111999511719, 171.82415771484375, 397.09588623046875, -365.625732421875, -1.1947212219238281, 607.808837890625, 177.63153076171875, 119.44429016113281, -629.9818725585938, 82.91461944580078, 200.74481201171875, -114.22274780273438, -96.13801574707031, 88.80651092529297, 115.50763702392578, 368.9046630859375, 175.46505737304688, 9.108177185058594, 99.00115966796875, -52.560569763183594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000369.npy"} +{"epoch": 0.5418502202643172, "step": 370, "batch_size": 64, "mean": 144.05679321289062, "std": 282.9492492675781, "min": -442.21746826171875, "p10": -227.66142120361326, "median": 148.87486267089844, "p90": 501.4794403076172, "max": 854.189208984375, "pos_frac": 0.703125, "sample": [95.0081558227539, 291.6215515136719, 212.36180114746094, -390.48095703125, 721.2814331054688, 371.71649169921875, 114.6275405883789, 254.25502014160156, -9.505525588989258, -50.816078186035156, 409.15203857421875, 496.7566833496094, 43.167449951171875, 159.1536865234375, -118.00753784179688, 503.50347900390625, 222.66433715820312, 125.34783935546875, 230.96780395507812, 854.189208984375, 772.5557250976562, 182.45928955078125, 326.0570373535156, -115.7553939819336, 281.4033203125, -186.47994995117188, 41.1754150390625, 302.79803466796875, -219.09463500976562, 185.00454711914062, 542.37255859375, 113.16146850585938, -1.7650604248046875, 282.17486572265625, -23.003273010253906, 399.26898193359375, 249.90469360351562, -136.48312377929688, -341.54559326171875, 24.472631454467773, 181.65994262695312, 231.1632080078125, -231.33290100097656, 83.12673950195312, 439.086669921875, 117.86154174804688, 203.98944091796875, 294.642333984375, 221.9668731689453, -442.21746826171875, -102.95924377441406, -50.95733642578125, 109.89163208007812, 138.59603881835938, -374.4965515136719, -314.4357604980469, 9.725128173828125, -282.4848937988281, -128.95339965820312, 464.76171875, 609.602294921875, 244.56671142578125, 10.53436279296875, 570.6514892578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000370.npy"} +{"epoch": 0.5433186490455213, "step": 371, "batch_size": 64, "mean": 212.34832763671875, "std": 300.55584716796875, "min": -530.7329711914062, "p10": -114.33891525268554, "median": 156.32872772216797, "p90": 612.3943664550783, "max": 933.8975219726562, "pos_frac": 0.734375, "sample": [384.90509033203125, 274.7866516113281, 536.3433837890625, 84.19417572021484, -530.7329711914062, 159.56430053710938, 563.6571655273438, 211.9822998046875, 804.9826049804688, 305.78778076171875, 561.7298583984375, -19.54856300354004, 345.66094970703125, 203.22055053710938, -301.4771423339844, 96.19136047363281, -15.62176513671875, 188.54580688476562, 489.59454345703125, 1.3438720703125, -44.06541442871094, -243.52552795410156, 633.28173828125, -117.80857849121094, -202.185791015625, 121.70153045654297, 931.2005004882812, 674.3510131835938, -222.46229553222656, 40.796905517578125, 712.9678344726562, -24.754074096679688, 330.73974609375, 249.52720642089844, -106.24303436279297, -18.536849975585938, 549.4996337890625, -133.14236450195312, 255.3057861328125, 444.22784423828125, -49.448822021484375, 114.91703033447266, 728.057861328125, 77.9625015258789, 17.791027069091797, 246.80892944335938, 286.82305908203125, 385.9934387207031, 478.0614013671875, -27.868240356445312, 431.00372314453125, 147.20721435546875, 117.14292907714844, 338.347412109375, 153.09315490722656, 151.11233520507812, 118.56651306152344, 73.30722045898438, -27.910232543945312, 523.9833374023438, 275.51336669921875, 933.8975219726562, -86.51957702636719, 6.463768005371094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000371.npy"} +{"epoch": 0.5447870778267254, "step": 372, "batch_size": 64, "mean": 255.53970336914062, "std": 328.2554016113281, "min": -386.96002197265625, "p10": -131.11302261352537, "median": 183.35711669921875, "p90": 732.0615417480469, "max": 1299.8232421875, "pos_frac": 0.765625, "sample": [323.06658935546875, 311.214111328125, 732.9933471679688, -245.63978576660156, 46.43260192871094, 667.1011962890625, 518.7196655273438, 226.1856689453125, 301.4213562011719, -15.942413330078125, 1299.8232421875, 310.9601135253906, -17.17266845703125, 164.89535522460938, -38.349273681640625, 107.3897933959961, 66.95442199707031, 88.61092376708984, 761.1295166015625, -203.93795776367188, 131.7937774658203, 175.45127868652344, 729.8873291015625, -151.36483764648438, 511.4277038574219, 150.3231201171875, 104.9088363647461, 179.96218872070312, 186.75204467773438, 150.75286865234375, 177.78265380859375, 344.53466796875, 364.08203125, -125.52489471435547, 173.90997314453125, -124.3689193725586, 367.77642822265625, 739.3311157226562, 95.7076187133789, 1060.918701171875, 634.4934692382812, 351.196533203125, 187.8010711669922, -152.47042846679688, 487.6734619140625, 485.3784484863281, 717.7850952148438, 234.75462341308594, -10.032913208007812, 329.57098388671875, 159.76541137695312, 268.4546813964844, 100.64468383789062, -70.67186737060547, 773.6585693359375, 150.55137634277344, -23.452791213989258, 208.61474609375, -133.5079345703125, 875.1115112304688, -204.22122192382812, -386.96002197265625, 531.4111938476562, 189.09304809570312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000372.npy"} +{"epoch": 0.5462555066079295, "step": 373, "batch_size": 64, "mean": 242.35186767578125, "std": 282.3642272949219, "min": -497.9678649902344, "p10": -70.8711776733398, "median": 207.74065399169922, "p90": 627.711328125, "max": 812.9537963867188, "pos_frac": 0.84375, "sample": [114.18387603759766, -164.12538146972656, -215.465576171875, 258.3079833984375, 57.21270751953125, 812.9537963867188, 456.7564392089844, 49.73866271972656, 245.36648559570312, 164.85223388671875, 683.292724609375, 190.5975799560547, -31.024078369140625, 271.6952209472656, 391.7835388183594, 58.79712677001953, 24.934627532958984, 749.9470825195312, 342.1835021972656, 12.078544616699219, -395.5986022949219, -33.972869873046875, 229.16537475585938, -497.9678649902344, 104.89239501953125, 58.12694549560547, 163.29525756835938, 132.5430908203125, 596.6473388671875, 443.8326416015625, 207.8041534423828, 486.19219970703125, 337.75830078125, 600.3876342773438, 461.4230041503906, -87.69711303710938, -179.30374145507812, 631.207763671875, 275.1749267578125, -3.8444671630859375, 778.1350708007812, 130.58712768554688, 392.60186767578125, 190.2716064453125, 388.0401916503906, 488.61700439453125, 284.5907897949219, 257.46282958984375, 564.7015380859375, 777.5294189453125, 163.36212158203125, 53.429107666015625, 111.73382568359375, 207.67715454101562, 0.2546539306640625, 283.77734375, 105.85182189941406, 126.81939697265625, 747.7547607421875, 404.62841796875, 440.6748962402344, 619.552978515625, -86.68473815917969, 75.01793670654297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000373.npy"} +{"epoch": 0.5477239353891337, "step": 374, "batch_size": 64, "mean": 218.37730407714844, "std": 358.6478576660156, "min": -701.4334106445312, "p10": -113.41287689208983, "median": 142.73760223388672, "p90": 578.4334106445314, "max": 1318.2310791015625, "pos_frac": 0.734375, "sample": [-144.15353393554688, 134.93075561523438, -32.48487854003906, 1318.2310791015625, 328.66693115234375, 1198.518310546875, 243.31939697265625, 600.25244140625, 449.69940185546875, -17.50818634033203, 140.36660766601562, 331.1541442871094, 30.978912353515625, 202.41940307617188, 66.30322265625, -48.58747100830078, 530.6968383789062, 52.210113525390625, 588.8627319335938, 358.5140075683594, -62.0830078125, 8.15389633178711, 425.79473876953125, 60.08003234863281, -87.26338195800781, 893.0155029296875, -5.133474349975586, 84.22975158691406, -234.11143493652344, 337.5664367675781, 255.29196166992188, 273.0031433105469, -118.93002319335938, 959.2194213867188, -482.6185302734375, 146.9589080810547, -6.9329986572265625, 545.6192016601562, 119.22077178955078, -39.873565673828125, 145.1085968017578, 82.03235626220703, 65.1303482055664, 101.35415649414062, 285.8255310058594, 238.39187622070312, -164.9590301513672, 521.6366577148438, 305.36187744140625, 459.28973388671875, 105.07511901855469, 554.0983276367188, -100.53953552246094, 415.6943359375, 296.5132751464844, -32.565086364746094, -150.00177001953125, 223.25823974609375, 6.7643280029296875, 79.71345520019531, 1167.2642822265625, -701.4334106445312, 163.34759521484375, 506.1877136230469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000374.npy"} +{"epoch": 0.5491923641703378, "step": 375, "batch_size": 64, "mean": 210.68402099609375, "std": 308.8144226074219, "min": -534.0509033203125, "p10": -153.57788772583, "median": 174.32685089111328, "p90": 584.2585327148439, "max": 1031.4248046875, "pos_frac": 0.765625, "sample": [649.7489013671875, 325.79510498046875, -13.9429931640625, 282.2173156738281, -99.38050842285156, -175.7362060546875, 482.69635009765625, 228.3541259765625, 260.34295654296875, -24.451587677001953, -10.083627700805664, -43.47456359863281, 19.727378845214844, -534.0509033203125, 140.70254516601562, 905.8597412109375, 391.21002197265625, 49.33123779296875, 329.87481689453125, 132.46624755859375, 24.779876708984375, 242.15390014648438, -470.1048583984375, 28.085365295410156, 465.818603515625, -234.0392608642578, 597.2655029296875, 75.41748046875, 140.60296630859375, 76.25028228759766, -337.1936950683594, 99.07133483886719, 553.908935546875, 651.6192016601562, 31.628345489501953, 265.5318603515625, 321.0325927734375, 493.752197265625, 544.479248046875, 684.339111328125, 407.2757873535156, 186.20762634277344, 162.44607543945312, -351.08673095703125, 453.01263427734375, 467.0486145019531, 289.4651794433594, -243.85769653320312, 1031.4248046875, 520.5957641601562, 364.65576171875, 0.6989707946777344, -1.636627197265625, 358.9908752441406, 128.36074829101562, 515.4788208007812, 107.46662902832031, 620.0242919921875, -101.8751449584961, 159.48162841796875, 424.873046875, 366.9378356933594, 97.452880859375, -31.270153045654297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000375.npy"} +{"epoch": 0.5506607929515418, "step": 376, "batch_size": 64, "mean": 263.78704833984375, "std": 319.98492431640625, "min": -564.4385986328125, "p10": -125.00766525268554, "median": 231.02755737304688, "p90": 678.4385559082032, "max": 859.479248046875, "pos_frac": 0.796875, "sample": [-122.6146011352539, 754.2797241210938, 237.70455932617188, 41.56057357788086, 859.479248046875, 88.04414367675781, 496.7021484375, -247.0748748779297, 22.633163452148438, -4.5548553466796875, 389.6141357421875, -0.022144317626953125, 649.30712890625, 120.19096374511719, -243.62051391601562, 500.8436279296875, 582.4832153320312, -211.99386596679688, -111.35592651367188, 176.4796600341797, 89.31260681152344, 12.757917404174805, -126.03326416015625, 842.1654663085938, 750.9877319335938, 139.2799072265625, 442.259521484375, 312.3701171875, 508.523681640625, -90.21275329589844, 686.4537353515625, 521.4011840820312, 141.42250061035156, 587.4425659179688, 125.24871826171875, 591.7324829101562, 518.0418701171875, 326.9480285644531, 458.65478515625, 266.297119140625, 229.5517120361328, 538.7024536132812, -126.66911315917969, 246.91436767578125, 637.3565673828125, 95.30015563964844, 847.85888671875, 15.457740783691406, -154.45213317871094, 47.85974884033203, 26.681137084960938, 112.16329193115234, 641.848388671875, 682.3639526367188, 304.38153076171875, 669.279296875, 164.84117126464844, 304.678466796875, 232.50340270996094, 192.314208984375, -90.4361801147461, 99.5538330078125, 645.618896484375, -564.4385986328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000376.npy"} +{"epoch": 0.5521292217327459, "step": 377, "batch_size": 64, "mean": 256.0513916015625, "std": 302.17694091796875, "min": -424.00677490234375, "p10": -87.80182266235352, "median": 216.34427642822266, "p90": 689.5483337402346, "max": 919.81640625, "pos_frac": 0.765625, "sample": [386.0150146484375, 919.81640625, 106.280029296875, -80.677734375, 320.8782653808594, -40.0347900390625, 238.55441284179688, -161.38925170898438, 275.19952392578125, 54.69397735595703, 850.3357543945312, 743.04345703125, -37.81499481201172, 903.6669921875, 41.880149841308594, 215.96365356445312, 208.10557556152344, 496.2308654785156, 211.94622802734375, -44.00811767578125, 242.0296630859375, 627.7178344726562, 457.1817932128906, -22.826705932617188, 584.8834228515625, 881.6124877929688, 416.6796569824219, -198.31199645996094, 623.0616455078125, 179.54763793945312, 335.864013671875, 216.7248992919922, 55.76726531982422, 147.36801147460938, 58.66006088256836, 467.05303955078125, 114.48088073730469, 332.4678039550781, 456.75115966796875, 637.99609375, 254.01759338378906, -53.0723876953125, -424.00677490234375, 154.96185302734375, -116.30984497070312, 87.4527816772461, 492.78192138671875, 401.608154296875, -224.0575714111328, 354.84423828125, 131.4658660888672, 711.6421508789062, -73.94490051269531, -32.1806640625, -132.95184326171875, 570.6139526367188, 199.16207885742188, 737.8721923828125, 378.188232421875, 14.437644958496094, 341.0670166015625, 174.85507202148438, 306.30206298828125, -90.8550033569336], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000377.npy"} +{"epoch": 0.55359765051395, "step": 378, "batch_size": 64, "mean": 267.5641174316406, "std": 287.4216613769531, "min": -544.346435546875, "p10": -60.18887100219727, "median": 245.7113037109375, "p90": 636.6937744140625, "max": 1116.73193359375, "pos_frac": 0.84375, "sample": [730.643310546875, 326.75872802734375, 635.87841796875, 645.97021484375, 476.34747314453125, 122.03677368164062, 241.14031982421875, 366.21771240234375, 148.177734375, 202.06787109375, 342.19281005859375, 384.49273681640625, 653.689453125, 182.09254455566406, 125.15463256835938, -33.72740173339844, 266.49127197265625, 712.3019409179688, 242.11294555664062, 304.8567810058594, 195.60211181640625, -73.76026153564453, -58.125667572021484, 58.03620147705078, 249.30966186523438, 212.1263427734375, -69.09080505371094, 513.3075561523438, 59.487754821777344, 496.7477722167969, 518.3287353515625, 499.1545104980469, 276.3764953613281, 85.7857666015625, 3.9350357055664062, 485.8636169433594, 210.5810546875, 1116.73193359375, 49.83470153808594, -296.29412841796875, 280.35064697265625, -71.36736297607422, 201.3553924560547, 610.66162109375, 158.1201171875, -58.18182373046875, 470.1217956542969, 328.7126159667969, 502.0570983886719, 359.5914001464844, 104.43357849121094, 301.0455322265625, 466.2216796875, 269.09576416015625, -544.346435546875, 196.1608428955078, 234.27987670898438, 513.2183837890625, -61.049034118652344, 797.9596557617188, 99.24546813964844, -428.38641357421875, 148.92599487304688, 637.043212890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000378.npy"} +{"epoch": 0.5550660792951542, "step": 379, "batch_size": 64, "mean": 259.21221923828125, "std": 330.3843994140625, "min": -525.1060791015625, "p10": -116.05961990356442, "median": 221.10037994384766, "p90": 678.0278442382813, "max": 1335.815673828125, "pos_frac": 0.8125, "sample": [244.00332641601562, 490.0196533203125, 18.487640380859375, 222.3914794921875, -73.41796112060547, 96.85285186767578, 213.26150512695312, 691.0101318359375, 197.95626831054688, 127.7989501953125, 398.0243835449219, 878.6036376953125, 5.7932586669921875, 499.9867248535156, 343.41156005859375, 216.89599609375, -525.1060791015625, 969.6152954101562, 43.39793395996094, 294.2813720703125, 542.1021118164062, 418.6430358886719, 339.47216796875, 13.355060577392578, 127.22207641601562, -73.42648315429688, -306.30780029296875, 148.2393035888672, 23.66657257080078, 440.16900634765625, 771.8004150390625, 396.45465087890625, -287.05206298828125, 94.45635986328125, 487.4359436035156, 755.885009765625, 337.61090087890625, 384.5936279296875, 647.73583984375, 122.74783325195312, -93.12726593017578, 587.4072265625, 241.39697265625, -44.64850997924805, 465.34625244140625, -137.37249755859375, 180.39785766601562, 418.3856506347656, 299.84979248046875, 219.8092803955078, -125.88777160644531, 411.9046630859375, 763.4208374023438, 211.54408264160156, 1335.815673828125, 303.6401062011719, 31.20519256591797, 512.153564453125, 116.24272918701172, -252.9632568359375, 49.24568176269531, 602.2413330078125, -181.5408935546875, -62.95707702636719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000379.npy"} +{"epoch": 0.5565345080763583, "step": 380, "batch_size": 64, "mean": 284.94622802734375, "std": 329.4243469238281, "min": -822.4442138671875, "p10": -73.90530471801756, "median": 303.6613006591797, "p90": 671.7466064453125, "max": 1180.853271484375, "pos_frac": 0.859375, "sample": [140.95538330078125, 108.70301818847656, 83.46730041503906, 1180.853271484375, 64.51900482177734, 99.0707015991211, 146.96139526367188, 358.99493408203125, 481.08245849609375, 345.08905029296875, 126.76409149169922, 427.3838195800781, -52.574317932128906, 616.7847290039062, 855.660400390625, 467.66583251953125, 383.0420837402344, 467.538330078125, 516.587646484375, 175.86474609375, 438.6531066894531, -262.88385009765625, 236.1798095703125, 241.666015625, 83.58517456054688, -80.04222869873047, 481.0125732421875, 362.8634033203125, 130.84426879882812, 668.4896240234375, 378.74468994140625, 668.5750732421875, 620.29248046875, 658.1673583984375, 673.1058349609375, 404.0108642578125, 109.80610656738281, -59.5858154296875, 285.1138610839844, 41.85393524169922, 813.211181640625, 65.27619934082031, -254.8602294921875, 194.50592041015625, -262.82684326171875, 214.86392211914062, 322.208740234375, 687.5350952148438, 63.37518310546875, -353.170654296875, 444.02252197265625, 22.618131637573242, 747.7872314453125, 354.48211669921875, 277.45623779296875, -98.76969146728516, 514.3594360351562, 368.3463134765625, 186.1463165283203, 106.96685791015625, -822.4442138671875, 344.63531494140625, 844.493896484375, 381.47344970703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000380.npy"} +{"epoch": 0.5580029368575624, "step": 381, "batch_size": 64, "mean": 199.41497802734375, "std": 334.237548828125, "min": -688.20458984375, "p10": -163.53164825439453, "median": 200.0733871459961, "p90": 593.6789855957032, "max": 1055.070556640625, "pos_frac": 0.75, "sample": [263.9048767089844, 12.512603759765625, 54.8121337890625, 637.724853515625, 486.8897705078125, 689.1968383789062, 513.5513305664062, -31.367233276367188, 127.62928009033203, 555.89599609375, 424.46148681640625, 114.28196716308594, 405.9099426269531, 276.447021484375, 1019.1429443359375, -180.67535400390625, 445.147705078125, 361.45635986328125, 689.4347534179688, 213.20437622070312, 1004.1549682617188, 599.7503051757812, -232.70162963867188, -156.2824249267578, -129.19703674316406, 131.79269409179688, 579.5125732421875, 60.86277389526367, -297.4479064941406, -93.48859405517578, 139.12266540527344, 216.038818359375, 97.57780456542969, -41.799678802490234, 258.5462646484375, -104.01937866210938, 255.68417358398438, 113.01343536376953, 214.55166625976562, -77.18399047851562, 13.485435485839844, 109.37384033203125, 1055.070556640625, 223.14137268066406, 386.79345703125, 408.819091796875, 49.20965576171875, -166.63845825195312, 444.4517822265625, -108.21876525878906, 29.451040267944336, 369.57879638671875, -86.07766723632812, 233.59657287597656, 113.72087860107422, -250.05514526367188, 63.962158203125, 338.9273681640625, 401.82080078125, 235.62107849121094, 186.94239807128906, -688.20458984375, -600.254638671875, 379.992919921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000381.npy"} +{"epoch": 0.5594713656387665, "step": 382, "batch_size": 64, "mean": 247.65719604492188, "std": 257.2524719238281, "min": -289.37298583984375, "p10": -76.29117355346679, "median": 199.0723876953125, "p90": 630.066912841797, "max": 970.65380859375, "pos_frac": 0.8125, "sample": [970.65380859375, 686.3150024414062, 151.23715209960938, 136.43417358398438, 153.9752655029297, 719.119384765625, -76.7417984008789, 113.1079330444336, 299.3489990234375, -167.76397705078125, 151.52955627441406, 248.24783325195312, 396.74114990234375, 423.8822021484375, 485.13677978515625, 510.1117858886719, 215.80409240722656, 77.19276428222656, 443.1980285644531, 123.55104064941406, 377.1105041503906, -77.76359558105469, 34.57337951660156, 354.0921936035156, 747.914794921875, -101.35762786865234, 356.63739013671875, -75.23971557617188, 549.9948120117188, 271.1094055175781, 658.2052001953125, -93.09783172607422, -28.39468002319336, 332.7884521484375, 438.6501770019531, -289.37298583984375, -141.20388793945312, 410.55926513671875, -15.626861572265625, 327.14813232421875, 180.028076171875, 254.8277587890625, 361.38726806640625, 87.21337127685547, 189.96449279785156, 84.86100769042969, 123.60023498535156, 231.73385620117188, 208.18028259277344, 138.1802215576172, 664.8914794921875, 586.2080078125, 308.22845458984375, 11.889307022094727, 184.3524169921875, 648.8635864257812, 72.45106506347656, -12.86227798461914, 481.10479736328125, 579.17138671875, 153.0309295654297, 106.64153289794922, -15.125015258789062, 123.4306640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000382.npy"} +{"epoch": 0.5609397944199707, "step": 383, "batch_size": 64, "mean": 239.6513671875, "std": 339.3735046386719, "min": -892.0908203125, "p10": -108.47313461303709, "median": 243.4184341430664, "p90": 724.4600341796876, "max": 1001.7495727539062, "pos_frac": 0.734375, "sample": [-10.541221618652344, -76.20556640625, 104.60549926757812, -299.46539306640625, 543.1724243164062, -215.33973693847656, -125.14019012451172, 568.435791015625, 338.90032958984375, 48.01994705200195, 287.3232421875, 185.0607147216797, 87.114013671875, -44.47724914550781, -345.3433837890625, 255.73956298828125, 921.48291015625, 6.504798889160156, 267.8844909667969, 313.4036865234375, 781.0933837890625, -145.10842895507812, -9.863826751708984, 693.9224243164062, 181.62322998046875, -89.43701934814453, 730.9598388671875, 89.42451477050781, -79.12054443359375, 295.401123046875, 1001.7495727539062, 205.86634826660156, 709.2938232421875, 235.739990234375, 406.39837646484375, 413.34063720703125, 455.4978942871094, 69.13241577148438, 238.2721710205078, 286.837158203125, -51.719146728515625, -23.46868896484375, 253.46524047851562, 73.71937561035156, 343.1187438964844, 783.5463256835938, -116.6314697265625, 547.0075073242188, 19.891250610351562, 316.8260498046875, 731.6793823242188, 97.90409088134766, -86.20658874511719, 314.3375549316406, 407.86761474609375, -1.9034957885742188, 812.1634521484375, 563.1771240234375, 248.564697265625, 596.95556640625, -892.0908203125, 131.20809936523438, 466.36669921875, 519.75048828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000383.npy"} +{"epoch": 0.5624082232011748, "step": 384, "batch_size": 64, "mean": 189.2479248046875, "std": 235.35971069335938, "min": -377.0197448730469, "p10": -88.67387008666992, "median": 187.73779296875, "p90": 458.44370117187503, "max": 889.911376953125, "pos_frac": 0.8125, "sample": [188.94671630859375, 47.865203857421875, 889.911376953125, -154.76988220214844, 3.512847900390625, 95.09355163574219, -210.24607849121094, -33.50071716308594, 731.3986206054688, 179.38287353515625, 468.6391296386719, 463.735595703125, 176.4945831298828, -232.91595458984375, 384.3327941894531, 69.46673583984375, -91.33332824707031, 19.232952117919922, 355.55023193359375, 368.9561767578125, 186.52886962890625, 270.9286804199219, 72.09907531738281, 405.02593994140625, 297.7718505859375, 148.88246154785156, 149.31463623046875, -238.34796142578125, -377.0197448730469, 72.69640350341797, 299.4001770019531, 140.7339630126953, 50.94215393066406, 208.88388061523438, 191.0664825439453, -75.08219146728516, 394.8312683105469, 267.80987548828125, 340.10772705078125, 66.4442138671875, 13.227123260498047, 237.9697723388672, 340.90081787109375, 528.322509765625, 436.6484680175781, 272.3250732421875, 306.0452880859375, 280.83050537109375, 234.96804809570312, -170.7821044921875, 254.43060302734375, -54.600975036621094, 131.9688720703125, 482.96893310546875, 114.8021240234375, -82.46846771240234, -63.385372161865234, 446.095947265625, 429.10919189453125, 115.77314758300781, 590.9354248046875, 320.7110290527344, 51.91046142578125, 300.3902587890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000384.npy"} +{"epoch": 0.5638766519823789, "step": 385, "batch_size": 64, "mean": 237.38702392578125, "std": 239.79306030273438, "min": -343.0481872558594, "p10": 2.738681030273439, "median": 208.0983123779297, "p90": 584.7101196289062, "max": 788.698486328125, "pos_frac": 0.90625, "sample": [40.59161376953125, 12.794807434082031, 66.61632537841797, 261.1031799316406, 446.3179931640625, 312.5791931152344, 443.4898681640625, 206.22781372070312, -149.19332885742188, 495.5787353515625, 119.97792053222656, 19.188016891479492, 116.1703872680664, 185.279541015625, 506.334228515625, 162.96517944335938, 643.3267822265625, 311.01177978515625, 87.76317596435547, 137.63345336914062, 366.5035705566406, 151.13783264160156, -162.24610900878906, 300.35137939453125, -208.15072631835938, 240.5872802734375, 201.59471130371094, 585.77392578125, 110.94660949707031, 60.7073974609375, 348.77392578125, 251.25218200683594, 152.514404296875, 759.546630859375, 788.698486328125, 201.6394805908203, 204.35397338867188, 2.0371856689453125, 582.2279052734375, 267.36236572265625, 4.3755035400390625, 116.00904846191406, 445.8387756347656, 145.8434295654297, 222.1680908203125, 660.2741088867188, 387.1573181152344, -88.89483642578125, 641.78759765625, 325.249755859375, -343.0481872558594, 33.51808166503906, 160.38803100585938, -248.0113525390625, 213.95928955078125, 296.28277587890625, 482.1225891113281, 92.24286651611328, 308.45355224609375, 698.8436279296875, 109.12117004394531, 345.4481506347656, 209.96881103515625, 342.3018493652344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000385.npy"} +{"epoch": 0.5653450807635829, "step": 386, "batch_size": 64, "mean": 248.9279022216797, "std": 302.6310119628906, "min": -470.534423828125, "p10": -130.249641418457, "median": 188.51953125, "p90": 667.7709838867189, "max": 967.8319702148438, "pos_frac": 0.828125, "sample": [-90.14752197265625, 389.5399475097656, -62.92571258544922, 388.6624450683594, 678.7308349609375, 202.88938903808594, 69.2785415649414, 480.0964050292969, 146.21493530273438, 427.78863525390625, 385.453857421875, 949.4689331054688, -279.985595703125, -36.386749267578125, 641.3968505859375, 341.6405029296875, 311.3150939941406, 411.4570617675781, 53.07438659667969, 113.85279083251953, 168.59286499023438, 258.97247314453125, 174.3392333984375, 171.30844116210938, 301.4281311035156, 265.4710998535156, 97.5892105102539, -155.66943359375, 967.8319702148438, 642.197998046875, -67.99078369140625, 128.82144165039062, 148.87759399414062, -178.44256591796875, 194.44129943847656, 795.821533203125, 931.5291137695312, 304.13214111328125, 706.8428955078125, 156.72166442871094, 234.40786743164062, 574.8775024414062, -230.5191192626953, 182.59776306152344, 43.370147705078125, 141.65045166015625, 174.38929748535156, 173.3975067138672, -147.43626403808594, 365.34613037109375, -470.534423828125, 754.8136596679688, 52.68592071533203, 84.1245346069336, 95.2716064453125, 161.9370880126953, 508.81451416015625, 364.1797180175781, -326.23114013671875, 466.9902038574219, 496.47479248046875, 242.67848205566406, 350.3615417480469, 103.50664520263672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000386.npy"} +{"epoch": 0.566813509544787, "step": 387, "batch_size": 64, "mean": 209.68357849121094, "std": 258.1988830566406, "min": -412.9181823730469, "p10": -65.83155364990235, "median": 203.55902862548828, "p90": 503.2237579345703, "max": 880.962890625, "pos_frac": 0.796875, "sample": [309.4371337890625, -34.744468688964844, 121.27146911621094, 207.34361267089844, 232.078125, 880.962890625, 199.77444458007812, 119.99555969238281, 393.8467102050781, 428.40655517578125, -412.9181823730469, 306.58221435546875, 130.43951416015625, 28.666576385498047, -363.11236572265625, 217.24032592773438, 503.426025390625, 567.4124145507812, -175.95266723632812, 105.30642700195312, 828.1380004882812, 61.7862663269043, 218.36526489257812, 707.01953125, 391.9757080078125, 141.53244018554688, 138.98373413085938, 233.76077270507812, 66.02849578857422, 166.9505157470703, 48.096778869628906, 174.46131896972656, 470.9453430175781, 175.87452697753906, 501.0801696777344, 150.74041748046875, 340.5789794921875, 102.13291931152344, 279.2355041503906, 360.5885314941406, 69.24376678466797, -6.7341718673706055, 635.8150634765625, 502.7518005371094, 361.8861999511719, 109.111572265625, 370.8709411621094, -49.099857330322266, 538.4710693359375, -66.19960021972656, 390.3143310546875, -148.96499633789062, 13.772941589355469, 338.20355224609375, -218.00479125976562, 249.1416778564453, -10.000190734863281, 394.45941162109375, -64.9727783203125, -29.353530883789062, 287.61236572265625, -274.4200134277344, 278.8827209472656, 423.22418212890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000387.npy"} +{"epoch": 0.5682819383259912, "step": 388, "batch_size": 64, "mean": 281.1998291015625, "std": 330.2901306152344, "min": -527.3045043945312, "p10": -29.57113800048826, "median": 218.10914611816406, "p90": 693.3907226562502, "max": 1533.1505126953125, "pos_frac": 0.84375, "sample": [1533.1505126953125, 567.5638427734375, -85.27165222167969, 517.2579345703125, 551.2025146484375, 805.0408325195312, 165.4611358642578, -256.3410949707031, 136.0897216796875, 296.4772644042969, 153.96710205078125, 864.2305908203125, 520.6636352539062, 246.9364013671875, -527.3045043945312, 59.42719268798828, 283.021484375, 203.5260772705078, 436.677978515625, -5.679893493652344, 152.68798828125, 735.2926635742188, 164.84176635742188, -69.03998565673828, 136.3265838623047, 237.09970092773438, 78.2639389038086, -9.674148559570312, 104.92868041992188, 273.5552673339844, 103.76354217529297, 146.34532165527344, 283.64642333984375, 59.276947021484375, 487.100341796875, 191.6710205078125, 147.48155212402344, 81.57489776611328, 551.1841430664062, 107.29105377197266, 432.4689025878906, 74.81678771972656, 404.0882568359375, 150.77774047851562, 187.83331298828125, 27.805458068847656, 444.4593505859375, 331.11578369140625, -233.81326293945312, 374.4888610839844, 334.24334716796875, 1170.326416015625, -138.03541564941406, 366.86065673828125, 244.05909729003906, 29.502029418945312, -38.098419189453125, 547.9444580078125, 717.1830444335938, 788.77783203125, 637.8753051757812, 488.7724914550781, 232.6922149658203, -9.07151985168457], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000388.npy"} +{"epoch": 0.5697503671071953, "step": 389, "batch_size": 64, "mean": 243.48245239257812, "std": 315.5850524902344, "min": -373.575927734375, "p10": -87.1248046875, "median": 242.57070922851562, "p90": 605.4242919921876, "max": 1009.7823486328125, "pos_frac": 0.71875, "sample": [55.9061279296875, 534.0128784179688, 412.7095642089844, -332.7928161621094, 157.57748413085938, 308.31317138671875, 216.8977813720703, 594.7637329101562, 419.77239990234375, 240.1168212890625, 272.6637268066406, -84.78882598876953, 331.1214599609375, -213.07473754882812, 146.623046875, 502.9269714355469, 132.45718383789062, 435.86370849609375, 436.5173034667969, -77.70718383789062, -242.36953735351562, -34.382137298583984, 609.9931030273438, -34.902652740478516, 327.1502380371094, 660.8145751953125, -373.575927734375, -7.8697967529296875, 129.61990356445312, 148.70953369140625, 367.3108825683594, -56.014251708984375, 139.59361267089844, 306.5485534667969, -320.9870910644531, 293.55810546875, 224.64952087402344, 466.4237365722656, 412.28546142578125, 988.4205322265625, 489.329345703125, -65.95408630371094, 465.7940368652344, 437.61248779296875, 233.99664306640625, 386.5852966308594, -7.051973342895508, 139.01829528808594, 503.0564270019531, 183.26016235351562, 54.501197814941406, 1009.7823486328125, -76.86134338378906, -88.12593841552734, 245.02459716796875, 816.519287109375, -28.1781005859375, 958.2681884765625, 366.4888610839844, -48.81284713745117, 276.0933532714844, -275.70263671875, 269.79559326171875, 843.5813598632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000389.npy"} +{"epoch": 0.5712187958883994, "step": 390, "batch_size": 64, "mean": 241.33123779296875, "std": 392.37335205078125, "min": -662.3434448242188, "p10": -146.04956970214843, "median": 262.4533386230469, "p90": 565.7798278808594, "max": 2276.023193359375, "pos_frac": 0.78125, "sample": [265.7914733886719, 330.9603271484375, 614.4610595703125, 526.9127197265625, -662.3434448242188, -164.31375122070312, -263.0664367675781, 134.7674102783203, 409.492431640625, 259.1152038574219, 468.6695251464844, 14.385086059570312, 303.11669921875, 406.2097473144531, 221.37603759765625, 23.381683349609375, -107.93138885498047, 2276.023193359375, -141.86087036132812, 448.26007080078125, 128.82839965820312, 549.8502197265625, 560.0068359375, 118.86259460449219, 193.3622283935547, -94.099853515625, 124.82547760009766, -74.52963256835938, 166.9630584716797, 317.69012451171875, 183.3067626953125, -39.704437255859375, -147.8447265625, -117.98673248291016, 157.7218475341797, 568.2539672851562, 835.6634521484375, 577.0773315429688, -447.95635986328125, 404.6697692871094, 545.4137573242188, 640.1200561523438, 448.4581604003906, 143.59634399414062, 581.2738037109375, 179.99220275878906, 166.8238525390625, 101.85816955566406, 528.4368896484375, 361.9198913574219, 504.5764465332031, 293.7860107421875, 302.6939392089844, 325.13641357421875, 342.8270568847656, -46.633140563964844, 2.953948974609375, -318.06634521484375, -535.4544067382812, 285.99346923828125, 494.4248046875, 83.984619140625, 320.73394775390625, 361.9834289550781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000390.npy"} +{"epoch": 0.5726872246696035, "step": 391, "batch_size": 64, "mean": 139.651611328125, "std": 288.9215393066406, "min": -628.4801635742188, "p10": -171.6012298583984, "median": 109.63602828979492, "p90": 468.2785949707031, "max": 1052.62109375, "pos_frac": 0.703125, "sample": [-46.38252639770508, 10.691123962402344, 105.75257110595703, -57.05317687988281, 176.31817626953125, 354.68048095703125, 42.33307647705078, 140.81146240234375, 35.20280838012695, 414.1448669433594, -95.23583221435547, 146.9530792236328, -221.19528198242188, -94.94139862060547, 28.302963256835938, 35.16602325439453, 396.17236328125, 118.73019409179688, 41.53412628173828, -141.79412841796875, 186.6715850830078, 470.04876708984375, 555.3619384765625, 464.148193359375, -74.0901870727539, 96.99613952636719, 1052.62109375, -233.38746643066406, -114.11642456054688, 310.539306640625, 358.4842529296875, -100.09249114990234, 82.31626892089844, -2.7690696716308594, -82.55779266357422, 231.91737365722656, 105.71802520751953, -62.308982849121094, 478.8665466308594, 154.57879638671875, 76.37505340576172, -194.22811889648438, 163.52688598632812, 165.42514038085938, 726.9627685546875, -326.4729919433594, 599.2673950195312, -75.91387176513672, -514.7798461914062, 26.593467712402344, 56.38067626953125, 281.1735534667969, -184.37570190429688, 404.0414123535156, 416.1945495605469, 113.51948547363281, 805.5078735351562, 359.80072021484375, 328.70501708984375, -628.4801635742188, 259.0635681152344, 247.0269775390625, 308.56671142578125, 254.68626403808594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000391.npy"} +{"epoch": 0.5741556534508077, "step": 392, "batch_size": 64, "mean": 286.5428771972656, "std": 289.2859802246094, "min": -204.1536102294922, "p10": -62.56724395751948, "median": 232.6835479736328, "p90": 632.4061462402344, "max": 1248.43603515625, "pos_frac": 0.859375, "sample": [-0.12270355224609375, 182.00466918945312, -150.4317169189453, 56.245296478271484, 229.11781311035156, -111.97147369384766, 640.813232421875, 324.1934814453125, 365.55938720703125, 360.53131103515625, 612.7896118164062, -204.1536102294922, 303.97540283203125, 149.85165405273438, 178.73838806152344, 31.780973434448242, 656.231201171875, 802.0707397460938, 575.5274047851562, 221.62144470214844, -112.40550994873047, 419.2510986328125, 511.5806579589844, 45.17939758300781, 362.77069091796875, 406.688720703125, 219.268798828125, 1248.43603515625, -148.051025390625, 215.1832275390625, 236.24928283691406, 269.7291259765625, 437.50836181640625, 186.2528076171875, 558.4568481445312, 247.9419403076172, 377.21685791015625, 180.32130432128906, 211.9496307373047, 142.03375244140625, 396.6776123046875, 441.7107238769531, 61.67694091796875, -15.624198913574219, 534.3338623046875, 910.8653564453125, 441.10302734375, 469.3602600097656, 395.95343017578125, -173.5148162841797, 138.13694763183594, 117.75547790527344, 91.87554931640625, 328.43212890625, 170.78036499023438, 138.68743896484375, 310.46697998046875, 34.36906051635742, 1052.8690185546875, 50.10823059082031, -82.6856918334961, 79.60610961914062, 407.7310485839844, 798.134033203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000392.npy"} +{"epoch": 0.5756240822320118, "step": 393, "batch_size": 64, "mean": 223.21453857421875, "std": 358.58782958984375, "min": -298.7071228027344, "p10": -203.97174377441405, "median": 242.853759765625, "p90": 617.4684753417971, "max": 1671.9049072265625, "pos_frac": 0.703125, "sample": [253.9451446533203, 755.0984497070312, -2.7195167541503906, -110.15015411376953, 421.7265625, 518.7972412109375, -281.5785217285156, 415.9654541015625, 290.9287414550781, 244.27236938476562, 83.40283203125, 268.4793701171875, 189.88275146484375, 347.6634521484375, 352.11395263671875, 78.35590362548828, 1072.04052734375, 1003.4053955078125, 709.447021484375, -6.717964172363281, 241.43515014648438, 554.0103149414062, -273.4547119140625, -273.648193359375, -125.36079406738281, -22.725902557373047, -222.5099639892578, 264.8493347167969, 287.74444580078125, 306.7161865234375, 172.64207458496094, 444.80743408203125, 477.688232421875, 201.03781127929688, 569.6692504882812, 347.781494140625, 251.14730834960938, 142.22695922851562, 416.46014404296875, -104.63447570800781, 6.2047882080078125, 154.9403839111328, 637.953857421875, -180.30856323242188, -244.025634765625, 506.8775634765625, 304.1127624511719, -212.72055053710938, 29.52594757080078, 174.21768188476562, 141.603515625, 251.6863250732422, -173.2974090576172, -135.04605102539062, -183.557861328125, 380.3001403808594, 418.6398620605469, -298.7071228027344, 307.43634033203125, 43.408729553222656, -121.46366119384766, 1671.9049072265625, 651.3096923828125, -105.50674438476562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000393.npy"} +{"epoch": 0.5770925110132159, "step": 394, "batch_size": 64, "mean": 233.43197631835938, "std": 315.04718017578125, "min": -366.4601135253906, "p10": -144.96865615844723, "median": 170.14122009277344, "p90": 657.9972045898439, "max": 1118.88671875, "pos_frac": 0.8125, "sample": [106.49180603027344, 346.41973876953125, -197.7609100341797, 369.4381408691406, 173.30319213867188, -194.99179077148438, 42.0595588684082, 113.85417175292969, 525.8363037109375, 74.20751953125, 614.4022216796875, -26.99805450439453, -366.4601135253906, 40.41758728027344, -50.77703857421875, -229.82949829101562, -119.65677642822266, 797.1328125, 676.0738525390625, -199.73033142089844, 527.33154296875, -238.47271728515625, 276.61529541015625, 330.0809631347656, 195.11488342285156, 407.93109130859375, 148.09535217285156, 7.9072418212890625, -24.981224060058594, 613.0214233398438, 615.818359375, 791.6774291992188, -155.8166046142578, 374.816162109375, 126.0479736328125, 48.32069396972656, 150.66397094726562, 34.40953826904297, 136.37799072265625, 679.3433837890625, 34.31336212158203, 534.487060546875, 18.814380645751953, 191.2201385498047, 159.06333923339844, 30.6650390625, 507.6979064941406, 956.7437744140625, 266.07586669921875, 166.979248046875, 471.360107421875, 296.78863525390625, 1118.88671875, 980.9712524414062, 347.7559814453125, 42.124900817871094, 90.22850036621094, -98.10012817382812, 248.99612426757812, 353.9927673339844, 217.65255737304688, 1.7462749481201172, 174.304931640625, 289.14288330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000394.npy"} +{"epoch": 0.57856093979442, "step": 395, "batch_size": 64, "mean": 252.64047241210938, "std": 343.5081787109375, "min": -531.4727783203125, "p10": -135.5839431762695, "median": 209.6084747314453, "p90": 761.6410522460939, "max": 1074.7318115234375, "pos_frac": 0.734375, "sample": [479.1500549316406, -273.830322265625, 195.49468994140625, 60.26277160644531, 270.43603515625, 334.767578125, 388.1055603027344, 237.21542358398438, -221.790283203125, 288.33795166015625, -152.7000274658203, 491.579833984375, 476.8879089355469, -51.702171325683594, 115.6126937866211, -27.566028594970703, 721.9281005859375, 286.5360412597656, 968.7171630859375, -54.71116638183594, 628.46826171875, 16.474403381347656, 490.48968505859375, -194.11924743652344, 247.23342895507812, -11.562347412109375, 175.40286254882812, 449.63275146484375, -104.1974105834961, 5.111289978027344, 874.4866943359375, 980.3643188476562, 542.9449462890625, 137.72781372070312, 346.57940673828125, 44.362335205078125, 152.04269409179688, 387.811767578125, 968.20703125, 155.12890625, 547.345703125, 517.1116943359375, -531.4727783203125, -38.80384826660156, -203.38247680664062, 354.038330078125, -91.80088806152344, 844.6130981445312, -9.383718490600586, -109.44215393066406, 84.333984375, -146.78756713867188, 8.481069564819336, 1074.7318115234375, 778.660888671875, 223.72225952148438, 300.36907958984375, 535.4404907226562, 158.5110626220703, 480.7515563964844, 441.4140625, 66.69889068603516, 103.2699203491211, -44.75117874145508], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000395.npy"} +{"epoch": 0.580029368575624, "step": 396, "batch_size": 64, "mean": 223.44097900390625, "std": 269.5262756347656, "min": -572.9200439453125, "p10": -44.61759910583493, "median": 162.15258026123047, "p90": 580.7717254638675, "max": 978.7274169921875, "pos_frac": 0.828125, "sample": [152.63963317871094, 628.5917358398438, 88.07463836669922, 193.48367309570312, 109.64273071289062, 610.4725952148438, 139.85574340820312, 229.32733154296875, 130.25958251953125, 262.46478271484375, 360.9987487792969, 301.32647705078125, 223.539306640625, 412.2607727050781, -133.66275024414062, 169.56349182128906, 210.61703491210938, 14.241714477539062, 64.26347351074219, 125.96556854248047, 77.54801177978516, 403.9941711425781, 882.7412109375, 321.0607604980469, 79.58920288085938, 476.6116027832031, 314.65716552734375, 129.02386474609375, 184.61871337890625, 647.5913696289062, -63.1528205871582, 391.18310546875, 141.04879760742188, 284.6595153808594, 63.7856559753418, 186.72413635253906, 468.208984375, -198.24859619140625, 479.54754638671875, 709.7294921875, 422.5369873046875, -12.45574951171875, -76.0059814453125, 511.4696960449219, -1.83929443359375, -58.401248931884766, 501.299560546875, -10.836326599121094, 43.640106201171875, 260.5312194824219, 911.5062866210938, 91.25406646728516, 14.974365234375, -104.87432861328125, -11.990127563476562, 302.353515625, 269.3258361816406, 138.63900756835938, 154.74166870117188, -572.9200439453125, 90.98818969726562, 978.7274169921875, 50.90643310546875, 131.80355834960938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000396.npy"} +{"epoch": 0.5814977973568282, "step": 397, "batch_size": 64, "mean": 294.6309509277344, "std": 310.2337951660156, "min": -315.20025634765625, "p10": -17.836246871948227, "median": 239.31919860839844, "p90": 732.7612304687501, "max": 1100.436279296875, "pos_frac": 0.875, "sample": [38.494571685791016, 545.5816650390625, 7.580108642578125, 366.1274108886719, 155.13739013671875, 81.11717224121094, 269.2242431640625, 380.63671875, 520.6385498046875, 197.1071014404297, 757.5185546875, 4.9642791748046875, 607.0662841796875, -117.91143798828125, 158.33953857421875, 391.1460876464844, 228.49502563476562, -232.50326538085938, 442.1328125, 715.1552734375, 250.14337158203125, 740.306640625, 326.677490234375, 438.7029113769531, 1100.436279296875, -1.7455902099609375, 707.9205322265625, 447.82354736328125, -178.03121948242188, 147.73912048339844, 553.8992919921875, 269.35418701171875, 108.78329467773438, 951.1329345703125, 465.3045349121094, 196.60128784179688, 56.2352180480957, 381.12957763671875, 171.9725341796875, 168.97300720214844, -315.20025634765625, -186.97879028320312, 84.34886932373047, -268.6643371582031, 483.4177551269531, 128.9097442626953, 61.94788360595703, 120.46666717529297, -24.732242584228516, 526.6650390625, 46.6694450378418, 767.023681640625, 178.33505249023438, 955.0327758789062, 137.08169555664062, 1008.1690673828125, 128.32594299316406, 189.70703125, 536.368408203125, 152.00582885742188, 294.4625244140625, 320.975830078125, 412.5562438964844, 300.07977294921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000397.npy"} +{"epoch": 0.5829662261380323, "step": 398, "batch_size": 64, "mean": 269.42486572265625, "std": 354.5132141113281, "min": -374.10101318359375, "p10": -126.43796234130858, "median": 171.44021606445312, "p90": 670.9688049316406, "max": 1393.1993408203125, "pos_frac": 0.78125, "sample": [121.81124877929688, 62.54380798339844, 456.1236877441406, 700.7810668945312, -130.54249572753906, 78.37061309814453, 318.70635986328125, -307.9384765625, 92.97990417480469, -131.15887451171875, 551.9505615234375, -374.10101318359375, 673.40625, 407.06072998046875, 488.9301452636719, 1049.5216064453125, 165.6509246826172, 64.76165008544922, 618.8248291015625, 305.91827392578125, 523.19140625, 157.30850219726562, 618.2608032226562, 466.8486633300781, 1222.838134765625, -12.82244873046875, 236.67669677734375, 177.22950744628906, 665.2814331054688, 401.8486633300781, 40.43408203125, 186.37738037109375, -116.8607177734375, -24.169021606445312, 350.8211975097656, 96.06849670410156, 85.11580657958984, 385.23211669921875, 47.126441955566406, 441.20391845703125, 101.29701232910156, 476.64056396484375, 136.43515014648438, 63.80670928955078, 141.70431518554688, -50.537757873535156, 398.43670654296875, 4.473358154296875, -33.48603820800781, 53.73724365234375, 210.4791259765625, 1393.1993408203125, 610.7047119140625, 787.4501342773438, -352.8045654296875, 543.0606689453125, 435.009033203125, 562.0365600585938, 867.43505859375, 164.24705505371094, -54.36705017089844, -35.47314453125, -140.7406005859375, -201.16558837890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000398.npy"} +{"epoch": 0.5844346549192364, "step": 399, "batch_size": 64, "mean": 237.1878662109375, "std": 330.4715881347656, "min": -453.79248046875, "p10": -197.9011688232421, "median": 203.0102081298828, "p90": 641.5745605468751, "max": 1088.0712890625, "pos_frac": 0.75, "sample": [137.57180786132812, 338.63726806640625, 93.27274322509766, -284.42962646484375, 611.968994140625, -49.98015594482422, 647.7149658203125, 37.25083541870117, 147.2471923828125, -39.489501953125, 272.3608093261719, -261.1718444824219, 679.186279296875, -34.28271484375, 627.2469482421875, 186.9298553466797, 317.72930908203125, -86.13488006591797, 18.676355361938477, 90.84559631347656, 792.7872314453125, 140.66612243652344, 384.22332763671875, 220.76637268066406, 605.9594116210938, 801.9561767578125, 203.14801025390625, 382.1923828125, -308.23095703125, 239.10885620117188, -16.719161987304688, 419.2611389160156, 202.87240600585938, 139.1441192626953, 442.5179138183594, -234.01034545898438, 1088.0712890625, 503.9341125488281, 607.31298828125, -295.05364990234375, 387.6853942871094, 510.1524353027344, 192.8419647216797, -453.79248046875, -44.78609848022461, 409.08203125, 70.368896484375, 800.1270751953125, 18.861534118652344, -113.64642333984375, 465.6021423339844, 966.3667602539062, 511.27325439453125, 122.25120544433594, 564.1085815429688, 229.23658752441406, 178.25466918945312, 246.3355712890625, -358.7386779785156, 263.4276428222656, 442.1190490722656, -90.11674499511719, 191.73626708984375, -99.78597259521484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000399.npy"} +{"epoch": 0.5859030837004405, "step": 400, "batch_size": 64, "mean": 338.11065673828125, "std": 315.2834167480469, "min": -499.58160400390625, "p10": -80.38878707885739, "median": 339.76690673828125, "p90": 721.0635009765625, "max": 1158.21533203125, "pos_frac": 0.8125, "sample": [274.6685485839844, 478.17816162109375, 454.07952880859375, 256.9183654785156, 578.3345947265625, -101.78768920898438, 473.0855712890625, 231.01199340820312, -103.56609344482422, 596.1454467773438, 217.96438598632812, 81.7364273071289, 507.1319274902344, 295.861083984375, -499.58160400390625, 755.3762817382812, 681.1170654296875, 179.63580322265625, 419.806640625, 392.49847412109375, 793.414306640625, 587.2797241210938, 76.11917114257812, 376.0484619140625, 100.90636444091797, 353.34490966796875, 76.44090270996094, 538.8584594726562, 362.59808349609375, 379.92694091796875, 410.8214416503906, 174.114990234375, -95.45459747314453, 405.85369873046875, 264.5882873535156, -242.2035369873047, -10.225349426269531, 778.3825073242188, 326.18890380859375, 551.484619140625, -22.286331176757812, 192.35821533203125, 325.15447998046875, 1015.8079833984375, -16.149534225463867, 609.373779296875, 324.6605224609375, 176.01002502441406, -6.002025604248047, 1158.21533203125, 708.9314575195312, -95.88784790039062, 243.02227783203125, 469.2120361328125, 319.3391418457031, 668.3526611328125, -272.98870849609375, 725.8497314453125, 651.5064697265625, 389.264404296875, 252.47113037109375, 709.8956298828125, 781.104248046875, -45.2352294921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000400.npy"} +{"epoch": 0.5873715124816447, "step": 401, "batch_size": 64, "mean": 192.7608642578125, "std": 395.2124328613281, "min": -816.3862915039062, "p10": -215.9897262573242, "median": 119.4109878540039, "p90": 776.7494018554689, "max": 1144.490966796875, "pos_frac": 0.6875, "sample": [111.43016052246094, 527.596923828125, -44.81599044799805, 232.37152099609375, 551.397705078125, 70.3274154663086, -359.6578369140625, -371.9735412597656, -816.3862915039062, -74.66069030761719, 71.81664276123047, 239.63571166992188, -223.515869140625, 85.41405487060547, -27.94573974609375, -28.4982967376709, 374.6968688964844, 263.9377746582031, 127.39181518554688, 11.753372192382812, -71.85562133789062, -191.15733337402344, -58.22547149658203, 1006.0535888671875, 40.847755432128906, 669.711181640625, 946.5023803710938, 463.19677734375, 795.6967163085938, 204.19239807128906, 100.48585510253906, 294.9466247558594, -58.218990325927734, 49.30817413330078, -92.74811553955078, -497.4604797363281, 1000.93359375, -67.13166809082031, 243.8336944580078, 551.4302978515625, 195.38597106933594, 523.5400390625, 316.21502685546875, -40.412899017333984, 278.874267578125, 191.36279296875, 181.16200256347656, -192.7491912841797, 1144.490966796875, -507.85302734375, 650.81884765625, 483.10467529296875, 370.8598937988281, 88.43657684326172, 912.1082763671875, 882.2730712890625, 53.71186065673828, 341.84649658203125, 28.566009521484375, 59.384395599365234, 128.60955810546875, 732.5390014648438, -337.8073425292969, -198.42872619628906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000401.npy"} +{"epoch": 0.5888399412628488, "step": 402, "batch_size": 64, "mean": 350.63134765625, "std": 317.7698669433594, "min": -262.6744689941406, "p10": -1.5566047668457004, "median": 281.19447326660156, "p90": 821.0599975585938, "max": 1423.74951171875, "pos_frac": 0.890625, "sample": [468.379150390625, 28.1163387298584, 1.1936264038085938, 436.416259765625, 225.4127197265625, 426.92327880859375, -7.5193023681640625, 275.0831604003906, -42.156578063964844, 682.3616333007812, 381.35546875, 579.9024658203125, 681.9989013671875, 195.39942932128906, -90.49908447265625, 596.1932373046875, -2.7352752685546875, 267.7616271972656, 211.59449768066406, 431.19158935546875, 563.841796875, 205.81478881835938, 601.0399780273438, -262.6744689941406, 356.1004943847656, 510.20928955078125, 52.177093505859375, 946.5162353515625, 49.63157653808594, 898.30126953125, 210.21804809570312, 357.26983642578125, 198.56187438964844, 454.2655029296875, 129.52615356445312, 259.04718017578125, 879.95458984375, 99.70925903320312, 180.511474609375, 726.011962890625, 8.224777221679688, -120.35443115234375, 810.0391845703125, 945.333984375, 231.96224975585938, 473.9577941894531, 825.783203125, 152.26039123535156, 319.2220153808594, 1423.74951171875, 464.02435302734375, 574.4124755859375, 48.59202575683594, 919.7720947265625, 218.4840087890625, 325.99798583984375, 333.0675048828125, 274.7002258300781, 156.92318725585938, -168.98251342773438, 359.2623291015625, 193.17019653320312, 221.09085083007812, 287.3057861328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000402.npy"} +{"epoch": 0.5903083700440529, "step": 403, "batch_size": 64, "mean": 268.3475341796875, "std": 307.86566162109375, "min": -344.90106201171875, "p10": -77.46739959716794, "median": 258.1399383544922, "p90": 667.5409179687501, "max": 1165.1241455078125, "pos_frac": 0.8125, "sample": [225.11199951171875, 20.484413146972656, 83.28958892822266, 210.86529541015625, 378.8893127441406, 40.09953689575195, 250.79547119140625, 375.0314025878906, -44.15587615966797, -200.82862854003906, 145.97357177734375, -29.252044677734375, -344.90106201171875, 534.738525390625, 125.41619873046875, 864.0804443359375, 273.2659912109375, 22.520925521850586, 420.4881591796875, 295.4149169921875, 357.97576904296875, 76.07241821289062, 741.47265625, -19.70227813720703, -91.74376678466797, 576.7831420898438, 524.3794555664062, 682.508544921875, 265.4844055175781, 921.3526611328125, -139.6262664794922, 479.5031433105469, 614.6871337890625, 25.774715423583984, -18.52124786376953, 530.19775390625, -119.29427337646484, 275.7547607421875, 632.616455078125, 511.9295654296875, -174.01719665527344, 294.3140563964844, 1165.1241455078125, 20.53888511657715, 530.9653930664062, 130.77745056152344, 283.7168884277344, 764.1170654296875, 366.6388854980469, 945.20849609375, 296.16412353515625, 486.5464172363281, 366.1875305175781, 121.06269836425781, -42.4117431640625, 18.233474731445312, 51.172996520996094, 348.1732482910156, 4.2194671630859375, 99.11949157714844, 355.41070556640625, 229.3226318359375, -139.47171020507812, 178.19479370117188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000403.npy"} +{"epoch": 0.591776798825257, "step": 404, "batch_size": 64, "mean": 263.1905212402344, "std": 367.5498352050781, "min": -581.5407104492188, "p10": -209.05089416503904, "median": 251.0316619873047, "p90": 733.0750488281253, "max": 1286.637939453125, "pos_frac": 0.75, "sample": [-298.51263427734375, 284.09375, 55.01439666748047, 616.640380859375, 596.3072509765625, 182.30931091308594, 195.08485412597656, 1087.9166259765625, 137.66217041015625, 797.5580444335938, 1286.637939453125, 237.02291870117188, 291.6031494140625, 872.0501708984375, -106.12085723876953, -360.7832946777344, 888.8303833007812, 100.57665252685547, -303.8775634765625, 129.68980407714844, -80.98379516601562, -18.753929138183594, -102.99214172363281, 435.85009765625, 236.37017822265625, 308.97784423828125, -224.39413452148438, 459.8589782714844, 256.18060302734375, 337.0788269042969, 538.7968139648438, -266.0541687011719, 399.61712646484375, 150.19412231445312, 582.7069091796875, 112.95793914794922, -160.28651428222656, 168.7943572998047, 308.5280456542969, 489.911376953125, -581.5407104492188, 414.8524169921875, 94.01327514648438, 607.7596435546875, 245.88272094726562, -256.20574951171875, -173.25, 542.541748046875, 619.2855224609375, -103.61613464355469, 400.5937805175781, -5.037635803222656, 385.27093505859375, 394.257080078125, 507.84796142578125, 763.2520751953125, 317.1148986816406, 48.98573303222656, 2.9203243255615234, -97.00331115722656, 895.2898559570312, 139.79254150390625, 662.6619873046875, 396.46295166015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000404.npy"} +{"epoch": 0.593245227606461, "step": 405, "batch_size": 64, "mean": 215.46173095703125, "std": 391.1243896484375, "min": -532.3126220703125, "p10": -288.88966979980466, "median": 167.55264282226562, "p90": 787.842077636719, "max": 1109.4901123046875, "pos_frac": 0.703125, "sample": [872.1931762695312, 804.448974609375, 243.83645629882812, -202.0233917236328, -487.64556884765625, -41.402862548828125, -458.3470458984375, 129.46060180664062, 401.4983215332031, -254.40301513671875, 52.582725524902344, 1108.689208984375, 546.4450073242188, 120.33370971679688, 82.29390716552734, 1109.4901123046875, -47.166046142578125, -102.77518463134766, 65.10476684570312, -149.65115356445312, 139.59521484375, 254.67262268066406, 114.17478942871094, 491.7491455078125, 169.30117797851562, 150.04893493652344, -156.27149963378906, 569.5997314453125, -387.15655517578125, -71.21422576904297, 427.5127258300781, -20.481645584106445, 422.63922119140625, 9.067144393920898, 416.478271484375, 165.80410766601562, 276.9541320800781, -299.84375, 25.557714462280273, 69.37435913085938, 521.1126708984375, 201.93466186523438, 749.0926513671875, 380.0829162597656, -33.28982925415039, 178.11068725585938, 603.2662353515625, -288.8740539550781, 251.11691284179688, 283.82855224609375, 375.43292236328125, -288.8963623046875, 229.93675231933594, 945.34423828125, 91.6680679321289, -532.3126220703125, 475.88031005859375, 329.8471374511719, -50.290069580078125, 989.773681640625, 857.3580322265625, -328.0238037109375, 550.3795776367188, 736.547607421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000405.npy"} +{"epoch": 0.5947136563876652, "step": 406, "batch_size": 64, "mean": 283.2893981933594, "std": 337.6618347167969, "min": -351.65081787109375, "p10": -88.09148178100584, "median": 221.9390869140625, "p90": 704.5681274414063, "max": 1229.801513671875, "pos_frac": 0.8125, "sample": [447.8423156738281, 145.60997009277344, 278.2334899902344, 84.13882446289062, 233.28643798828125, 211.507568359375, 186.35281372070312, -323.0037536621094, 1197.2091064453125, 170.15374755859375, 487.1400451660156, 244.3678436279297, 1229.801513671875, 574.9722900390625, 308.0594482421875, -11.247119903564453, 528.6973266601562, 307.2388610839844, 547.2990112304688, -97.00940704345703, 867.5145263671875, 355.8169250488281, 347.3258056640625, -110.66578674316406, 111.4843521118164, 130.2452850341797, 35.59765625, -67.28298950195312, 525.5972290039062, -5.918487548828125, -116.84254455566406, 196.6025848388672, 467.8813781738281, -313.52764892578125, -159.62017822265625, 513.1633911132812, 833.3294677734375, 199.0682373046875, 156.61163330078125, -13.875608444213867, 171.34268188476562, 199.34384155273438, 429.0802307128906, -38.347023010253906, 4.813385009765625, 71.49185943603516, 719.9649658203125, 346.8013610839844, 35.5540771484375, 232.37060546875, 672.2633666992188, 681.026611328125, 442.0760192871094, 21.202621459960938, -351.65081787109375, 12.052375793457031, 714.6573486328125, 554.3174438476562, 1074.270751953125, 436.8966064453125, 297.055419921875, 562.5230712890625, 97.22752380371094, 41.030982971191406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000406.npy"} +{"epoch": 0.5961820851688693, "step": 407, "batch_size": 64, "mean": 171.37548828125, "std": 324.82891845703125, "min": -873.1190795898438, "p10": -261.7927963256836, "median": 158.5663299560547, "p90": 580.8947753906251, "max": 849.3638305664062, "pos_frac": 0.75, "sample": [-313.7042236328125, 556.8092041015625, 510.5126647949219, -352.3411560058594, -290.2933654785156, -91.6382064819336, 266.89129638671875, -137.68228149414062, -329.2121887207031, 278.70526123046875, 395.2393798828125, 122.71168518066406, 352.6322021484375, 686.2410278320312, 597.0001831054688, 30.121797561645508, -45.96975326538086, 478.98687744140625, 72.60370635986328, -90.11820983886719, 355.2977600097656, 104.03692626953125, 102.75935363769531, 110.13717651367188, 79.23686218261719, 508.9378662109375, 213.2528076171875, 224.27590942382812, 112.37825012207031, 148.89378356933594, -277.3645935058594, 188.21139526367188, 12.7587890625, -116.0595474243164, 37.433982849121094, 114.04093933105469, 513.0505981445312, 755.3618774414062, 62.40315628051758, -873.1190795898438, 714.78173828125, 385.4932861328125, 426.88580322265625, 8.893836975097656, 390.193603515625, 168.23887634277344, 205.2099609375, 849.3638305664062, 381.1499328613281, 287.6253662109375, 623.7909545898438, -204.2164306640625, -86.45986938476562, 548.8045654296875, 6.063713073730469, 86.3484878540039, 173.0283660888672, 523.3776245117188, -225.45860290527344, -367.6953125, 591.2171630859375, 306.13128662109375, -179.67189025878906, 281.5140075683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000407.npy"} +{"epoch": 0.5976505139500734, "step": 408, "batch_size": 64, "mean": 199.35397338867188, "std": 262.37542724609375, "min": -281.5787353515625, "p10": -160.31171722412108, "median": 222.6162567138672, "p90": 551.3729675292971, "max": 820.1580810546875, "pos_frac": 0.765625, "sample": [44.22380065917969, 673.7825317382812, 820.1580810546875, -26.48968505859375, 424.415771484375, 359.86395263671875, -101.381103515625, 145.37135314941406, -214.15423583984375, -148.94407653808594, -26.02161979675293, -281.5787353515625, 84.47077178955078, 11.3223876953125, 19.655067443847656, -229.9952850341797, 153.20115661621094, 333.86590576171875, 286.340087890625, 92.32295227050781, 212.79771423339844, 366.81329345703125, -106.61273193359375, 415.2051086425781, -122.52421569824219, 276.7889404296875, -22.022958755493164, 263.8956604003906, 78.27295684814453, 682.4100341796875, 190.1891326904297, 254.06973266601562, 519.43798828125, 20.782424926757812, -266.83489990234375, -177.50100708007812, 280.19366455078125, 269.0115966796875, 76.21672058105469, 226.70370483398438, 348.6253967285156, 391.3998718261719, 348.55517578125, 436.572998046875, 218.52880859375, -245.16567993164062, 716.4244995117188, -8.166879653930664, 31.269428253173828, 68.00973510742188, 154.15499877929688, 565.0593872070312, 267.9480895996094, -165.18356323242188, 335.35284423828125, 311.9005432128906, 48.61157989501953, 424.47393798828125, 238.15513610839844, 322.1846008300781, 665.7130126953125, 609.3946533203125, 476.03594970703125, 341.0777893066406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000408.npy"} +{"epoch": 0.5991189427312775, "step": 409, "batch_size": 64, "mean": 197.1689453125, "std": 284.3892822265625, "min": -375.3445129394531, "p10": -113.63123474121092, "median": 152.73265838623047, "p90": 554.8817016601564, "max": 939.827880859375, "pos_frac": 0.734375, "sample": [-39.148162841796875, -360.309814453125, 275.1625061035156, 211.21517944335938, 209.88677978515625, 326.0477294921875, -96.43924713134766, -91.885986328125, -55.665283203125, 139.64671325683594, 501.2615966796875, 504.5274353027344, -284.5538635253906, 153.34951782226562, 405.0556640625, 134.28067016601562, 219.48709106445312, 278.8916320800781, 170.95986938476562, 791.273193359375, 307.76580810546875, -120.99922943115234, 328.00823974609375, 61.00645065307617, 634.7122192382812, -131.95819091796875, -4.688819885253906, 21.070587158203125, -41.910980224609375, 152.1157989501953, 421.68243408203125, 728.9144287109375, 526.6717529296875, -375.3445129394531, 58.897682189941406, 939.827880859375, -1.8951644897460938, 96.9169921875, -336.50909423828125, 114.31156158447266, 479.3385314941406, -6.475547790527344, 566.9716796875, 101.80342102050781, 240.47705078125, 264.2730407714844, 80.94062042236328, 146.8148193359375, 802.3175659179688, 518.5272216796875, -34.35120391845703, 188.07666015625, -160.51124572753906, 204.75267028808594, 197.64576721191406, 445.4736022949219, 358.1229248046875, 319.9856872558594, 114.98721313476562, 69.00715637207031, 766.2042236328125, -24.255599975585938, 68.21160888671875, 138.8350830078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000409.npy"} +{"epoch": 0.6005873715124816, "step": 410, "batch_size": 64, "mean": 228.97573852539062, "std": 282.6621398925781, "min": -582.3479614257812, "p10": -92.02460708618163, "median": 231.5849609375, "p90": 598.9407226562502, "max": 871.841064453125, "pos_frac": 0.796875, "sample": [208.71292114257812, 292.20361328125, -119.7322998046875, 72.54120635986328, 498.6546630859375, 341.7688903808594, 515.848388671875, 484.3288269042969, 424.0709228515625, 15.912897109985352, 150.48255920410156, 229.12741088867188, 12.050643920898438, 469.5980224609375, -97.88434600830078, -129.40777587890625, 625.3052978515625, -418.9066162109375, -8.84006118774414, 369.0472412109375, 412.3575439453125, 707.2344970703125, -78.35188293457031, 83.19996643066406, 618.7451171875, 642.4994506835938, 431.50616455078125, 19.145381927490234, 364.531494140625, 871.841064453125, 291.5876770019531, 26.135013580322266, 356.3816833496094, 60.098175048828125, 382.04296875, -30.755889892578125, -54.171485900878906, -66.48969268798828, 124.225341796875, -150.7796630859375, 178.65126037597656, 440.92034912109375, 162.30816650390625, 403.3916015625, 203.16653442382812, -582.3479614257812, 234.04251098632812, 247.3255157470703, 685.8773803710938, 347.77716064453125, 109.2735595703125, -3.1500492095947266, 418.04046630859375, 538.6793212890625, 293.14227294921875, 725.6133422851562, 163.92176818847656, 347.3605041503906, -348.66314697265625, 111.75529479980469, 244.16183471679688, 29.59474754333496, 552.73046875, 205.0089569091797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000410.npy"} +{"epoch": 0.6020558002936858, "step": 411, "batch_size": 64, "mean": 230.6470947265625, "std": 336.9100341796875, "min": -545.6041259765625, "p10": -82.0435432434082, "median": 167.1031723022461, "p90": 648.199383544922, "max": 1370.7257080078125, "pos_frac": 0.734375, "sample": [247.113525390625, 18.087059020996094, -169.723876953125, 83.21456146240234, 564.8941650390625, 91.48812103271484, 74.29931640625, 261.4012145996094, 409.04461669921875, 386.96563720703125, 146.041259765625, -33.161048889160156, 180.329833984375, -54.610958099365234, 31.927160263061523, -13.891481399536133, 466.81427001953125, 281.0556945800781, 82.98577880859375, 421.1095886230469, 867.9778442382812, 346.6117858886719, -184.6480712890625, -23.078039169311523, 140.20785522460938, 160.04112243652344, -3.2692108154296875, 273.716064453125, 114.55447387695312, 1054.056640625, 129.46444702148438, 434.4113464355469, 861.0021362304688, -72.0785140991211, 520.5014038085938, 18.876739501953125, 376.7003479003906, 620.5962524414062, 692.9749145507812, 177.00833129882812, 28.61549949645996, 369.80511474609375, 148.232421875, 312.030029296875, 154.97256469726562, 331.3791809082031, -28.614391326904297, -545.6041259765625, 1370.7257080078125, 262.2427673339844, 176.1421661376953, 377.68804931640625, 584.3035888671875, 657.39990234375, 626.7315063476562, -198.57901000976562, -21.46228790283203, -440.97882080078125, 174.16522216796875, -86.31427001953125, 786.0245361328125, -162.78965759277344, -24.944778442382812, -70.76884460449219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000411.npy"} +{"epoch": 0.6035242290748899, "step": 412, "batch_size": 64, "mean": 243.07411193847656, "std": 318.76434326171875, "min": -477.32220458984375, "p10": -130.01181182861328, "median": 220.8287353515625, "p90": 599.2802490234376, "max": 1179.9510498046875, "pos_frac": 0.765625, "sample": [-158.27012634277344, 177.36895751953125, 382.7421875, 490.5201416015625, 380.5064697265625, -134.91415405273438, 733.2644653320312, -60.52442169189453, 208.12557983398438, 563.389404296875, 104.89566040039062, 382.8505554199219, 70.1719970703125, 334.79083251953125, -3.7786426544189453, 515.1806640625, 319.71771240234375, 66.65335845947266, -477.32220458984375, 232.75082397460938, 39.44672393798828, 40.982154846191406, 622.7360229492188, 203.26963806152344, 85.41773986816406, 412.4625549316406, -10.554534912109375, 377.0055236816406, -160.42611694335938, 212.01080322265625, -50.22783660888672, 43.81372833251953, 229.64666748046875, 295.9822998046875, 184.43260192871094, -50.03111267089844, -308.5654296875, -149.36512756347656, 114.39800262451172, 946.529541015625, 8.7999267578125, -434.18511962890625, 333.05438232421875, 554.8623657226562, 703.3291015625, -118.57301330566406, 538.2920532226562, 794.8870239257812, 48.076324462890625, 517.7313232421875, 354.9781494140625, -68.00656127929688, 110.49085998535156, 582.308837890625, 581.3128662109375, 1179.9510498046875, 18.076915740966797, 388.80511474609375, 415.45880126953125, -82.60403442382812, 467.65838623046875, 326.7456970214844, 606.5537109375, 521.6556396484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000412.npy"} +{"epoch": 0.604992657856094, "step": 413, "batch_size": 64, "mean": 238.66033935546875, "std": 300.4703369140625, "min": -719.5596313476562, "p10": -132.94041671752922, "median": 246.29509735107422, "p90": 598.6456909179689, "max": 1012.4501342773438, "pos_frac": 0.859375, "sample": [67.4180908203125, 62.84218215942383, 142.92681884765625, 378.6451721191406, 147.27032470703125, 83.69859313964844, 210.25161743164062, 237.07003784179688, 50.259132385253906, 201.19052124023438, 663.1674194335938, 927.8065185546875, -56.62860870361328, 264.32366943359375, 31.955726623535156, -26.79835319519043, 374.66461181640625, 140.05712890625, 51.96748352050781, 781.2077026367188, 90.87196350097656, 462.4417724609375, 357.4571838378906, -719.5596313476562, 78.178955078125, 255.52015686035156, 280.58599853515625, 287.9378662109375, 628.1552734375, 1012.4501342773438, 357.983642578125, 354.6357421875, 560.3837890625, 342.8978271484375, -165.64547729492188, 560.2416381835938, 326.66522216796875, 161.2135009765625, 321.16961669921875, 318.0616455078125, 116.64958190917969, 158.5724639892578, 568.6671142578125, -295.1612548828125, 611.49365234375, 369.6800537109375, 204.16366577148438, -289.0309753417969, -285.0711669921875, -393.61846923828125, 200.90528869628906, 411.67742919921875, 445.093994140625, 641.022216796875, 19.439876556396484, 422.58660888671875, 307.7952575683594, 481.3161315917969, 188.7069549560547, 295.2352294921875, 114.18658447265625, 217.02027893066406, 399.60186767578125, -241.61383056640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000413.npy"} +{"epoch": 0.6064610866372981, "step": 414, "batch_size": 64, "mean": 262.623779296875, "std": 312.8013916015625, "min": -688.9818115234375, "p10": -39.92876396179199, "median": 262.8604736328125, "p90": 673.5169311523438, "max": 987.6622314453125, "pos_frac": 0.796875, "sample": [349.039794921875, 266.2633361816406, 403.6090087890625, 266.36212158203125, -6.140041351318359, 900.2952270507812, 190.40399169921875, -688.9818115234375, 25.480026245117188, 201.2352294921875, 327.9910888671875, 266.9580383300781, 656.96533203125, 883.192138671875, 262.6720275878906, -89.10786437988281, 360.0267333984375, 92.89413452148438, -79.58162689208984, 161.7309112548828, 256.35113525390625, 514.55810546875, -34.9576301574707, 140.04043579101562, 473.1497802734375, 277.06353759765625, -192.88125610351562, -10.43931770324707, 447.870361328125, 86.04353332519531, 680.6104736328125, 487.52972412109375, -31.588603973388672, 339.55206298828125, 703.656494140625, 55.41451644897461, 395.87164306640625, -42.05924987792969, -411.7901611328125, 263.0489196777344, 329.08575439453125, 222.86190795898438, 590.2991333007812, -271.1608581542969, 74.41436767578125, 488.9073486328125, 31.074981689453125, 617.0107421875, 106.6562271118164, -5.505359649658203, 339.8614501953125, 179.1416473388672, 147.06692504882812, 123.9037857055664, 259.54833984375, 307.81982421875, 837.509033203125, -0.515960693359375, 449.0989990234375, 943.9207153320312, 987.6622314453125, 395.29888916015625, 150.40151977539062, 355.20703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000414.npy"} +{"epoch": 0.6079295154185022, "step": 415, "batch_size": 64, "mean": 198.6478271484375, "std": 307.19818115234375, "min": -870.1644287109375, "p10": -131.20554428100584, "median": 202.99459075927734, "p90": 589.886767578125, "max": 965.5198974609375, "pos_frac": 0.75, "sample": [292.6638488769531, -121.0565185546875, 362.01678466796875, 774.4862670898438, -41.90464782714844, 745.4720458984375, 598.2584228515625, 166.2056121826172, -31.80596160888672, 220.4238739013672, 516.3147583007812, 377.5472106933594, 477.45184326171875, 418.77447509765625, -127.09093475341797, -132.9689483642578, 202.45291137695312, -291.5783386230469, 324.20416259765625, 214.90708923339844, 6.561796188354492, 32.29827880859375, 6.093841552734375, 668.5350341796875, -193.86978149414062, 127.66352081298828, 736.472900390625, -42.515235900878906, 83.69782257080078, 305.87957763671875, -41.5523681640625, 392.8213806152344, 278.3681945800781, 63.77850341796875, 965.5198974609375, -306.0286865234375, -110.17671203613281, 123.40936279296875, 461.73785400390625, 738.192138671875, 114.71861267089844, 354.2250671386719, 159.137451171875, 93.1400146484375, 136.10650634765625, 203.53627014160156, -9.924657821655273, 268.99481201171875, 420.13623046875, -136.4527587890625, 204.707763671875, 436.8247375488281, 343.6966247558594, 14.392730712890625, 338.021728515625, -18.10041618347168, 224.99844360351562, 3.652801513671875, 453.91522216796875, -200.47390747070312, 570.3529052734375, 112.96824645996094, -870.1644287109375, 253.38986206054688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000415.npy"} +{"epoch": 0.6093979441997063, "step": 416, "batch_size": 64, "mean": 318.10662841796875, "std": 295.9786071777344, "min": -174.4176025390625, "p10": -57.99577674865722, "median": 260.36253356933594, "p90": 729.4890197753906, "max": 1036.03076171875, "pos_frac": 0.859375, "sample": [862.2131958007812, 143.32427978515625, 183.832763671875, 420.01507568359375, -104.83345794677734, 571.1655883789062, 232.17327880859375, 110.77916717529297, 482.31781005859375, 729.8838500976562, -60.388282775878906, 6.634124755859375, 62.941200256347656, 280.9063720703125, 254.34927368164062, 541.0580444335938, 331.9503173828125, 15.111656188964844, 622.2493896484375, 173.4408721923828, 455.89837646484375, 547.436279296875, 63.3376579284668, 184.53024291992188, 594.5239868164062, 266.37579345703125, 347.9117126464844, 838.7980346679688, 137.45370483398438, 728.5677490234375, 217.5502471923828, 586.6007690429688, 1001.920654296875, -174.4176025390625, -154.61593627929688, 84.0592269897461, 532.6575317382812, 494.19769287109375, 24.50902557373047, 860.500732421875, -20.60938262939453, 1036.03076171875, 271.6890563964844, 205.94190979003906, 481.42694091796875, 553.5386352539062, 326.25604248046875, 604.135009765625, 231.24945068359375, 113.92267608642578, 14.707866668701172, 218.04652404785156, -113.86343383789062, 767.713623046875, 342.5980529785156, -52.706024169921875, 324.99566650390625, -60.262813568115234, 228.52210998535156, -68.40922546386719, 122.54528045654297, 244.81021118164062, 428.2255554199219, 661.3988037109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000416.npy"} +{"epoch": 0.6108663729809104, "step": 417, "batch_size": 64, "mean": 259.7749328613281, "std": 270.2167663574219, "min": -260.778076171875, "p10": -81.41023635864256, "median": 221.3822250366211, "p90": 657.9479064941406, "max": 973.9407958984375, "pos_frac": 0.84375, "sample": [30.29218101501465, 17.794692993164062, -7.510747909545898, 286.67657470703125, 7.378124237060547, 567.2843017578125, -9.306442260742188, 214.48745727539062, -146.34939575195312, 661.4649047851562, 55.16342544555664, 618.2613525390625, 414.50872802734375, 127.20806884765625, 188.3666229248047, -84.95439147949219, -94.21867370605469, 708.942626953125, 703.4375, 62.681365966796875, 634.6029052734375, 299.6952209472656, 757.6536254882812, 213.9947509765625, 577.832275390625, 973.9407958984375, 241.98941040039062, 226.62246704101562, 156.54330444335938, 336.02935791015625, 82.4317398071289, 135.5421905517578, -125.53189849853516, 112.34992218017578, 327.745849609375, 600.7731323242188, 28.58157730102539, 291.2170715332031, 378.267822265625, 250.42604064941406, 91.10960388183594, 316.85894775390625, 216.14198303222656, 331.7861633300781, -122.0628662109375, 254.8885498046875, 130.30731201171875, 138.20938110351562, 524.6685180664062, 297.71759033203125, 466.1448974609375, -260.778076171875, 209.42337036132812, 408.9775085449219, 2.8169326782226562, -182.00302124023438, 234.47003173828125, 131.47869873046875, 699.777099609375, 475.81756591796875, 649.7415771484375, 167.2653045654297, 693.6636962890625, -73.14054107666016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000417.npy"} +{"epoch": 0.6123348017621145, "step": 418, "batch_size": 64, "mean": 274.78253173828125, "std": 379.2420654296875, "min": -456.14959716796875, "p10": -148.7391632080078, "median": 201.94451904296875, "p90": 707.0582336425782, "max": 1325.1104736328125, "pos_frac": 0.75, "sample": [217.91510009765625, -138.88308715820312, 219.03915405273438, -14.702091217041016, 109.29606628417969, 1000.3069458007812, 66.66834259033203, 62.565189361572266, 713.6193237304688, 565.5133056640625, -7.75750732421875, -35.720855712890625, 385.3734130859375, -5.342155456542969, 1164.605224609375, 624.049072265625, 326.73028564453125, -165.44105529785156, 544.598388671875, 53.27069091796875, -311.5377502441406, 691.7490234375, -117.90530395507812, 546.1982421875, 229.4366455078125, 1.2489166259765625, 303.2669677734375, 630.7600708007812, 572.935546875, 185.97393798828125, 546.0245971679688, 68.30879211425781, 97.69226837158203, 834.6673583984375, -456.14959716796875, -58.730491638183594, -195.16244506835938, 456.51019287109375, 227.1468963623047, 484.4761962890625, 1325.1104736328125, -55.236366271972656, 71.417236328125, 146.74142456054688, 304.6390380859375, 149.11875915527344, 170.61041259765625, 138.53317260742188, -317.2598876953125, -282.4730224609375, 370.0955810546875, 77.22242736816406, 609.7135009765625, -103.13401794433594, 607.4625854492188, 304.71990966796875, 618.088134765625, 1116.237060546875, 138.98007202148438, 34.87888717651367, 593.9385986328125, 975.7083740234375, 321.3188171386719, -152.96319580078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000418.npy"} +{"epoch": 0.6138032305433186, "step": 419, "batch_size": 64, "mean": 301.0840759277344, "std": 337.7168273925781, "min": -519.9669189453125, "p10": -121.15608062744133, "median": 303.91773986816406, "p90": 791.5593994140626, "max": 974.0504150390625, "pos_frac": 0.84375, "sample": [802.4600830078125, -421.76531982421875, 381.2976989746094, 568.5526123046875, 80.38782501220703, 865.969970703125, -29.62191390991211, -47.9073371887207, 26.90838623046875, 695.77001953125, 945.393798828125, 168.24429321289062, 314.9159851074219, 557.5628662109375, 460.9675598144531, 544.3795776367188, 226.12533569335938, 138.85076904296875, -519.9669189453125, 57.061973571777344, -152.54840087890625, 776.447021484375, -259.3453063964844, 292.91949462890625, 611.0562133789062, -218.6505126953125, 168.91629028320312, 124.4585952758789, 462.99639892578125, 916.4842529296875, 401.9894714355469, 95.904052734375, 424.6117248535156, 324.552734375, 432.78948974609375, 88.00371551513672, 121.93269348144531, 23.38677978515625, 521.532470703125, 356.31158447265625, 622.546630859375, 188.441162109375, 798.0361328125, 43.29740524291992, 88.5453872680664, 64.39994812011719, 62.31915283203125, -180.16702270507812, 704.4232177734375, 595.7628784179688, 608.2877197265625, 422.7348937988281, 99.248291015625, 974.0504150390625, 410.859130859375, 423.1433410644531, 490.30078125, 231.34173583984375, 119.44082641601562, 804.3206176757812, -209.92234802246094, -5.989288330078125, 169.67373657226562, 414.9504699707031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000419.npy"} +{"epoch": 0.6152716593245228, "step": 420, "batch_size": 64, "mean": 175.04345703125, "std": 404.20941162109375, "min": -922.3924560546875, "p10": -260.60480346679685, "median": 141.72257232666016, "p90": 631.7746154785157, "max": 1244.71484375, "pos_frac": 0.640625, "sample": [-166.4317626953125, 203.75625610351562, -15.587928771972656, -12.65286636352539, 634.4232177734375, -87.1158676147461, -281.7257995605469, -17.46331787109375, 210.9411163330078, 604.392578125, -6.63325309753418, -56.09654998779297, -46.2691535949707, 182.94224548339844, -21.85662841796875, 108.09009552001953, 608.3553466796875, 261.16229248046875, 84.20869445800781, 210.48892211914062, 15.217710494995117, 239.21560668945312, -211.32247924804688, 417.1786193847656, -105.57382202148438, 139.14492797851562, -79.80024719238281, 443.41534423828125, 163.846435546875, -91.25955200195312, 475.8403625488281, 475.12811279296875, 472.2303466796875, -328.90936279296875, 106.34285736083984, 187.1236572265625, -306.17181396484375, 22.43329620361328, 703.0238037109375, 770.91162109375, -13.510213851928711, -922.3924560546875, 641.1181640625, 446.2230529785156, 625.5945434570312, 152.028076171875, 560.318115234375, 436.0422668457031, 376.61016845703125, 348.09490966796875, 33.69055938720703, -485.6843566894531, 285.1578369140625, 49.72146987915039, -66.54972839355469, 1221.1636962890625, -770.4302978515625, 80.21717834472656, 144.3002166748047, 216.884033203125, 1244.71484375, -108.2766342163086, -417.1904602050781, 1219.9921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000420.npy"} +{"epoch": 0.6167400881057269, "step": 421, "batch_size": 64, "mean": 206.85391235351562, "std": 321.661865234375, "min": -895.900390625, "p10": -186.62778472900385, "median": 255.9329376220703, "p90": 602.7158142089844, "max": 870.4821166992188, "pos_frac": 0.75, "sample": [335.35595703125, 91.90191650390625, 132.49278259277344, -76.28687286376953, 271.5566711425781, 438.269775390625, 870.4821166992188, 1.954833984375, 101.09373474121094, 75.61000061035156, 285.13787841796875, -4.864048004150391, 590.437744140625, 662.0996704101562, 502.63751220703125, 659.0267333984375, 316.431396484375, -209.34922790527344, 115.82105255126953, 159.92257690429688, 775.77001953125, 477.949951171875, 119.75933837890625, -242.39859008789062, 234.1837921142578, 258.937255859375, 440.9156494140625, 285.09442138671875, -13.399253845214844, 212.84405517578125, 635.09228515625, 378.6275939941406, 607.9778442382812, 252.92861938476562, 339.7785339355469, -532.373291015625, 497.17218017578125, 344.1861572265625, -424.6907653808594, 555.5707397460938, 359.14300537109375, -20.417593002319336, 350.718017578125, 869.8255615234375, 259.6699523925781, 301.00799560546875, 162.57691955566406, -29.28361701965332, 30.83329200744629, 478.97418212890625, 105.49020385742188, -7.119682312011719, 285.468017578125, -895.900390625, -251.3241729736328, 126.20619201660156, -66.515625, -133.611083984375, -96.27099609375, 270.44854736328125, 190.98728942871094, -272.0531311035156, 320.9631652832031, 375.1748046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000421.npy"} +{"epoch": 0.618208516886931, "step": 422, "batch_size": 64, "mean": 241.48162841796875, "std": 346.12890625, "min": -782.71875, "p10": -134.23046875, "median": 230.62425231933594, "p90": 682.5755920410156, "max": 1202.6287841796875, "pos_frac": 0.78125, "sample": [312.0087890625, 682.7091674804688, 771.1266479492188, 513.7540893554688, 357.08892822265625, -187.21595764160156, -620.9985961914062, 783.1788330078125, -54.05516815185547, 1202.6287841796875, -147.7425079345703, 178.26551818847656, 248.4519500732422, 48.61083984375, 405.3426513671875, 190.15243530273438, 63.20049285888672, -128.12277221679688, -44.68716049194336, 186.3659210205078, 21.475601196289062, 808.7027587890625, 338.6158447265625, 138.23687744140625, 447.5605163574219, 168.56332397460938, 188.04534912109375, 683.1207275390625, 262.9815368652344, 583.7330322265625, 757.3829956054688, 347.97589111328125, 407.7019958496094, 212.7965545654297, 360.0581359863281, 347.7673034667969, 318.78515625, -428.4002685546875, 291.7359313964844, 40.05169677734375, -46.4642219543457, 389.96514892578125, 210.633544921875, -782.71875, -72.63711547851562, 595.0206298828125, 488.12567138671875, 29.76704216003418, 682.263916015625, -109.50462341308594, 408.66461181640625, 182.10523986816406, 20.95855712890625, 70.83558654785156, 643.6079711914062, 461.7234191894531, 380.70635986328125, -64.65217590332031, 509.0077819824219, 495.7387390136719, -136.84805297851562, 143.1200408935547, 189.37677001953125, -290.92498779296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000422.npy"} +{"epoch": 0.6196769456681351, "step": 423, "batch_size": 64, "mean": 272.1937561035156, "std": 310.9986572265625, "min": -431.39471435546875, "p10": -86.6031585693359, "median": 217.64097595214844, "p90": 715.9066833496096, "max": 1097.23779296875, "pos_frac": 0.78125, "sample": [273.6288146972656, 92.75792694091797, 182.31724548339844, 36.48382568359375, -99.97140502929688, -331.0894470214844, 798.2338256835938, 213.68869018554688, 638.423095703125, 475.0898132324219, 731.8250122070312, -136.91070556640625, 909.4207763671875, 293.43731689453125, 503.0030822753906, -31.99545669555664, 797.435546875, 812.367919921875, 794.7188720703125, 678.763916015625, -21.25262451171875, -50.5589599609375, 403.90838623046875, 198.90676879882812, 593.2373657226562, 254.66815185546875, 98.95587921142578, 60.876861572265625, -431.39471435546875, -131.58230590820312, 371.5634765625, -128.3828582763672, 307.7250671386719, 324.8765869140625, 647.2735595703125, 216.34149169921875, -2.9723987579345703, 508.7198486328125, -139.96009826660156, 137.07208251953125, 314.383544921875, 528.2266845703125, 218.94046020507812, 341.3523254394531, 1097.23779296875, 190.76255798339844, 454.7349548339844, -28.943649291992188, 443.92535400390625, 18.056472778320312, 59.41267013549805, 165.0417938232422, 509.892578125, 435.1011657714844, 172.92156982421875, 184.2457733154297, -55.41058349609375, 219.6685791015625, 173.778564453125, -42.938507080078125, 577.2544555664062, 96.70792388916016, 124.69247436523438, 371.7063903808594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000423.npy"} +{"epoch": 0.6211453744493393, "step": 424, "batch_size": 64, "mean": 246.94146728515625, "std": 335.9071350097656, "min": -346.24932861328125, "p10": -190.9647735595703, "median": 227.43800354003906, "p90": 631.04404296875, "max": 1261.65380859375, "pos_frac": 0.796875, "sample": [-256.3470764160156, 341.2500915527344, 56.54417419433594, -24.45193099975586, -56.39727783203125, 531.5452880859375, 435.2151184082031, 299.4146423339844, 82.95377349853516, -346.24932861328125, 258.6968688964844, 273.84576416015625, 22.140275955200195, -236.51097106933594, 265.5362243652344, 683.2979736328125, 469.6280212402344, 888.0587158203125, 133.0167694091797, 423.4003601074219, 517.8180541992188, 176.47666931152344, 633.332275390625, 625.704833984375, 322.53314208984375, -129.63491821289062, 230.6537322998047, 95.73722076416016, -212.26763916015625, -194.57012939453125, 119.98332977294922, 1125.13232421875, -182.55227661132812, 302.8516540527344, 405.3380126953125, -124.5113525390625, 353.47283935546875, -282.01025390625, 234.56719970703125, 79.96560668945312, -83.80797576904297, 138.46347045898438, -325.70855712890625, 66.49371337890625, 270.5622863769531, 299.4455261230469, 211.47537231445312, 83.84809112548828, 336.2838439941406, 1261.65380859375, 365.1329650878906, 612.4549560546875, 376.4134826660156, 181.5653076171875, 374.74896240234375, 90.34736633300781, 813.6777954101562, 164.72793579101562, 166.7066650390625, 1087.6466064453125, 467.75970458984375, 224.22227478027344, 129.5416717529297, 147.990478515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000424.npy"} +{"epoch": 0.6226138032305433, "step": 425, "batch_size": 64, "mean": 190.22943115234375, "std": 327.501953125, "min": -832.8908081054688, "p10": -142.7957015991211, "median": 196.04537963867188, "p90": 594.6785522460938, "max": 811.0615234375, "pos_frac": 0.765625, "sample": [-91.33145141601562, 21.942487716674805, 428.1379089355469, -336.84429931640625, -832.8908081054688, 189.58438110351562, 184.8005828857422, 275.09991455078125, 125.54391479492188, 192.99977111816406, -75.45134735107422, 520.6321411132812, -771.5028076171875, -60.83449172973633, 282.0426940917969, 198.03656005859375, 121.31632232666016, 151.4298095703125, 598.7151489257812, 24.894729614257812, 356.33258056640625, 195.80706787109375, -1.6959495544433594, 238.90357971191406, 643.637939453125, -43.50530242919922, 477.2652587890625, 71.41828918457031, -78.34757995605469, 791.2275390625, 136.82168579101562, 585.2598266601562, 616.5952758789062, -214.9292449951172, 176.60047912597656, 259.2447814941406, 678.9827270507812, 163.0972137451172, 83.43536376953125, 751.666259765625, -481.53875732421875, 396.99578857421875, 100.19929504394531, -347.1521301269531, 489.4547424316406, 357.82122802734375, 446.8091735839844, 226.81683349609375, 6.14617919921875, 368.26239013671875, 196.28369140625, 233.8297119140625, -143.4524688720703, -141.26324462890625, 258.9889221191406, 355.4774169921875, 492.511962890625, 811.0615234375, 479.7212829589844, 263.8235168457031, 481.50799560546875, -128.18142700195312, 259.8353271484375, 156.58612060546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000425.npy"} +{"epoch": 0.6240822320117474, "step": 426, "batch_size": 64, "mean": 240.19671630859375, "std": 305.95257568359375, "min": -649.3045654296875, "p10": -65.49773178100583, "median": 183.42970275878906, "p90": 687.3767822265627, "max": 855.2794799804688, "pos_frac": 0.796875, "sample": [259.48126220703125, 105.62689971923828, 106.9115982055664, 172.624267578125, 230.98153686523438, 74.62117004394531, 104.94891357421875, 223.87744140625, 742.2926025390625, 159.16798400878906, 165.10462951660156, 237.40737915039062, 11.208797454833984, -0.17346763610839844, 638.1505126953125, -241.86749267578125, 64.32127380371094, 207.55160522460938, 499.0206604003906, 166.71295166015625, -78.05020904541016, 768.6681518554688, -138.68389892578125, 194.23513793945312, 126.28953552246094, 111.45455932617188, -113.47737121582031, 358.9857482910156, 780.8685302734375, 387.9369812011719, 226.7650909423828, 616.98095703125, 68.98114776611328, 348.1797180175781, 165.97401428222656, 825.049560546875, -36.2086181640625, -356.19183349609375, 117.82454681396484, 493.667236328125, -35.98187255859375, 343.10205078125, 855.2794799804688, 75.65868377685547, 449.18597412109375, 52.292686462402344, 348.7048645019531, 543.7017211914062, -21.2967529296875, 102.72114562988281, -290.38140869140625, 354.6189880371094, -11.714698791503906, 435.56903076171875, 558.0777587890625, 308.215576171875, -16.59795570373535, 466.248779296875, 829.3765869140625, -649.3045654296875, 584.3741455078125, 708.4737548828125, 47.301605224609375, 537.7457275390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000426.npy"} +{"epoch": 0.6255506607929515, "step": 427, "batch_size": 64, "mean": 180.07781982421875, "std": 286.99896240234375, "min": -452.6756591796875, "p10": -160.45197906494138, "median": 154.61355590820312, "p90": 443.8719970703126, "max": 956.353759765625, "pos_frac": 0.765625, "sample": [332.7876281738281, 55.31534957885742, 320.97662353515625, 410.6332092285156, 818.9610595703125, 82.70931243896484, -51.870338439941406, 12.037452697753906, 386.8540954589844, 246.35202026367188, -22.863189697265625, 60.68513488769531, 118.33151245117188, 154.13050842285156, 155.0966033935547, 359.306396484375, 417.2903137207031, 888.8665771484375, 34.122596740722656, -237.89175415039062, 37.307212829589844, 133.13543701171875, 306.92547607421875, 325.0092468261719, 107.8171615600586, 305.1351318359375, 269.8572998046875, 902.0559692382812, 29.094100952148438, 170.7053680419922, -220.6746826171875, 487.8512268066406, 423.2862548828125, -114.8899917602539, -92.65388488769531, 280.63897705078125, 338.00567626953125, -57.334896087646484, 452.6944580078125, 189.54458618164062, 224.35955810546875, 115.63475036621094, 134.23550415039062, 141.13925170898438, -452.6756591796875, 270.0388488769531, 414.80511474609375, 395.4725036621094, -350.347900390625, -304.31524658203125, 483.76416015625, 219.96160888671875, 393.0436096191406, 2.9090805053710938, 956.353759765625, -119.99362182617188, 62.0302734375, 312.3968811035156, 364.0513610839844, 106.0516586303711, -140.0765380859375, -324.6298522949219, -25.385887145996094, -169.18431091308594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000427.npy"} +{"epoch": 0.6270190895741556, "step": 428, "batch_size": 64, "mean": 253.51040649414062, "std": 369.8224792480469, "min": -682.4998168945312, "p10": -152.75807266235347, "median": 238.04273986816406, "p90": 739.6875732421875, "max": 1103.1668701171875, "pos_frac": 0.765625, "sample": [-47.02299118041992, -196.83297729492188, 19.256555557250977, 420.3139953613281, 821.05419921875, 407.64947509765625, 112.47679138183594, 538.272216796875, 418.4053955078125, 1103.1668701171875, -224.65127563476562, 55.04779052734375, 510.7420959472656, 215.02694702148438, 442.7959899902344, -98.66424560546875, -585.969482421875, 740.7425537109375, 465.38189697265625, 563.8134765625, 484.20111083984375, 401.93115234375, -79.82119750976562, 26.25320053100586, 474.7798767089844, 752.5039672851562, 378.47210693359375, 210.275634765625, -96.5399169921875, 274.4470520019531, -58.13677215576172, 368.52471923828125, 892.50146484375, -88.7422103881836, 261.05853271484375, 29.54680061340332, 564.3763427734375, 494.8524169921875, 26.781299591064453, 198.90325927734375, 96.69308471679688, 153.1824951171875, 3.6314010620117188, -117.99662017822266, -591.3901977539062, -76.86238098144531, 68.53977966308594, 126.44049835205078, 1086.1376953125, 168.70321655273438, 451.9906005859375, 450.42596435546875, 549.729736328125, -682.4998168945312, 553.6224975585938, 277.3660888671875, 737.2259521484375, 769.0225830078125, 369.54498291015625, 179.3059539794922, 682.2373046875, -167.6558380126953, 126.06565856933594, -185.96810913085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000428.npy"} +{"epoch": 0.6284875183553598, "step": 429, "batch_size": 64, "mean": 192.28273010253906, "std": 349.7599182128906, "min": -621.39306640625, "p10": -232.1340148925781, "median": 193.67237854003906, "p90": 609.4571044921876, "max": 1030.1611328125, "pos_frac": 0.734375, "sample": [70.0045394897461, 157.658447265625, -380.669189453125, 319.6271057128906, -175.19070434570312, 619.2205810546875, -122.85692596435547, 124.62964630126953, 164.54656982421875, 362.60272216796875, -406.81707763671875, 179.50076293945312, 57.34862518310547, 132.17372131347656, -278.17559814453125, 342.3856506347656, 129.76480102539062, -233.44593811035156, 715.6298828125, -218.38771057128906, 1030.1611328125, 954.036865234375, 62.3221435546875, 374.1121520996094, 227.14675903320312, 38.08249282836914, 333.25897216796875, 8.180095672607422, 650.5851440429688, 519.394775390625, 240.25860595703125, 389.75665283203125, 382.97015380859375, -602.6761474609375, -78.4372329711914, -229.07286071777344, -240.56283569335938, 216.34388732910156, 236.5827178955078, -111.4657211303711, 573.329833984375, 370.77166748046875, 281.48834228515625, -25.671478271484375, 811.2740478515625, 294.2845458984375, 555.0064697265625, -210.90078735351562, 137.78904724121094, -178.47531127929688, 333.3249816894531, 156.2998809814453, 498.65570068359375, 207.843994140625, -1.476144790649414, -621.39306640625, 5.2016754150390625, 290.14825439453125, 56.76068878173828, 586.6756591796875, 389.7723083496094, 909.1142578125, 487.17694091796875, 438.56597900390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000429.npy"} +{"epoch": 0.6299559471365639, "step": 430, "batch_size": 64, "mean": 200.9507293701172, "std": 279.0693664550781, "min": -364.5477294921875, "p10": -93.379630279541, "median": 154.52639770507812, "p90": 616.9558837890628, "max": 1026.72705078125, "pos_frac": 0.765625, "sample": [232.25811767578125, 210.1974334716797, -32.512229919433594, -79.33395385742188, 712.398193359375, 415.2826232910156, 19.404815673828125, 253.50970458984375, 158.60733032226562, 517.1475219726562, 61.04121780395508, -32.76799774169922, 87.35262298583984, -98.9593276977539, 120.37083435058594, 711.7987670898438, -276.7811584472656, 401.93646240234375, 77.52079772949219, 84.17007446289062, 191.2287139892578, 99.25711822509766, 145.3650665283203, 150.44546508789062, -21.202966690063477, 85.83158874511719, 383.53302001953125, 334.7643737792969, -80.36033630371094, 195.87562561035156, 84.78282165527344, -150.6365203857422, 1026.72705078125, -15.210220336914062, 108.3761978149414, 549.0935668945312, 42.39147186279297, -364.5477294921875, 23.843931198120117, 228.8636016845703, 454.6043701171875, 750.0943603515625, 441.9651184082031, 133.0791015625, 268.21563720703125, 700.298583984375, -188.4542999267578, -215.87457275390625, 495.2910461425781, 162.02195739746094, 78.31340026855469, -56.16953659057617, 436.4301452636719, 227.44065856933594, -4.25897216796875, 277.28106689453125, 348.173828125, -226.38986206054688, 338.3731689453125, 791.4745483398438, 646.0397338867188, 224.94473266601562, 29.23011016845703, 187.65904235839844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000430.npy"} +{"epoch": 0.631424375917768, "step": 431, "batch_size": 64, "mean": 296.3401794433594, "std": 311.6828918457031, "min": -431.9054870605469, "p10": -105.97272109985347, "median": 258.3608093261719, "p90": 654.9005432128907, "max": 995.7406005859375, "pos_frac": 0.84375, "sample": [-231.79771423339844, 175.2014923095703, -144.46775817871094, 476.0992126464844, 238.73367309570312, 99.14701080322266, 785.0052490234375, 154.62469482421875, 345.9182434082031, 522.3546142578125, 256.8866882324219, 449.7737121582031, 103.2664794921875, -54.24300765991211, -135.31692504882812, 154.30601501464844, 32.490020751953125, -404.95306396484375, 539.4158935546875, -237.7196502685547, 620.3228759765625, 805.1655883789062, 995.7406005859375, 858.138671875, 150.68040466308594, 624.7637939453125, 408.8958740234375, 439.035888671875, 250.34446716308594, 287.71148681640625, 664.490966796875, 360.88934326171875, 242.702392578125, 83.67604064941406, 441.38330078125, -431.9054870605469, 976.3909912109375, 7.3450164794921875, 259.8349304199219, -65.72258758544922, 486.0613708496094, 228.52027893066406, 871.7593994140625, -3.5916481018066406, 169.4008331298828, 184.00216674804688, 424.09722900390625, 31.010324478149414, 271.7071838378906, 330.556884765625, 399.4628601074219, 626.4011840820312, 632.5228881835938, 351.32257080078125, 526.993408203125, 429.7541809082031, 172.090576171875, 118.07575225830078, -123.2227783203125, 236.14939880371094, 595.9600219726562, 179.0113525390625, 174.4451141357422, 548.671142578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000431.npy"} +{"epoch": 0.6328928046989721, "step": 432, "batch_size": 64, "mean": 277.9825134277344, "std": 291.83416748046875, "min": -301.68914794921875, "p10": -9.546617507934561, "median": 232.50311279296875, "p90": 687.4619567871093, "max": 1293.360107421875, "pos_frac": 0.875, "sample": [35.89079284667969, 167.42324829101562, 199.7931365966797, 244.27923583984375, 193.10035705566406, 205.19505310058594, 143.06756591796875, 10.307422637939453, 170.645263671875, 3.5677032470703125, 33.338077545166016, 22.483287811279297, 653.301513671875, 420.3561096191406, -13.153766632080078, 1293.360107421875, 581.2919921875, 279.7154846191406, 4.9843902587890625, 220.72698974609375, 175.92758178710938, 68.56613159179688, 273.3524169921875, 45.574188232421875, 497.67919921875, 295.1812744140625, 685.2906494140625, 173.69834899902344, 435.244384765625, 50.40655517578125, 254.26495361328125, -19.561317443847656, -180.53929138183594, 247.2772979736328, -159.38616943359375, 613.9800415039062, 156.6365203857422, -151.5294952392578, 354.8279724121094, 140.2055206298828, 188.73892211914062, -131.8951416015625, 312.34027099609375, 454.51434326171875, 269.1627197265625, 387.280517578125, 612.0333862304688, 432.2349853515625, 406.7939453125, 349.4140625, 152.35292053222656, 535.6626586914062, 18.487686157226562, 822.9622802734375, 268.3855895996094, 119.57302856445312, 751.6953735351562, 342.1283264160156, 688.3925170898438, 834.0179443359375, -301.68914794921875, 755.4327392578125, -1.1299362182617188, 697.220703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000432.npy"} +{"epoch": 0.6343612334801763, "step": 433, "batch_size": 64, "mean": 185.67514038085938, "std": 313.65447998046875, "min": -781.3613891601562, "p10": -189.79500122070309, "median": 196.85953521728516, "p90": 559.5354431152344, "max": 701.102294921875, "pos_frac": 0.765625, "sample": [31.533050537109375, 531.0285034179688, 502.7533264160156, 475.5074462890625, 418.8401794433594, -118.07252502441406, 289.2899169921875, -472.489013671875, 499.2437438964844, 570.60107421875, -781.3613891601562, 38.91923522949219, 562.4290771484375, 321.8430480957031, 89.55340576171875, 12.723766326904297, 189.92649841308594, 307.9010009765625, 615.7094116210938, -735.6954345703125, -18.5462646484375, 552.7836303710938, 562.9437866210938, 295.19580078125, 219.27810668945312, 189.60513305664062, 47.51277160644531, 203.79257202148438, -41.66032409667969, 392.1481628417969, 28.226531982421875, 701.102294921875, -298.6877136230469, 432.74505615234375, 156.3175506591797, 431.08660888671875, 500.27459716796875, -229.98001098632812, 344.230224609375, 8.667793273925781, 678.1493530273438, 491.0878601074219, 268.8655090332031, -129.16717529296875, 188.8275146484375, 57.354591369628906, 350.4879455566406, 14.147804260253906, -204.94874572753906, -266.78533935546875, 592.0054321289062, 421.4702453613281, 172.5316162109375, -33.96128463745117, 118.60018920898438, -154.43626403808594, 9.034400939941406, 363.0152282714844, -76.88200378417969, -0.13443756103515625, 307.51220703125, 414.09088134765625, 185.60389709472656, 289.5189208984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000433.npy"} +{"epoch": 0.6358296622613803, "step": 434, "batch_size": 64, "mean": 224.49386596679688, "std": 348.83428955078125, "min": -282.6473388671875, "p10": -95.26909027099607, "median": 165.888427734375, "p90": 650.6037536621096, "max": 1978.857177734375, "pos_frac": 0.71875, "sample": [459.2978210449219, 201.2190399169922, 1978.857177734375, 86.85194396972656, -117.21156311035156, 43.327423095703125, 192.17404174804688, 439.4589538574219, -5.451833724975586, -58.55821228027344, 236.8166961669922, -79.15348815917969, 86.3728256225586, 671.4127197265625, -37.5266227722168, 115.8128662109375, -115.7853012084961, -282.6473388671875, 488.5182189941406, 602.0494995117188, 812.7061767578125, -162.21640014648438, 127.94790649414062, 231.22357177734375, 293.6058349609375, 191.29710388183594, -11.339040756225586, -40.1368408203125, 227.0673370361328, 236.83944702148438, 82.7540283203125, 210.46633911132812, 407.1806945800781, -101.39128112792969, 35.54029846191406, 423.5848693847656, 117.20852661132812, 101.56179809570312, 196.61553955078125, 165.04116821289062, -53.49505615234375, 386.2742614746094, 596.484375, 75.06352233886719, 166.73568725585938, 260.47650146484375, 422.2157897949219, 131.1014862060547, 184.03729248046875, 806.2904663085938, 290.04949951171875, -42.14849853515625, 701.0137939453125, 7.423572540283203, 137.4336395263672, -20.612762451171875, 849.2034912109375, 189.3912353515625, -80.98397827148438, -184.850341796875, 847.2916259765625, 472.7655029296875, -5.0756683349609375, -219.86961364746094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000434.npy"} +{"epoch": 0.6372980910425844, "step": 435, "batch_size": 64, "mean": 244.86376953125, "std": 302.4591064453125, "min": -548.207763671875, "p10": -40.08277587890624, "median": 227.8274688720703, "p90": 624.2052856445314, "max": 992.9705810546875, "pos_frac": 0.828125, "sample": [951.7783203125, 399.379638671875, -548.207763671875, 361.55865478515625, 515.0526733398438, 237.39517211914062, 312.9373779296875, 512.9464721679688, 340.8703308105469, 100.41645812988281, 523.5924682617188, 179.02366638183594, 85.6324462890625, 264.3952331542969, -44.33290100097656, 590.913330078125, 992.9705810546875, 849.0405883789062, 260.806640625, -131.48826599121094, -11.371490478515625, 5.30316162109375, -30.165817260742188, 37.49125671386719, -114.29196166992188, 100.35360717773438, 248.27992248535156, 226.55670166015625, -120.18250274658203, 213.35699462890625, 255.21038818359375, -15.84321403503418, 157.46405029296875, 334.9220275878906, 451.42535400390625, 328.2316589355469, 440.9908447265625, -464.528076171875, 483.5765686035156, -355.6251220703125, 217.17828369140625, 229.09823608398438, 638.4732666015625, 1.49285888671875, 104.0873031616211, 50.90114212036133, 528.9202880859375, 557.3208618164062, 18.30405044555664, -16.445497512817383, 823.57421875, 44.30171203613281, 237.14205932617188, 49.9337158203125, 178.61672973632812, 709.2234497070312, 649.4585571289062, 270.5333557128906, 213.59495544433594, 473.021728515625, 153.02609252929688, 3.3448333740234375, 397.903564453125, 212.44161987304688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000435.npy"} +{"epoch": 0.6387665198237885, "step": 436, "batch_size": 64, "mean": 213.5735321044922, "std": 277.6088562011719, "min": -596.918212890625, "p10": -139.2433006286621, "median": 184.8743133544922, "p90": 524.2713806152344, "max": 887.57666015625, "pos_frac": 0.828125, "sample": [-85.63920593261719, 169.09056091308594, -167.5062713623047, 60.260643005371094, 517.9721069335938, 260.98284912109375, 326.8407287597656, 777.6779174804688, 516.438720703125, -163.28726196289062, 151.93016052246094, 804.859619140625, -11.354888916015625, -154.7533721923828, 267.7130126953125, 241.25439453125, -147.12339782714844, 44.332489013671875, 872.3900146484375, -220.17977905273438, 419.177490234375, 365.3528747558594, 267.12030029296875, 380.18194580078125, 526.9710693359375, 113.54107666015625, 425.432861328125, 570.9832153320312, 102.68757629394531, 53.79255676269531, 887.57666015625, 45.99237823486328, 478.2041320800781, 160.5799102783203, 276.4778137207031, 186.30191040039062, 59.46153259277344, 48.190773010253906, 249.36376953125, 130.21435546875, 425.4653015136719, 63.290103912353516, 181.482421875, 120.12317657470703, 138.6888885498047, 296.1444396972656, 234.0612335205078, -172.3565673828125, 183.44671630859375, 194.02374267578125, 280.54132080078125, 212.41555786132812, 223.80575561523438, 458.3995361328125, 827.6874389648438, 32.75053405761719, 84.01593780517578, 28.320663452148438, -596.918212890625, 224.3633575439453, -120.85640716552734, 109.71969604492188, 448.4774169921875, -17.889190673828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000436.npy"} +{"epoch": 0.6402349486049926, "step": 437, "batch_size": 64, "mean": 220.84902954101562, "std": 268.0671691894531, "min": -455.6229248046875, "p10": -83.92538604736328, "median": 217.6727523803711, "p90": 535.9862426757813, "max": 968.8397827148438, "pos_frac": 0.78125, "sample": [116.50404357910156, -254.15414428710938, 211.9887237548828, 462.4299011230469, 386.9308776855469, 18.477407455444336, 603.6633911132812, 181.67034912109375, 343.26397705078125, -30.441253662109375, -455.6229248046875, 214.67498779296875, -276.6971130371094, 447.2334899902344, -46.103363037109375, 203.89309692382812, 166.01797485351562, -240.29403686523438, 167.04681396484375, 541.70458984375, 44.35540008544922, 405.47564697265625, 220.67051696777344, 1.2797908782958984, -133.4515380859375, 614.876708984375, 178.65744018554688, 112.6337661743164, 386.80517578125, 366.0675048828125, 769.552490234375, 343.65643310546875, 711.460205078125, -87.15592956542969, 968.8397827148438, -76.387451171875, -37.408531188964844, 17.475584030151367, 265.1935119628906, 243.59829711914062, 17.387718200683594, 27.150543212890625, 276.949462890625, 321.635009765625, 386.69427490234375, 162.80743408203125, 460.6252746582031, 438.5615234375, 437.97650146484375, 522.6434326171875, 178.3894500732422, 441.9559631347656, 401.20318603515625, -48.822265625, -101.2232894897461, 565.6619262695312, 298.6283874511719, 390.3963623046875, -69.18913269042969, 277.42333984375, 38.024539947509766, 416.7113952636719, 227.27066040039062, -12.905693054199219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000437.npy"} +{"epoch": 0.6417033773861968, "step": 438, "batch_size": 64, "mean": 254.470458984375, "std": 280.3930358886719, "min": -390.41815185546875, "p10": -64.91844482421874, "median": 241.19740295410156, "p90": 643.617724609375, "max": 941.7203369140625, "pos_frac": 0.8125, "sample": [90.872314453125, 29.42533302307129, 393.3481140136719, -97.10731506347656, 157.21133422851562, 121.46846008300781, 801.663818359375, -44.49726867675781, 155.76547241210938, -50.30908203125, 283.29437255859375, 390.5615234375, 3.2240867614746094, 236.52761840820312, 382.61651611328125, 445.50604248046875, 302.89715576171875, 941.7203369140625, 119.89497375488281, 325.4915771484375, 155.84774780273438, 579.8436889648438, 389.92071533203125, 628.8485107421875, 165.61532592773438, 191.0257568359375, 43.84980010986328, 276.8838806152344, -330.1214294433594, -44.1634521484375, 9.965696334838867, -3.451292037963867, 444.3481140136719, 676.4754028320312, 581.0574951171875, 338.4203186035156, 258.88934326171875, -303.2053527832031, 551.178955078125, 299.3300476074219, 701.632568359375, 191.17269897460938, -89.83790588378906, 716.5970458984375, -55.02154541015625, 245.8671875, 526.59033203125, 386.055419921875, 472.57635498046875, 134.84100341796875, 85.96575927734375, 329.9692687988281, 705.74609375, 93.98826599121094, -69.15997314453125, 121.29806518554688, 193.85302734375, -390.41815185546875, -78.81452178955078, 600.1888427734375, 123.42533874511719, 649.9473876953125, 331.4766845703125, 458.0364074707031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000438.npy"} +{"epoch": 0.6431718061674009, "step": 439, "batch_size": 64, "mean": 209.87954711914062, "std": 323.9346923828125, "min": -518.1369018554688, "p10": -84.1396415710449, "median": 157.5280532836914, "p90": 699.7507263183594, "max": 1040.1805419921875, "pos_frac": 0.75, "sample": [257.4119567871094, 743.2926025390625, 102.50942993164062, 311.8546142578125, 90.042236328125, -65.57339477539062, 700.4603881835938, 50.62965393066406, 169.45309448242188, 47.30352020263672, -365.5030517578125, 701.446044921875, 87.60649108886719, -41.62706756591797, -6.302696228027344, 291.4482116699219, 320.9125061035156, 524.712158203125, 654.4495849609375, 195.98007202148438, 87.95521545410156, 908.4019775390625, 233.36270141601562, -50.249237060546875, 182.33709716796875, -207.3492431640625, 145.60301208496094, -14.559406280517578, -3.4623489379882812, 43.46758270263672, 313.7989196777344, -206.10226440429688, 248.30259704589844, 430.6458435058594, 1040.1805419921875, 349.56268310546875, 566.4623413085938, 236.23370361328125, 100.88313293457031, 285.77880859375, 134.7952880859375, -88.66915130615234, -394.75653076171875, -46.870697021484375, 40.750099182128906, 215.19622802734375, 136.23617553710938, 260.98040771484375, -371.8164978027344, 584.00146484375, 698.0948486328125, 33.807247161865234, 254.14707946777344, 738.3046875, 81.94953155517578, -71.56004333496094, 479.15582275390625, 384.9121398925781, 106.1964111328125, 87.52684020996094, 991.3967895507812, 308.46038818359375, -518.1369018554688, -73.57078552246094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000439.npy"} +{"epoch": 0.644640234948605, "step": 440, "batch_size": 64, "mean": 237.09425354003906, "std": 338.2253112792969, "min": -269.9552307128906, "p10": -111.87857055664062, "median": 141.69517517089844, "p90": 712.3460571289063, "max": 1421.0328369140625, "pos_frac": 0.78125, "sample": [310.39459228515625, 346.2265625, 362.6537170410156, 329.979248046875, -118.62042236328125, 44.91362762451172, 622.2859497070312, 117.86322784423828, 58.74019241333008, 821.0775756835938, 38.648521423339844, 108.91085815429688, 264.0843505859375, 122.15371704101562, 746.1121826171875, -197.011474609375, 131.22439575195312, -188.8848419189453, 28.365447998046875, 6.532676696777344, 178.65451049804688, 590.5460815429688, 103.8123779296875, 44.03260040283203, 385.58880615234375, 233.2590789794922, -113.22075653076172, 267.4451904296875, 402.87481689453125, 97.40786743164062, 121.66714477539062, 696.8355712890625, -100.01699829101562, 266.98455810546875, 18.051454544067383, -57.74969482421875, 1189.8101806640625, -269.9552307128906, 207.84637451171875, -7.423883438110352, 839.2601318359375, 267.3398742675781, 938.696533203125, 495.56591796875, 215.62612915039062, 65.9223403930664, -131.91574096679688, 152.18760681152344, 420.1643371582031, 508.25128173828125, 152.16595458984375, 718.993408203125, -28.422706604003906, 113.7334976196289, 13.149654388427734, 532.1976318359375, 40.34714126586914, -40.55804443359375, -108.7468032836914, 166.91217041015625, 1421.0328369140625, 501.2826843261719, -265.0733642578125, -26.18096923828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000440.npy"} +{"epoch": 0.6461086637298091, "step": 441, "batch_size": 64, "mean": 242.307861328125, "std": 422.6373291015625, "min": -758.6912231445312, "p10": -224.913833618164, "median": 218.89728546142578, "p90": 728.9767150878907, "max": 1949.5408935546875, "pos_frac": 0.703125, "sample": [261.6409912109375, 163.05677795410156, 222.4073486328125, 527.698486328125, 432.7228698730469, 780.564208984375, -61.33282470703125, 29.131820678710938, 145.86239624023438, -385.1640319824219, -144.0116424560547, -47.61113739013672, 693.4464111328125, 966.3817138671875, 330.0970458984375, -356.9550476074219, 136.8452911376953, 750.29150390625, 112.67236328125, -18.709136962890625, 41.45164489746094, 106.52293395996094, -9.868011474609375, 741.0672607421875, 755.0481567382812, 120.77310180664062, 243.91929626464844, 397.6943664550781, 377.23663330078125, 1949.5408935546875, 58.84009552001953, -489.9412536621094, 298.030029296875, 592.77099609375, -478.29693603515625, -164.0416259765625, 120.5118179321289, 282.6739501953125, 627.72216796875, -385.3050537109375, -80.49166870117188, 499.874267578125, -68.92631530761719, -758.6912231445312, 443.01873779296875, 462.75421142578125, 363.5408020019531, 446.2137756347656, 359.3819274902344, -55.92678451538086, 215.38722229003906, -251.00192260742188, -39.40716552734375, 406.251953125, 338.64813232421875, 101.02166748046875, 700.7654418945312, 382.50091552734375, 519.1722412109375, 1072.838623046875, -24.266239166259766, -62.498252868652344, 142.3424072265625, 669.8145751953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000441.npy"} +{"epoch": 0.6475770925110133, "step": 442, "batch_size": 64, "mean": 288.12567138671875, "std": 379.1205749511719, "min": -467.2045593261719, "p10": -68.66317634582518, "median": 178.54246520996094, "p90": 771.5019775390625, "max": 1239.7232666015625, "pos_frac": 0.796875, "sample": [-77.25972747802734, 172.7056121826172, 528.7990112304688, 45.18359375, 301.36688232421875, 149.68392944335938, -23.25298309326172, 223.6863555908203, 64.03373718261719, 60.106197357177734, 5.545787811279297, 413.66937255859375, -8.504371643066406, 430.351806640625, 384.6461181640625, 643.2684326171875, 902.2628784179688, 425.247314453125, -13.7484130859375, 184.3793182373047, 170.16656494140625, 511.09381103515625, 48.20362854003906, 299.8768005371094, -206.4717254638672, 239.1339111328125, 759.1015014648438, 320.48321533203125, 954.0906982421875, 197.1710205078125, 109.42666625976562, -467.2045593261719, 141.45217895507812, -86.01322937011719, -4.435127258300781, 591.0211181640625, -48.604557037353516, 486.8692321777344, 102.24425506591797, 1215.86328125, -227.8431396484375, 324.4539794921875, 776.8164672851562, 41.39617156982422, 54.50469970703125, 1239.7232666015625, 754.73779296875, 270.5860900878906, 628.7326049804688, 108.61495208740234, 63.2620849609375, -352.6156311035156, 30.027191162109375, 76.27552795410156, -25.527297973632812, -221.26829528808594, 342.1467590332031, 480.462646484375, 1203.456298828125, 686.4143676757812, 1115.3587646484375, 751.0132446289062, 124.24507141113281, 49.42994689941406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000442.npy"} +{"epoch": 0.6490455212922174, "step": 443, "batch_size": 64, "mean": 181.2572021484375, "std": 407.65924072265625, "min": -828.9405517578125, "p10": -247.88780517578124, "median": 179.47354125976562, "p90": 629.2391906738285, "max": 1395.349365234375, "pos_frac": 0.765625, "sample": [1229.2459716796875, 77.80950164794922, 886.222900390625, 19.854019165039062, 280.9000549316406, 120.44512939453125, -63.699867248535156, -104.83613586425781, -408.67828369140625, 108.85201263427734, 55.11149597167969, 28.119003295898438, 176.6466522216797, 222.20452880859375, 64.18545532226562, -783.1419067382812, 127.05030822753906, -216.24537658691406, -41.488731384277344, 13.553586959838867, 1173.0128173828125, 369.1768798828125, 547.1105346679688, 304.9827575683594, -594.1082153320312, -630.7301635742188, 195.74826049804688, 234.93251037597656, 661.7214965820312, -828.9405517578125, 389.26068115234375, 376.7913818359375, 294.7744140625, 135.24024963378906, 119.60157775878906, -127.04895782470703, 143.38092041015625, 429.9681396484375, 455.6659240722656, 184.6569061279297, -248.76683044433594, -59.60581970214844, 426.7402038574219, -270.1935119628906, 390.16119384765625, 126.79508972167969, 308.2156982421875, 1395.349365234375, 300.9069519042969, 132.571533203125, 553.4471435546875, 104.92742919921875, 196.3126983642578, 182.30043029785156, -180.2158966064453, 283.9382019042969, -245.8367462158203, 246.4922637939453, 166.6702880859375, 218.57965087890625, 809.0203857421875, 752.0997314453125, 194.3543701171875, 188.88870239257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000443.npy"} +{"epoch": 0.6505139500734214, "step": 444, "batch_size": 64, "mean": 291.16546630859375, "std": 382.1694030761719, "min": -600.3107299804688, "p10": -109.14409484863279, "median": 243.67723846435547, "p90": 756.6373046875004, "max": 1450.9932861328125, "pos_frac": 0.78125, "sample": [345.627197265625, -329.63336181640625, 83.06415557861328, 595.219482421875, 32.04308319091797, 326.4784851074219, -177.66114807128906, 1450.9932861328125, 839.6063232421875, -47.99495315551758, -91.20956420898438, 220.14102172851562, 185.58642578125, 561.1853637695312, 618.9464111328125, 154.31121826171875, 798.6385498046875, 458.85791015625, -154.81967163085938, 124.39979553222656, 78.60279083251953, -16.324411392211914, 401.0252380371094, 845.5009765625, 87.28797149658203, 370.13067626953125, 329.4105224609375, -35.111083984375, 243.73350524902344, -401.73577880859375, -18.41106605529785, 1401.490966796875, 285.8908996582031, 269.13616943359375, 308.0478515625, 462.60491943359375, 1043.859130859375, 429.1774597167969, 526.4725952148438, 226.34512329101562, -116.830322265625, 1000.4182739257812, 624.320068359375, 69.63423919677734, 555.4668579101562, 560.3212280273438, 616.8084716796875, -0.9400844573974609, 182.32679748535156, 148.88587951660156, 248.9389190673828, 199.78977966308594, 263.40972900390625, 213.77401733398438, 481.3269958496094, 243.6209716796875, 17.008943557739258, -39.93745422363281, -288.1499938964844, 67.02658081054688, 658.6343994140625, -600.3107299804688, 205.32644653320312, 492.80609130859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000444.npy"} +{"epoch": 0.6519823788546255, "step": 445, "batch_size": 64, "mean": 275.9429931640625, "std": 352.4450988769531, "min": -421.99224853515625, "p10": -113.23955383300779, "median": 213.82263946533203, "p90": 687.4292785644532, "max": 1289.794189453125, "pos_frac": 0.796875, "sample": [232.69161987304688, 57.21794891357422, 3.4404220581054688, 876.9952392578125, 17.913528442382812, 555.2823486328125, 488.63751220703125, 125.82894134521484, 41.126922607421875, 177.98838806152344, 118.6009521484375, 599.202880859375, 21.664077758789062, 419.1983947753906, 209.08047485351562, -421.99224853515625, 81.98812866210938, 466.2181091308594, 416.7038269042969, 218.56480407714844, 412.1383056640625, 377.13116455078125, 523.0631103515625, 681.1966552734375, 54.5457649230957, 133.96441650390625, 1289.794189453125, 472.2721862792969, 534.0497436523438, 638.1159057617188, 702.0958251953125, 673.7062377929688, 174.29884338378906, 692.3230590820312, 14.650009155273438, -192.1153564453125, 205.64498901367188, 314.2275695800781, -123.37486267089844, 549.8582153320312, 30.643089294433594, 1004.1171875, 498.5564880371094, -273.9095458984375, 690.1004028320312, 315.77606201171875, 139.13702392578125, -234.9840087890625, -5.225639343261719, -64.49739074707031, 273.0466003417969, -47.81909942626953, -88.81370544433594, 415.71099853515625, 119.9598388671875, 373.0858154296875, -31.324310302734375, -234.97787475585938, 1214.2554931640625, -89.59049987792969, 599.1041259765625, 174.5264892578125, 380.8966064453125, -331.3621520996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000445.npy"} +{"epoch": 0.6534508076358296, "step": 446, "batch_size": 64, "mean": 297.735595703125, "std": 318.5719299316406, "min": -617.8701782226562, "p10": -27.297586822509757, "median": 258.38414764404297, "p90": 756.0079772949218, "max": 1146.9598388671875, "pos_frac": 0.828125, "sample": [19.08460235595703, 397.46807861328125, 27.088638305664062, 760.08837890625, 856.3079223632812, 349.8673400878906, 16.314075469970703, -617.8701782226562, 223.17164611816406, 687.7974853515625, 843.4495239257812, 401.39105224609375, -3.7780990600585938, 718.7744140625, 486.5478820800781, 592.380126953125, 607.42431640625, 363.15216064453125, 172.83349609375, 391.41064453125, -51.01568603515625, 164.43048095703125, 136.2218017578125, 218.03746032714844, 236.54859924316406, -401.30914306640625, 129.2909393310547, 154.79556274414062, 508.275634765625, 282.2353515625, 468.0987243652344, -99.93595886230469, -17.634674072265625, 193.53688049316406, 36.862953186035156, 198.03927612304688, -12.415023803710938, 461.32354736328125, 295.25787353515625, 757.013427734375, 389.30816650390625, 182.3120574951172, 753.6619262695312, 1146.9598388671875, 445.6221923828125, -31.43883514404297, 855.149658203125, 461.9782409667969, 335.21453857421875, 225.24102783203125, 431.2973327636719, -1.0995025634765625, -89.6258544921875, 142.57186889648438, 113.69542694091797, 215.542724609375, 126.8541259765625, 407.332275390625, 757.97216796875, 211.87005615234375, -249.12689208984375, 660.677001953125, 332.3265075683594, 280.2196960449219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000446.npy"} +{"epoch": 0.6549192364170338, "step": 447, "batch_size": 64, "mean": 312.77081298828125, "std": 389.6471252441406, "min": -478.48187255859375, "p10": -117.48008422851562, "median": 280.9343566894531, "p90": 724.7666015625001, "max": 2055.42138671875, "pos_frac": 0.828125, "sample": [304.9451904296875, 94.62284088134766, 371.96337890625, 256.1649169921875, 28.233306884765625, -478.48187255859375, 462.1097106933594, 228.00076293945312, 327.4022216796875, -41.322479248046875, 29.33578872680664, 699.7735595703125, 265.5856628417969, 220.4945831298828, 770.0047607421875, 600.9479370117188, 789.4556274414062, 34.661773681640625, 2055.42138671875, 784.9448852539062, 278.45989990234375, 1140.65771484375, 462.859375, 422.54193115234375, -280.4287109375, 283.4088134765625, 558.1398315429688, -119.38677978515625, 318.9519348144531, 212.4800262451172, -166.76461791992188, -19.211715698242188, 0.6203384399414062, -122.86630249023438, 420.2314758300781, -113.0311279296875, 206.7392578125, 589.6704711914062, 95.9063720703125, 33.09436798095703, 569.5608520507812, 122.02327728271484, 139.68202209472656, -349.519287109375, 735.4779052734375, 508.42462158203125, 471.0036926269531, -223.8035888671875, 216.40975952148438, 28.14444923400879, 458.2203369140625, 433.7544860839844, 397.89208984375, 351.8436584472656, 148.9390869140625, 601.5785522460938, 372.0653076171875, 106.1604995727539, 1022.0177001953125, 638.689208984375, 94.99260711669922, 660.7198486328125, 537.6854858398438, -30.965606689453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000447.npy"} +{"epoch": 0.6563876651982379, "step": 448, "batch_size": 64, "mean": 295.4989929199219, "std": 342.8846435546875, "min": -377.94903564453125, "p10": -118.21312713623044, "median": 252.69781494140625, "p90": 686.3301269531252, "max": 1380.341064453125, "pos_frac": 0.84375, "sample": [40.63127899169922, 19.810543060302734, -319.9833068847656, 165.49713134765625, 607.63427734375, 30.1495361328125, 128.1474151611328, -52.014625549316406, 313.35028076171875, -377.94903564453125, 491.6910400390625, 84.74417877197266, 707.0372314453125, 92.06777954101562, 253.27593994140625, 567.3194580078125, 41.82286834716797, -20.46461296081543, 622.6646728515625, -150.37289428710938, -131.6215057373047, 107.12652587890625, 712.3829345703125, 462.5648193359375, 250.52688598632812, 338.0111999511719, 237.4969482421875, 146.29437255859375, 477.36346435546875, 885.115234375, 299.09417724609375, 594.8101196289062, 0.8700160980224609, 368.63079833984375, 1380.341064453125, 11.11298942565918, 85.00996398925781, 191.4657440185547, 92.1098403930664, 342.2417907714844, 613.433349609375, 615.282470703125, 280.59967041015625, -211.6065673828125, 1075.626953125, 362.80120849609375, 252.11968994140625, -86.92691040039062, 480.8432922363281, 121.41909790039062, 301.864013671875, -191.6577606201172, 960.761962890625, -169.423583984375, 632.3016967773438, 877.68359375, 55.12506103515625, 355.18621826171875, 121.8019790649414, 325.81121826171875, 560.787841796875, 638.0135498046875, 599.1480102539062, 244.93218994140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000448.npy"} +{"epoch": 0.657856093979442, "step": 449, "batch_size": 64, "mean": 298.13922119140625, "std": 441.2258605957031, "min": -861.931884765625, "p10": -170.46488189697266, "median": 245.79324340820312, "p90": 804.9815734863282, "max": 2228.80517578125, "pos_frac": 0.78125, "sample": [146.2692108154297, 32.432044982910156, 323.843017578125, 31.393417358398438, 703.9310913085938, 696.4794921875, 253.4423828125, 90.82488250732422, 244.6217041015625, 246.96478271484375, 898.92626953125, -231.48487854003906, -55.23145294189453, 230.23770141601562, 659.8994140625, 26.380550384521484, 457.62127685546875, 136.57272338867188, 113.95256042480469, -295.24151611328125, 824.8173828125, 294.67108154296875, 546.6625366210938, 785.135009765625, 365.11749267578125, 711.9011840820312, -278.8285217285156, 92.60829162597656, 405.0994873046875, 430.5421142578125, 170.5636749267578, 220.32452392578125, 52.34564208984375, 629.453857421875, 574.1015014648438, 730.7128295898438, -121.55461883544922, 93.58967590332031, -166.521240234375, -172.15501403808594, 2228.80517578125, 584.1475830078125, -131.78749084472656, 309.84564208984375, 429.86309814453125, 162.71817016601562, -88.68478393554688, -861.931884765625, -48.865726470947266, 1106.2060546875, 283.0150451660156, 848.083251953125, 813.4872436523438, -225.8180694580078, 490.40313720703125, 150.46365356445312, 456.22857666015625, 1020.156982421875, 288.21600341796875, -55.09318542480469, -221.64540100097656, 53.745849609375, 155.3922576904297, 433.5375671386719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000449.npy"} +{"epoch": 0.6593245227606461, "step": 450, "batch_size": 64, "mean": 325.58074951171875, "std": 322.12158203125, "min": -703.2261352539062, "p10": 14.25559577941895, "median": 303.1432342529297, "p90": 751.0333007812502, "max": 1221.704345703125, "pos_frac": 0.90625, "sample": [381.9990234375, 663.5286254882812, 777.02978515625, 369.6996765136719, 73.142333984375, 685.6118774414062, 1221.704345703125, -71.26829528808594, 369.7063293457031, 12.377204895019531, 595.9802856445312, 581.0457153320312, 706.9932861328125, 376.7872619628906, 389.5497131347656, 780.080810546875, 79.7186279296875, 64.1314697265625, 223.30126953125, 88.54345703125, 397.37384033203125, 609.3881225585938, 530.0556640625, 769.9075927734375, 199.3820037841797, -33.22677230834961, 86.78117370605469, 902.7052001953125, 344.170166015625, 660.548828125, 76.24375915527344, 372.6268310546875, 250.2053680419922, 253.7841033935547, 302.2659606933594, 18.638507843017578, 212.58226013183594, 400.1438903808594, 239.4822998046875, 127.44792175292969, 874.5175170898438, -231.77137756347656, 125.08818817138672, 436.8830261230469, 148.86053466796875, 316.90423583984375, 56.34136199951172, 683.0179443359375, 229.24752807617188, 54.68784713745117, 501.56048583984375, -63.490020751953125, 587.2192993164062, 334.36126708984375, 77.0911636352539, -703.2261352539062, 347.08685302734375, 304.0205078125, 59.00049591064453, 226.6348419189453, 186.32452392578125, 1070.6400146484375, -36.69390106201172, 162.69287109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000450.npy"} +{"epoch": 0.6607929515418502, "step": 451, "batch_size": 64, "mean": 233.91940307617188, "std": 429.757080078125, "min": -1278.791748046875, "p10": -230.60373077392572, "median": 265.2984161376953, "p90": 701.6096923828126, "max": 1188.33447265625, "pos_frac": 0.78125, "sample": [65.74514770507812, -134.97332763671875, -3.615936279296875, 323.68743896484375, 532.1426391601562, 267.4071960449219, 374.116943359375, 681.7728271484375, -36.78179168701172, 263.18963623046875, 515.3369140625, 1188.33447265625, 508.29486083984375, 823.55810546875, 139.65338134765625, 219.53787231445312, 130.1534423828125, -1278.791748046875, -671.0040893554688, -271.44964599609375, 174.15594482421875, 65.3156509399414, 472.6271057128906, 584.892333984375, 740.18115234375, 279.1002197265625, 427.307861328125, 197.9944305419922, 366.4124450683594, 617.1717529296875, 179.003662109375, 1125.0506591796875, 924.8400268554688, 363.4613342285156, 499.8690490722656, 515.0665283203125, 460.5085754394531, -359.07086181640625, 269.0024108886719, 343.72027587890625, -45.929771423339844, -145.6978759765625, 425.1796569824219, -1032.435302734375, 176.0077667236328, 178.85702514648438, 184.8754425048828, 167.2042236328125, 281.3406982421875, 65.21671295166016, 668.80615234375, -110.10411071777344, -255.22195434570312, 780.2260131835938, 541.38720703125, 96.50625610351562, 398.7322998046875, -495.6972351074219, 199.6905517578125, 10.682266235351562, 710.1112060546875, -173.1612091064453, 309.0564880371094, 152.28353881835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000451.npy"} +{"epoch": 0.6622613803230544, "step": 452, "batch_size": 64, "mean": 281.9278564453125, "std": 296.2518310546875, "min": -373.6860656738281, "p10": -102.18248443603515, "median": 280.74534606933594, "p90": 687.0710876464844, "max": 922.7520141601562, "pos_frac": 0.84375, "sample": [880.71337890625, -259.6435852050781, 538.4014282226562, 671.68310546875, -103.0362777709961, 486.8250732421875, 442.3407287597656, 504.9695739746094, 566.337890625, -100.19029998779297, 511.61798095703125, 267.8137512207031, 142.3609161376953, 170.60333251953125, 385.963134765625, 299.6015930175781, 673.240234375, 293.67694091796875, 459.63079833984375, 38.23051452636719, 412.37982177734375, 8.802619934082031, 399.18328857421875, 78.01386260986328, 307.32403564453125, 692.9985961914062, 88.00527954101562, -269.4757995605469, 549.2059326171875, 201.06674194335938, -74.687255859375, 393.50408935546875, 113.76791381835938, 184.59866333007812, 697.2726440429688, 96.98855590820312, 264.8067932128906, 663.7733154296875, 112.16204071044922, 195.88140869140625, 314.29193115234375, -256.10198974609375, 234.99209594726562, 760.6439208984375, 217.49392700195312, 402.358154296875, 131.4632568359375, 776.0594482421875, 922.7520141601562, 83.11436462402344, 114.87840270996094, 392.88250732421875, 384.9749755859375, 10.899715423583984, -373.6860656738281, 323.141357421875, -141.24676513671875, 495.2157287597656, 461.71282958984375, 726.1737060546875, -109.07292938232422, 230.43618774414062, -59.00389099121094, 12.298151016235352], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000452.npy"} +{"epoch": 0.6637298091042585, "step": 453, "batch_size": 64, "mean": 266.5997314453125, "std": 360.1244812011719, "min": -357.9365234375, "p10": -144.18380279541014, "median": 196.67718505859375, "p90": 697.9859985351563, "max": 1648.50390625, "pos_frac": 0.75, "sample": [584.9031372070312, 586.5772094726562, -150.80279541015625, 115.72814178466797, 227.70028686523438, 23.666336059570312, 330.7523193359375, 165.28636169433594, 138.98570251464844, 8.6527099609375, 203.74932861328125, 549.8754272460938, 544.6217651367188, 648.8798217773438, 15.23282241821289, 112.89283752441406, 330.569580078125, 212.16204833984375, -121.27461242675781, 76.54750061035156, 590.4981079101562, -183.8529815673828, 226.15060424804688, 352.3062744140625, 669.0709228515625, -196.52301025390625, -128.73948669433594, 710.378173828125, -357.9365234375, 329.1837158203125, -51.59593200683594, 173.08743286132812, 555.1791381835938, 849.81396484375, 887.0062255859375, 16.236804962158203, -68.14518737792969, 142.73944091796875, 1648.50390625, 787.0469970703125, 433.0538024902344, -115.90511322021484, 854.8602905273438, -16.43195343017578, 269.09417724609375, 102.66169738769531, 494.25054931640625, 216.4041748046875, 383.73687744140625, 441.71600341796875, -196.2606964111328, 494.12127685546875, -152.21240234375, 588.33349609375, 47.405029296875, -66.14289093017578, -176.35948181152344, 189.60504150390625, 1019.595947265625, 441.705322265625, -23.435779571533203, -22.22180938720703, 139.94094848632812, 159.75515747070312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000453.npy"} +{"epoch": 0.6651982378854625, "step": 454, "batch_size": 64, "mean": 301.10711669921875, "std": 382.0713195800781, "min": -683.4032592773438, "p10": -43.76784362792965, "median": 248.8004608154297, "p90": 738.7660034179688, "max": 1517.018310546875, "pos_frac": 0.875, "sample": [243.66085815429688, 10.6378173828125, 1020.8978881835938, 283.72906494140625, 513.546875, 51.26496887207031, 708.820068359375, 355.79937744140625, 3.0687599182128906, 225.6746063232422, -616.411376953125, 623.10107421875, 360.9559326171875, 319.3846435546875, 179.03927612304688, 253.9400634765625, 393.7707214355469, 411.77752685546875, 17.73767852783203, -683.4032592773438, 71.96397399902344, 572.4288940429688, -70.76083374023438, 790.1731567382812, -352.8545227050781, -59.860809326171875, 751.5999755859375, 70.53507232666016, 93.03321838378906, 243.2723388671875, 499.6870422363281, 139.0299530029297, 384.7255859375, 89.09297943115234, 433.4318542480469, 137.67855834960938, 217.74349975585938, 204.55239868164062, 282.9793701171875, 140.3090057373047, 429.529052734375, 461.1978454589844, 111.75166320800781, 383.5776672363281, -6.21759033203125, 685.56005859375, 1451.4677734375, 367.791015625, 37.14722442626953, 167.74688720703125, 1517.018310546875, 1032.3865966796875, 302.6607360839844, 508.00775146484375, 89.11894989013672, 447.1076354980469, 667.7526245117188, -86.78195190429688, 413.3758850097656, -116.30912780761719, 165.2631072998047, 68.35353088378906, 827.6546630859375, 29.940906524658203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000454.npy"} +{"epoch": 0.6666666666666666, "step": 455, "batch_size": 64, "mean": 344.6717224121094, "std": 409.9918518066406, "min": -677.155029296875, "p10": -53.10468177795406, "median": 240.952392578125, "p90": 1022.7577819824221, "max": 1350.63720703125, "pos_frac": 0.875, "sample": [127.83683776855469, 134.8743896484375, 776.7949829101562, 1041.311767578125, -485.2024230957031, 331.2119445800781, 663.8856811523438, -68.69879150390625, 1149.8643798828125, -213.26303100585938, 1060.757080078125, 239.61561584472656, 979.4651489257812, -248.42404174804688, 1142.3045654296875, -111.04930877685547, 235.2249755859375, 712.0243530273438, 210.04867553710938, 188.0291290283203, 62.089324951171875, 476.1331481933594, 243.0299530029297, 25.478403091430664, 415.0732421875, 1121.771484375, 685.1383666992188, -168.73602294921875, 245.51487731933594, 343.60986328125, 241.34320068359375, 577.9974365234375, 240.56158447265625, 200.47634887695312, 137.7725830078125, 179.01953125, 216.50369262695312, 273.6405944824219, 82.69229125976562, -677.155029296875, 382.7703857421875, 84.14727783203125, 256.351806640625, 585.5364990234375, 45.1483154296875, 84.9660415649414, 0.5251178741455078, 477.14996337890625, 842.5054931640625, 212.66751098632812, 680.3314208984375, 76.2491455078125, 418.350830078125, 444.5119934082031, 63.10816192626953, 114.68400573730469, -16.718425750732422, 846.939208984375, 462.300048828125, 440.6396789550781, 1177.048583984375, 208.50686645507812, 32.06635665893555, 1350.63720703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000455.npy"} +{"epoch": 0.6681350954478708, "step": 456, "batch_size": 64, "mean": 337.71661376953125, "std": 345.2431640625, "min": -182.8037109375, "p10": -8.60530185699462, "median": 260.7631530761719, "p90": 838.9914367675783, "max": 1412.659912109375, "pos_frac": 0.875, "sample": [164.24237060546875, 228.0537567138672, 765.0875854492188, 74.26651000976562, -25.4049072265625, 306.7081604003906, 96.92379760742188, 62.118255615234375, 251.9866943359375, 778.4912719726562, 360.953857421875, 26.259889602661133, 112.62532043457031, 1412.659912109375, 294.43359375, -105.11200714111328, 11.068008422851562, 315.5289611816406, 317.69500732421875, 428.0281066894531, 269.53961181640625, 63.444664001464844, -70.73928833007812, 1064.511474609375, -82.63299560546875, 404.12066650390625, 1323.198974609375, 372.8123779296875, 610.5302734375, 76.53903198242188, 311.5205078125, 513.1767578125, 576.6102905273438, 185.62045288085938, 176.32418823242188, 234.16026306152344, -0.10200309753417969, 459.556640625, 926.0693969726562, 94.19503784179688, 785.0233764648438, 424.19732666015625, 320.1085205078125, 71.23589324951172, -182.8037109375, 343.7315673828125, 121.49146270751953, -47.585113525390625, 862.12060546875, 118.94893646240234, 876.3235473632812, 1062.96337890625, 330.1416320800781, -12.24957275390625, 233.04861450195312, 100.07585144042969, 406.1325378417969, 116.4180908203125, 472.8011474609375, 110.13660430908203, 164.20639038085938, 571.1737060546875, 778.130615234375, 203.0207977294922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000456.npy"} +{"epoch": 0.6696035242290749, "step": 457, "batch_size": 64, "mean": 337.23883056640625, "std": 326.47406005859375, "min": -305.478515625, "p10": -40.06490287780761, "median": 364.5570373535156, "p90": 738.7369873046875, "max": 1125.901611328125, "pos_frac": 0.84375, "sample": [-41.78978729248047, 423.9132080078125, 436.62017822265625, 348.341064453125, 525.9949340820312, 733.6329345703125, 823.094970703125, 699.7778930664062, 63.663963317871094, 52.379180908203125, 1104.139404296875, 440.38861083984375, 56.78659439086914, 205.4412384033203, 106.14009094238281, 236.1082000732422, 513.4722290039062, 223.62448120117188, 120.29752349853516, 549.9127807617188, 740.9244384765625, -13.02950668334961, 169.0271759033203, 451.0578308105469, 477.77838134765625, 946.3378295898438, -302.7081298828125, -193.1418914794922, 617.8446655273438, 634.2575073242188, 56.27058410644531, 511.4823913574219, 491.6964416503906, 533.54345703125, 323.80029296875, -81.01112365722656, 648.486328125, 486.5537414550781, 109.05758666992188, 17.7574462890625, 343.31689453125, 860.4943237304688, 440.2421875, 119.79463195800781, 47.72067642211914, 150.14796447753906, 599.740234375, -23.97356414794922, 527.5955200195312, 380.77301025390625, -123.88109588623047, 646.2655029296875, 266.67877197265625, 803.138671875, -305.478515625, 386.115234375, 30.204269409179688, -148.93966674804688, 433.181640625, -36.0401725769043, 206.55523681640625, 525.9246215820312, 79.88115692138672, 1125.901611328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000457.npy"} +{"epoch": 0.671071953010279, "step": 458, "batch_size": 64, "mean": 306.4158935546875, "std": 368.9295654296875, "min": -439.05279541015625, "p10": -123.0059455871582, "median": 303.4040222167969, "p90": 626.4427612304689, "max": 1778.630615234375, "pos_frac": 0.796875, "sample": [-439.05279541015625, 641.5076904296875, 446.89569091796875, 267.1802673339844, 538.907470703125, -43.32987976074219, 1778.630615234375, -205.7944793701172, 301.7501220703125, 493.7596740722656, 224.01834106445312, 302.5474853515625, 524.6517944335938, 287.0892639160156, 719.11767578125, -117.35455322265625, -130.6161651611328, -15.44331169128418, 244.29971313476562, 568.6135864257812, 470.7998352050781, 469.7337951660156, 86.75627899169922, 515.0906372070312, 360.61688232421875, 357.50714111328125, 385.6229248046875, 49.6527099609375, 381.86297607421875, 451.6101989746094, 313.6121520996094, 739.5633544921875, 484.6319274902344, 815.755126953125, 135.63040161132812, -24.2459716796875, 343.47747802734375, 74.96723937988281, 471.7281494140625, 304.26055908203125, 698.7413330078125, 365.9841613769531, -110.9011459350586, -340.6380310058594, 299.73370361328125, -264.258056640625, 180.0205078125, -125.42797088623047, 198.70831298828125, 1592.0283203125, 499.416015625, 222.11038208007812, 295.0863952636719, 85.07337951660156, 202.20501708984375, 591.291259765625, 324.7650146484375, 431.3565673828125, 253.6185760498047, 313.8272399902344, -113.74781036376953, 431.1087951660156, -257.1632385253906, 261.66583251953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000458.npy"} +{"epoch": 0.6725403817914831, "step": 459, "batch_size": 64, "mean": 331.6836853027344, "std": 407.98236083984375, "min": -758.259765625, "p10": -147.27821502685543, "median": 306.6326141357422, "p90": 822.6238220214844, "max": 1475.5008544921875, "pos_frac": 0.828125, "sample": [70.71891784667969, 50.605133056640625, 435.0791015625, 165.97308349609375, 258.36041259765625, 385.00543212890625, -235.03543090820312, 156.8815155029297, 568.25390625, -299.4705810546875, 581.93994140625, 400.583740234375, 452.5840759277344, 527.6680908203125, 23.061460494995117, 460.96539306640625, -44.932830810546875, 607.3336181640625, 319.99810791015625, 889.8875732421875, 23.8104248046875, 867.1599731445312, 641.25390625, -110.97660827636719, 343.4040222167969, -238.4780731201172, 693.73291015625, -116.93901062011719, 120.51203918457031, 53.261878967285156, 245.5308380126953, -21.94994354248047, 829.73291015625, -758.259765625, 1475.5008544921875, 300.87469482421875, 1256.789794921875, 547.9690551757812, 227.24574279785156, -294.5423278808594, 195.29232788085938, 266.1169738769531, 481.23199462890625, 243.77630615234375, 67.274169921875, 278.70599365234375, -358.0869445800781, 179.40106201171875, 429.8945007324219, 635.0777587890625, 732.98193359375, 94.55717468261719, 379.2371826171875, 1229.8763427734375, 312.3905334472656, 90.67797088623047, 982.5320434570312, -160.28073120117188, 787.9330444335938, 772.994384765625, 806.0359497070312, 327.6905517578125, 23.990055084228516, 567.36181640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000459.npy"} +{"epoch": 0.6740088105726872, "step": 460, "batch_size": 64, "mean": 357.0635070800781, "std": 388.44488525390625, "min": -301.51031494140625, "p10": -136.0722457885742, "median": 322.5360412597656, "p90": 887.1218811035156, "max": 1483.50927734375, "pos_frac": 0.8125, "sample": [766.6982421875, 657.16064453125, 508.6838073730469, -129.3579559326172, 106.48316955566406, 890.408935546875, 902.54736328125, 325.7181701660156, -13.352272033691406, 226.10475158691406, -138.94979858398438, 691.3106079101562, 125.80619812011719, 590.595458984375, 199.47715759277344, 270.76708984375, 290.0373229980469, -301.51031494140625, 174.6988067626953, 1338.849853515625, 1242.5220947265625, 210.6328125, 505.3775939941406, 29.026123046875, 952.484375, 306.9683532714844, 1483.50927734375, 407.4256591796875, -117.6096420288086, 26.10565185546875, 289.7858581542969, -161.28317260742188, 529.3480834960938, -188.3064727783203, 470.4658508300781, 569.0485229492188, -99.2113265991211, 436.72454833984375, 319.3539123535156, -0.5747299194335938, 868.44677734375, 632.8690185546875, 531.4008178710938, 345.7208557128906, 477.0113525390625, 127.66751098632812, 116.58921813964844, 102.71189880371094, 371.3390808105469, 253.3671417236328, -197.4661102294922, 76.58760833740234, 120.81719207763672, 403.7592468261719, -256.50079345703125, 572.0438232421875, 895.450439453125, 580.1681518554688, -213.16744995117188, 879.4520874023438, 549.2493286132812, 420.0487060546875, 64.91576385498047, 435.6119384765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000460.npy"} +{"epoch": 0.6754772393538914, "step": 461, "batch_size": 64, "mean": 282.1562805175781, "std": 402.6361389160156, "min": -500.6500244140625, "p10": -167.44052124023432, "median": 210.50312042236328, "p90": 704.9762451171877, "max": 1599.95751953125, "pos_frac": 0.75, "sample": [114.39334106445312, -18.945877075195312, 653.252685546875, 343.7588195800781, -191.3558349609375, 534.3829345703125, -500.6500244140625, 1324.7166748046875, 85.07192993164062, 616.107666015625, 145.04598999023438, -48.56385040283203, 262.18499755859375, 636.2051391601562, 993.8883056640625, 458.9755859375, 587.0526733398438, -111.63812255859375, 208.9717559814453, 286.926513671875, 341.25384521484375, 648.3473510742188, 723.6695556640625, 1006.462158203125, -193.97816467285156, -47.78399658203125, 1155.914306640625, 1599.95751953125, 173.06076049804688, 381.197998046875, 437.4135437011719, 200.1927947998047, 237.780029296875, 29.38299560546875, 88.8602294921875, 275.29345703125, -271.6722412109375, -26.38787841796875, -100.99069213867188, 12.411041259765625, 821.2127075195312, -83.04100799560547, 657.0346069335938, 661.3585205078125, -236.6420135498047, 446.5386047363281, 205.9336700439453, 526.30322265625, 20.35211181640625, -95.18585205078125, 600.5068359375, 131.95614624023438, 102.85939025878906, 203.03533935546875, 517.8668212890625, -87.62818145751953, -266.8943176269531, 113.31582641601562, 293.47369384765625, 231.941162109375, 129.6741943359375, -400.03021240234375, 301.85955810546875, 212.03448486328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000461.npy"} +{"epoch": 0.6769456681350955, "step": 462, "batch_size": 64, "mean": 230.16461181640625, "std": 419.6398620605469, "min": -644.7933959960938, "p10": -259.81098022460935, "median": 185.25746154785156, "p90": 655.8601806640628, "max": 1392.3331298828125, "pos_frac": 0.703125, "sample": [400.7628479003906, 58.363380432128906, 9.922950744628906, -186.67494201660156, 449.5776672363281, 583.9876708984375, 29.928239822387695, 493.0987854003906, 574.9708862304688, 814.4902954101562, 534.6349487304688, -211.81637573242188, -413.54461669921875, -16.88113784790039, 163.09326171875, 473.61883544921875, 526.3123779296875, 146.56277465820312, -247.65228271484375, 684.2672729492188, -270.0320739746094, 81.90573120117188, 333.1772766113281, -644.7933959960938, -124.57081604003906, 49.829864501953125, 589.5769653320312, -475.4342041015625, 22.289840698242188, 6.564258575439453, 1306.8165283203125, 401.5647277832031, -265.0218505859375, 258.1617431640625, 578.49755859375, 1014.3136596679688, -227.01358032226562, -60.43262481689453, 1392.3331298828125, 251.6544647216797, 145.44593811035156, -146.15904235839844, -178.3875274658203, 204.51483154296875, -281.7807922363281, -78.73707580566406, 368.0049743652344, 491.70782470703125, -304.1813049316406, 166.00009155273438, -78.07687377929688, 423.0684814453125, 1318.254638671875, 505.05517578125, 395.18804931640625, -145.94134521484375, 304.5865478515625, 505.6812438964844, 795.814208984375, 552.8329467773438, 74.80421447753906, 152.97726440429688, 246.52394104003906, 206.92779541015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000462.npy"} +{"epoch": 0.6784140969162996, "step": 463, "batch_size": 64, "mean": 307.53863525390625, "std": 450.5167236328125, "min": -482.8091125488281, "p10": -226.43145599365232, "median": 270.4690856933594, "p90": 855.0407409667971, "max": 1515.783447265625, "pos_frac": 0.71875, "sample": [57.20598602294922, 570.483154296875, -17.906625747680664, 282.32183837890625, -140.72515869140625, 386.3114013671875, -165.80731201171875, -8.332527160644531, 461.0097351074219, 1.0801544189453125, 598.5361938476562, -237.83108520507812, 620.4573364257812, 661.8897705078125, 180.86483764648438, 384.74761962890625, 338.568115234375, 667.3374633789062, 790.2589111328125, -98.88243103027344, 21.865602493286133, 1515.783447265625, 395.2465515136719, 273.40716552734375, 1263.8868408203125, 665.7440185546875, 173.91677856445312, 292.2471008300781, -19.777069091796875, 187.50125122070312, -294.7873229980469, -21.957984924316406, -482.8091125488281, 77.10352325439453, 808.2019653320312, -420.53955078125, -73.05097961425781, -185.33441162109375, 405.1461486816406, -199.8323211669922, 589.74951171875, 321.47113037109375, 684.7131958007812, 254.72305297851562, 781.3396606445312, 1060.443359375, -22.425750732421875, 267.531005859375, 952.5778198242188, -339.39813232421875, 173.2831268310547, 1342.3310546875, 241.25209045410156, 875.114501953125, -338.6005859375, 14.580963134765625, 385.3993225097656, 259.8720703125, -468.33709716796875, 412.7036437988281, 471.2353820800781, 551.3339233398438, 1319.928955078125, 178.1019744873047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000463.npy"} +{"epoch": 0.6798825256975036, "step": 464, "batch_size": 64, "mean": 471.21575927734375, "std": 482.2331848144531, "min": -509.9890441894531, "p10": -47.25964775085446, "median": 444.02227783203125, "p90": 963.0181518554688, "max": 2129.5, "pos_frac": 0.875, "sample": [-252.3745880126953, 956.4931640625, 367.1675109863281, 226.22653198242188, 5.87652587890625, 991.2320556640625, 687.9369506835938, 138.4839630126953, 84.86642456054688, 479.69439697265625, 698.923583984375, 627.37548828125, -482.1011657714844, 798.219970703125, 302.6344909667969, 506.25726318359375, 705.330078125, 217.4087371826172, 370.92523193359375, 784.7013549804688, 793.3574829101562, 423.121826171875, 803.2158203125, -128.6420440673828, 1101.077880859375, 248.66409301757812, 592.7326049804688, 495.7630615234375, -509.9890441894531, 165.51898193359375, 844.6142578125, 210.265869140625, 494.43939208984375, 51.19961929321289, 363.9273376464844, 581.650634765625, 755.28564453125, 247.93531799316406, 464.9227294921875, 286.28125, 137.7470703125, -15.715633392333984, 893.3907470703125, 623.3681030273438, 91.99250793457031, 411.5663146972656, -123.29364776611328, 311.400146484375, 15.353965759277344, 644.0797119140625, 556.2887573242188, 1523.422119140625, 2129.5, 1249.810791015625, 422.33294677734375, 965.8145751953125, 836.464599609375, -60.77851104736328, 1767.8902587890625, 744.3651733398438, 582.373046875, -456.0389709472656, 310.94757080078125, 94.9059829711914], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000464.npy"} +{"epoch": 0.6813509544787077, "step": 465, "batch_size": 64, "mean": 377.061279296875, "std": 517.4326171875, "min": -1484.7552490234375, "p10": -179.09581604003904, "median": 426.2876739501953, "p90": 873.3167846679688, "max": 1833.9925537109375, "pos_frac": 0.859375, "sample": [736.7205810546875, 1486.706787109375, 549.5875244140625, 230.7356414794922, -247.99102783203125, 7.735992431640625, 632.3499755859375, 357.5987548828125, 69.4009780883789, 9.960235595703125, 797.244873046875, 271.8990478515625, 294.2487487792969, 56.57823944091797, 586.563232421875, 641.3198852539062, 478.1617126464844, 427.1206970214844, 201.7582550048828, 59.83495330810547, -784.2081909179688, 1017.5836181640625, 476.43841552734375, 215.438232421875, 1833.9925537109375, -191.36952209472656, 563.3563842773438, 143.423095703125, 336.32958984375, 525.35693359375, 686.9523315429688, 458.15704345703125, -161.30569458007812, 150.52978515625, 235.53530883789062, 562.206787109375, 8.524269104003906, 325.70843505859375, -369.6532287597656, 738.1041259765625, 912.0997924804688, 221.40225219726562, 545.7821655273438, -940.2340698242188, 760.982177734375, 699.0929565429688, 170.8566436767578, 826.5651245117188, 100.29368591308594, 875.2276000976562, 774.2418212890625, -186.72015380859375, 1105.4759521484375, -1484.7552490234375, 849.8228759765625, 868.8582153320312, -92.08250427246094, 425.45465087890625, 664.6044921875, 473.9266662597656, 58.22630310058594, 423.55889892578125, 887.240234375, 773.3663330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000465.npy"} +{"epoch": 0.6828193832599119, "step": 466, "batch_size": 64, "mean": 341.78094482421875, "std": 496.98699951171875, "min": -1121.94140625, "p10": -315.6764465332031, "median": 297.3949890136719, "p90": 978.0736694335938, "max": 1435.2650146484375, "pos_frac": 0.75, "sample": [587.8617553710938, -418.48760986328125, 201.10784912109375, -285.05413818359375, 822.9441528320312, 317.3304443359375, 613.6589965820312, 135.7225799560547, 742.88916015625, 103.28147888183594, 210.84100341796875, 1182.94580078125, -450.33514404296875, 866.9312744140625, -40.62301254272461, 172.21978759765625, 334.14508056640625, -10.378215789794922, 463.9591369628906, 217.57843017578125, 579.0321044921875, 203.71441650390625, 456.3294677734375, 406.90252685546875, 909.3926391601562, 169.89462280273438, -34.234764099121094, 833.778076171875, 277.45953369140625, 583.5787963867188, 411.1136474609375, 160.65966796875, 865.150390625, 1435.2650146484375, -1121.94140625, 964.1693725585938, 415.9023132324219, 733.907470703125, 1114.55908203125, 427.63262939453125, 984.0326538085938, 6.142704010009766, 165.57061767578125, 222.81930541992188, 500.392578125, 1220.17138671875, -114.76678466796875, -491.8033752441406, -33.076576232910156, 87.6370849609375, 1142.385498046875, 275.3236999511719, 860.1741333007812, -83.7767105102539, 931.2314453125, 426.8447265625, -561.1829833984375, -0.12823486328125, 1037.0389404296875, -328.80029296875, 348.6645202636719, -119.97039031982422, -381.0810241699219, 219.33425903320312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000466.npy"} +{"epoch": 0.684287812041116, "step": 467, "batch_size": 64, "mean": 359.0609130859375, "std": 682.378173828125, "min": -1274.7847900390625, "p10": -468.9119598388672, "median": 360.13197326660156, "p90": 1218.6074829101565, "max": 1946.6741943359375, "pos_frac": 0.71875, "sample": [726.9331665039062, 1817.610107421875, 741.3187866210938, 1341.7122802734375, -562.453125, -1274.7847900390625, 648.4400634765625, 480.5943908691406, 480.3026123046875, 339.7197570800781, 246.14608764648438, 470.46295166015625, 1838.1407470703125, 164.56712341308594, 507.69647216796875, 475.93927001953125, 116.19562530517578, 1124.8013916015625, 1542.3408203125, 436.215576171875, 380.544189453125, -94.44490051269531, 80.58807373046875, 894.4222412109375, -374.576416015625, -429.5444030761719, -753.4788208007812, 500.7770690917969, 1243.690185546875, 403.53826904296875, 1103.2122802734375, 162.1938018798828, 472.127685546875, -447.8422546386719, 1041.4119873046875, -13.842376708984375, 1248.5914306640625, 195.79827880859375, -132.81005859375, 961.4218139648438, 257.6162109375, -1116.7137451171875, 219.537109375, -321.4642333984375, 83.52064514160156, 445.37744140625, 29.105712890625, -317.16412353515625, -493.315673828125, 728.6107788085938, 846.5956420898438, 812.7276611328125, 1160.0811767578125, 324.6983947753906, 934.50634765625, -57.82068634033203, 645.2979125976562, -33.035919189453125, -477.94183349609375, 1946.6741943359375, -1029.60302734375, -27.96707534790039, 88.37694549560547, 228.519775390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000467.npy"} +{"epoch": 0.6857562408223201, "step": 468, "batch_size": 64, "mean": 450.0086364746094, "std": 589.9862060546875, "min": -1007.7269287109375, "p10": -103.75178375244136, "median": 379.3933868408203, "p90": 1385.264721679688, "max": 2364.73291015625, "pos_frac": 0.859375, "sample": [487.3189392089844, 38.214622497558594, 1437.334716796875, 554.1893310546875, 1501.8809814453125, 984.9978637695312, 585.2847900390625, 641.5819091796875, 453.3036804199219, 519.1485595703125, 324.6730041503906, 659.8603515625, 142.66925048828125, 403.2370300292969, 743.86181640625, 96.65936279296875, 212.4217071533203, -180.17620849609375, 380.8321838378906, 255.35626220703125, 192.6102294921875, 13.237041473388672, 1.2679977416992188, 377.2906188964844, 395.7687072753906, 1485.8009033203125, 377.95458984375, 1558.620361328125, 1015.8695068359375, 95.96798706054688, -431.2123107910156, -12.813583374023438, 667.686767578125, 352.4720458984375, 445.0615539550781, 625.484375, 126.13288116455078, 113.38995361328125, 654.2996215820312, -1007.7269287109375, 1902.20849609375, 83.90785217285156, 443.8524169921875, 461.0275573730469, 368.7236328125, 130.30313110351562, -252.8262939453125, 466.8876647949219, 10.161361694335938, 1263.76806640625, 333.6167907714844, 2005.8262939453125, 210.98248291015625, 733.5145263671875, 3.0383377075195312, 452.5045471191406, 579.2252197265625, 2364.73291015625, 560.894287109375, -127.78216552734375, 274.04669189453125, -504.26593017578125, -47.68089294433594, -205.92596435546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000468.npy"} +{"epoch": 0.6872246696035242, "step": 469, "batch_size": 64, "mean": 450.6948547363281, "std": 512.0138549804688, "min": -662.6998901367188, "p10": -131.16901473999016, "median": 465.92274475097656, "p90": 1174.4215820312502, "max": 1899.1708984375, "pos_frac": 0.828125, "sample": [1049.121826171875, 588.49462890625, 93.15294647216797, 550.2499389648438, 1899.1708984375, 558.7845458984375, 1155.927978515625, 64.15447235107422, -11.525131225585938, 208.1312255859375, 551.474365234375, 114.24021911621094, -521.5919799804688, 75.92398071289062, 1153.25439453125, 16.39604949951172, 500.9178161621094, 612.2191162109375, 487.29925537109375, 520.4967041015625, 1205.3035888671875, 230.23660278320312, 165.3335418701172, 184.0535125732422, 1140.355712890625, 1.4036865234375, 874.2189331054688, 522.3076171875, 1182.347412109375, -279.960205078125, 702.0090942382812, -190.1627960205078, 845.4375, 273.4076232910156, 681.1294555664062, 338.874755859375, -48.785316467285156, -162.43569946289062, 9.561981201171875, -438.65484619140625, 560.6546020507812, 852.7488403320312, 1206.8531494140625, 26.730663299560547, -390.76617431640625, 183.52969360351562, 251.83724975585938, 1205.677001953125, 349.63330078125, 124.96289825439453, 1086.958740234375, 912.4531860351562, 444.5462341308594, 1186.8878173828125, 1236.16455078125, 677.6077880859375, 598.4154052734375, 436.6893615722656, -20.337539672851562, -58.213417053222656, 196.17135620117188, 744.1047973632812, 791.5859375, -662.6998901367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000469.npy"} +{"epoch": 0.6886930983847284, "step": 470, "batch_size": 64, "mean": 398.8236083984375, "std": 538.6472778320312, "min": -1263.424560546875, "p10": -284.8109710693359, "median": 433.1428985595703, "p90": 1019.1655944824219, "max": 1773.924560546875, "pos_frac": 0.765625, "sample": [453.7706298828125, 471.9771728515625, -92.01025390625, 98.64795684814453, 238.64895629882812, -119.55184173583984, 294.548095703125, 113.48897552490234, -62.517059326171875, -643.85302734375, 531.7461547851562, 776.8467407226562, 582.0706787109375, 365.1039733886719, 647.6290283203125, 1014.9122314453125, -303.60968017578125, 763.9796752929688, -1263.424560546875, 724.0870971679688, -127.43869018554688, 809.7339477539062, 179.09896850585938, -375.09649658203125, 266.69964599609375, 193.27993774414062, -293.9409484863281, 379.02734375, 458.5948486328125, 412.5151672363281, 695.1997680664062, -307.267822265625, 1439.5938720703125, 161.50804138183594, 78.40111541748047, 742.7283935546875, 512.2620849609375, 598.5633544921875, 1773.924560546875, 631.1441040039062, 1420.116455078125, 95.92086791992188, 768.1854248046875, 1088.4107666015625, -99.81478881835938, 1472.8792724609375, 520.20947265625, 574.8848876953125, 716.3212890625, 1008.31640625, 388.9713134765625, -263.5076904296875, -147.6883544921875, 257.5817565917969, 582.3162231445312, 763.4423828125, -313.2933349609375, 1279.3411865234375, 799.75341796875, -170.29969787597656, 1020.9884643554688, 133.24356079101562, 321.5269470214844, 485.8819885253906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000470.npy"} +{"epoch": 0.6901615271659325, "step": 471, "batch_size": 64, "mean": 323.36785888671875, "std": 532.7197875976562, "min": -840.7374267578125, "p10": -272.0084014892578, "median": 234.91592407226562, "p90": 876.0324584960939, "max": 1959.2545166015625, "pos_frac": 0.71875, "sample": [1233.6639404296875, -22.588134765625, -19.8868408203125, 713.76220703125, 379.489990234375, 736.6533203125, 609.7817993164062, 1959.2545166015625, 973.8985595703125, 298.4970703125, -2.8274078369140625, 297.4722900390625, -146.30982971191406, 41.196685791015625, 365.5574645996094, -52.606082916259766, -226.2681884765625, -236.16319274902344, 207.025146484375, -44.47885513305664, 1728.096923828125, -338.587646484375, 262.80670166015625, 386.9826965332031, 701.113037109375, -46.24610137939453, -352.4700622558594, -39.39337158203125, -441.7909240722656, 515.3870239257812, -263.6196594238281, 946.4036865234375, 749.488525390625, 894.7662353515625, 486.2480773925781, 724.3209228515625, 825.3074951171875, 142.57081604003906, 200.4625701904297, 574.3447265625, 171.01168823242188, 177.63662719726562, 64.85411834716797, 5.932933807373047, 778.2425537109375, 1847.3350830078125, 106.36859130859375, -840.7374267578125, 754.1112060546875, 32.760711669921875, -383.98504638671875, 89.44818878173828, 61.519630432128906, 351.99432373046875, 74.0860595703125, 620.1255493164062, 9.097574234008789, -275.60357666015625, 392.642822265625, 301.2445373535156, 712.3094482421875, 832.3203125, -377.3843078613281, 468.89495849609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000471.npy"} +{"epoch": 0.6916299559471366, "step": 472, "batch_size": 64, "mean": 403.055419921875, "std": 653.3126831054688, "min": -1261.796875, "p10": -183.43177947998043, "median": 246.7999725341797, "p90": 1346.7478393554693, "max": 2382.176025390625, "pos_frac": 0.78125, "sample": [-1261.796875, -404.435302734375, 828.725341796875, -450.4009704589844, 206.4113006591797, 146.12759399414062, 509.16546630859375, -164.30682373046875, -24.987943649291992, -734.4321899414062, 254.64804077148438, 51.351749420166016, 138.2455291748047, 438.9685974121094, 51.472747802734375, 906.5357666015625, -32.03303527832031, 691.4992065429688, 341.8834228515625, 5.777099609375, 1222.052734375, 83.246826171875, -31.6129150390625, 16.796646118164062, 238.951904296875, 500.71820068359375, 330.8976135253906, -192.92538452148438, 272.1572265625, 915.9398803710938, 35.89495849609375, 995.1829833984375, 196.65933227539062, 966.5, 2256.53125, 1711.845947265625, 115.04520416259766, 1455.99853515625, 307.10638427734375, 619.019775390625, 359.71710205078125, 966.4915771484375, 100.67350769042969, 283.3662109375, -191.62818908691406, 1407.625244140625, 1759.101318359375, 137.5759735107422, 1400.1885986328125, 111.93312072753906, 2382.176025390625, -113.66368103027344, -154.18727111816406, -85.9997787475586, 1162.2955322265625, 730.5380859375, 728.8156127929688, 188.92135620117188, 361.57452392578125, -341.89215087890625, 273.2239074707031, 427.0951232910156, 231.04080200195312, 156.13780212402344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000472.npy"} +{"epoch": 0.6930983847283406, "step": 473, "batch_size": 64, "mean": 184.0141143798828, "std": 789.5870971679688, "min": -1951.761474609375, "p10": -498.3581237792969, "median": 182.93364715576172, "p90": 1049.4492309570317, "max": 3533.013916015625, "pos_frac": 0.6875, "sample": [-178.87106323242188, 805.6636352539062, 268.8082275390625, -238.30885314941406, -499.2644958496094, 1253.35986328125, -90.49737548828125, 163.14508056640625, 155.78395080566406, 300.8338317871094, 1184.287109375, 652.1663208007812, -74.48652648925781, 160.92388916015625, -586.2328491210938, -425.5433349609375, 42.516204833984375, 56.75325012207031, 199.0685577392578, 1272.419189453125, -290.4566650390625, -87.83778381347656, 659.697265625, 292.0049743652344, 156.62539672851562, 137.088623046875, 332.60260009765625, 327.84722900390625, 204.13589477539062, 97.30014038085938, -1951.761474609375, 106.93729400634766, -496.2432556152344, 183.75636291503906, -1685.6671142578125, -1746.89013671875, 3533.013916015625, 309.11322021484375, 407.1527099609375, 1226.87744140625, 179.4939422607422, 258.2385559082031, 768.6495971679688, 806.4384765625, 222.1959991455078, 671.81298828125, -571.15380859375, -368.5039367675781, 224.51307678222656, 975.7584228515625, 570.88427734375, 1081.031005859375, -406.19134521484375, -197.50164794921875, 101.42739868164062, -137.67578125, 813.7662963867188, -1517.77734375, 515.8064575195312, 182.11093139648438, -297.66082763671875, 302.18603515625, 217.46505737304688, 1243.768310546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000473.npy"} +{"epoch": 0.6945668135095447, "step": 474, "batch_size": 64, "mean": 319.40814208984375, "std": 591.739990234375, "min": -1639.4959716796875, "p10": -250.70170288085936, "median": 321.06219482421875, "p90": 947.1848388671875, "max": 1746.7421875, "pos_frac": 0.765625, "sample": [-1139.5213623046875, 191.02536010742188, 138.14300537109375, 949.21533203125, 780.071533203125, -504.5249938964844, 942.447021484375, 154.81163024902344, 287.6983947753906, 588.9411010742188, 1129.2061767578125, 1746.7421875, 29.34718894958496, 298.3548889160156, 200.55465698242188, 788.0692138671875, 350.4602966308594, -8.591217041015625, -423.7791748046875, 476.1197814941406, 79.73841857910156, 331.4627990722656, -203.78460693359375, 121.51119995117188, 310.6615905761719, 656.7246704101562, 872.943115234375, 193.73431396484375, 518.8241577148438, 435.49090576171875, 693.3751831054688, -63.046382904052734, 62.58312225341797, 297.03717041015625, 506.864501953125, 281.9447021484375, 642.032470703125, 153.97772216796875, -189.95469665527344, -258.90374755859375, 760.012451171875, -96.10922241210938, 611.1065673828125, 487.6204833984375, 188.6683349609375, 343.1095275878906, 1347.95947265625, 1089.45361328125, -125.49159240722656, -771.3845825195312, -231.5635986328125, 883.9223022460938, 415.37054443359375, 614.4127197265625, 714.1090087890625, 230.65248107910156, 370.478759765625, -951.0686645507812, 691.7662963867188, -181.76242065429688, 436.87078857421875, 1199.2088623046875, -1639.4959716796875, 1636.26806640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000474.npy"} +{"epoch": 0.6960352422907489, "step": 475, "batch_size": 64, "mean": 419.8092041015625, "std": 439.2447204589844, "min": -371.6594543457031, "p10": -144.52230072021484, "median": 408.4990234375, "p90": 1025.8115112304693, "max": 1465.427978515625, "pos_frac": 0.796875, "sample": [96.761962890625, -49.7474479675293, 442.1427307128906, 133.63198852539062, -194.01629638671875, 592.1257934570312, 1440.8551025390625, 450.13360595703125, 1184.1707763671875, 1077.194091796875, 285.6988830566406, 295.1533203125, 411.48944091796875, 11.9154052734375, 565.6566162109375, -371.6594543457031, 626.246337890625, -12.53645133972168, 789.05224609375, -140.39013671875, 760.5375366210938, 794.827880859375, 806.2930908203125, 213.06475830078125, 134.48336791992188, 319.08929443359375, 420.68316650390625, 409.374755859375, 29.633983612060547, 473.2099609375, 260.8011779785156, 511.3157043457031, -123.97792053222656, 248.0469512939453, 617.9575805664062, 827.587890625, 407.623291015625, 739.8606567382812, -221.82020568847656, 1307.24365234375, 440.44073486328125, -19.885498046875, 301.93914794921875, -177.8240509033203, 786.2039794921875, 107.62376403808594, 554.8305053710938, -146.29322814941406, 350.38543701171875, 594.9012451171875, -106.21279907226562, 905.9188232421875, 589.801025390625, 88.35897827148438, 1465.427978515625, -193.2017364501953, 222.83949279785156, 1402.42041015625, 701.4124755859375, 1094.3406982421875, 396.0540466308594, 798.3938598632812, -218.41336059570312, 358.6116027832031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000475.npy"} +{"epoch": 0.697503671071953, "step": 476, "batch_size": 64, "mean": 334.79010009765625, "std": 462.4664306640625, "min": -508.9059753417969, "p10": -184.9171020507812, "median": 237.60406494140625, "p90": 915.3699462890626, "max": 2108.216552734375, "pos_frac": 0.796875, "sample": [800.7403564453125, 112.6016616821289, -121.92156219482422, 684.3453369140625, 300.8218994140625, 521.303955078125, 1304.3797607421875, 130.88192749023438, 120.65127563476562, -133.85247802734375, 1080.739013671875, 85.60822296142578, 153.7173309326172, 417.52105712890625, 887.6051025390625, 490.1744384765625, 579.1082763671875, 70.38436126708984, 225.01968383789062, 107.3549575805664, 565.4594116210938, 244.83197021484375, -10.918045043945312, 1231.937255859375, 501.7032165527344, 560.4843139648438, -466.938720703125, -268.9207763671875, 750.543212890625, 83.84508514404297, 9.047271728515625, 121.8536376953125, -305.85772705078125, -273.1372985839844, 516.1256103515625, -41.50352096557617, -206.80194091796875, -365.4068298339844, 687.9083251953125, 163.67193603515625, 230.08831787109375, 608.283447265625, 278.6363830566406, -127.34394836425781, 156.1794891357422, 229.24424743652344, 767.7390747070312, 1032.1580810546875, 2108.216552734375, 927.2691650390625, 313.3587646484375, -16.758529663085938, 275.9784851074219, 230.37615966796875, 150.5128631591797, 104.28298950195312, 574.9928588867188, 532.06494140625, 121.20808410644531, 351.3067626953125, 301.1824951171875, 1158.5093994140625, 312.8743896484375, -508.9059753417969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000476.npy"} +{"epoch": 0.6989720998531571, "step": 477, "batch_size": 64, "mean": 328.0302429199219, "std": 572.0289306640625, "min": -1650.9566650390625, "p10": -279.51185607910156, "median": 279.2987518310547, "p90": 1011.5849243164063, "max": 2019.2642822265625, "pos_frac": 0.734375, "sample": [-496.07855224609375, 452.55487060546875, 928.6084594726562, -656.5773315429688, 1522.486328125, 231.69566345214844, 268.4151306152344, 852.3379516601562, 788.219970703125, 1049.696044921875, -25.694541931152344, 82.67337799072266, 221.89915466308594, 819.6810302734375, 966.7215576171875, -108.6242904663086, 691.3840942382812, 516.9186401367188, 1121.5430908203125, -288.21380615234375, 530.3999633789062, 173.15020751953125, -10.812164306640625, 201.7908172607422, -253.03323364257812, 701.4774169921875, 348.1373596191406, 131.92288208007812, 4.869539260864258, -96.59185791015625, -502.4795837402344, 447.4412536621094, -1650.9566650390625, -666.264892578125, 1273.7901611328125, 165.71139526367188, 230.54257202148438, 153.16754150390625, 490.025390625, 78.71817016601562, 662.6065673828125, -70.68359375, 572.302978515625, 1002.59130859375, 329.1665954589844, 317.0179748535156, 323.2388610839844, 830.1640014648438, 429.8741760253906, 190.60855102539062, 1032.29296875, -259.2073059082031, -55.540245056152344, 290.182373046875, -4.96612548828125, 1015.4393310546875, 622.7274169921875, -148.83175659179688, 31.441436767578125, -305.75, 114.78365325927734, 701.612548828125, 2019.2642822265625, 662.9468994140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000477.npy"} +{"epoch": 0.7004405286343612, "step": 478, "batch_size": 64, "mean": 394.17974853515625, "std": 525.708740234375, "min": -906.61279296875, "p10": -214.84334564208982, "median": 399.0555114746094, "p90": 1196.601000976563, "max": 1659.6715087890625, "pos_frac": 0.8125, "sample": [-222.4006805419922, 157.60926818847656, 728.1958618164062, 99.68775177001953, 78.2162094116211, 455.28985595703125, 353.7084045410156, -497.0189208984375, 480.3590087890625, 8.297515869140625, 429.190673828125, 525.541748046875, -508.3531188964844, -528.091552734375, 281.9685974121094, 861.6159057617188, 770.058349609375, 1659.6715087890625, 418.2998046875, 163.11895751953125, -331.18170166015625, -197.20956420898438, -403.0333251953125, 201.45590209960938, 389.4344482421875, 1065.0355224609375, 481.35028076171875, 448.4509582519531, -46.826568603515625, 232.23976135253906, 1523.5721435546875, 1099.0955810546875, 312.85894775390625, 257.1913757324219, 449.94183349609375, 272.5896911621094, 1392.8455810546875, 1410.7110595703125, 639.92919921875, -124.65470886230469, 580.5841064453125, -66.89949798583984, 123.47824096679688, -108.2145004272461, 725.3129272460938, 462.6390686035156, 120.62317657470703, 1238.3890380859375, 238.8123016357422, 408.67657470703125, 532.032470703125, 8.300865173339844, 356.1798095703125, 829.4402465820312, 499.3912353515625, 303.1493835449219, 1465.1175537109375, 680.89990234375, 1314.5703125, -906.61279296875, 573.6398315429688, 436.1649169921875, 617.1790161132812, 5.8892364501953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000478.npy"} +{"epoch": 0.7019089574155654, "step": 479, "batch_size": 64, "mean": 376.33001708984375, "std": 641.447509765625, "min": -1411.599853515625, "p10": -288.70386657714835, "median": 288.7840881347656, "p90": 1138.682958984375, "max": 2963.864501953125, "pos_frac": 0.734375, "sample": [50.047035217285156, 69.89730072021484, 1131.8406982421875, 598.005859375, 1033.3790283203125, 1168.0333251953125, -393.1756591796875, -214.84713745117188, 259.851806640625, 35.608367919921875, 780.0394287109375, -127.636962890625, -203.80563354492188, 317.71636962890625, 801.26123046875, 214.58367919921875, 223.9286346435547, -142.49114990234375, 937.1049194335938, 476.9103698730469, 201.56607055664062, 520.5316162109375, -120.87306213378906, 523.3290405273438, 2963.864501953125, 1402.851806640625, 682.0252075195312, 735.5342407226562, 746.2998046875, 482.8698425292969, 1254.9398193359375, 211.50143432617188, 191.75299072265625, 797.52734375, -615.7371215820312, 1752.634521484375, -24.436492919921875, 673.8067016601562, 454.2443542480469, 129.96824645996094, 426.94775390625, 1141.6153564453125, 696.6668701171875, 444.9764404296875, -89.37229919433594, -1411.599853515625, 401.22161865234375, -389.4241943359375, -320.35675048828125, -81.28070068359375, 155.26451110839844, 9.025413513183594, -475.1439208984375, -544.9871826171875, 95.77626037597656, -64.0097885131836, 553.266845703125, -34.44366455078125, 127.4395751953125, 1346.7763671875, 377.66802978515625, 1001.2900390625, 102.27095031738281, 635.0830078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000479.npy"} +{"epoch": 0.7033773861967695, "step": 480, "batch_size": 64, "mean": 288.78948974609375, "std": 587.0894165039062, "min": -893.6109008789062, "p10": -511.00391235351555, "median": 314.5585479736328, "p90": 977.5253356933596, "max": 2195.574462890625, "pos_frac": 0.671875, "sample": [-554.7443237304688, 350.26617431640625, -158.27566528320312, 132.8062744140625, 583.253173828125, 59.982688903808594, 301.0711669921875, 997.842041015625, 613.806640625, 843.1168823242188, 493.51995849609375, 1363.3826904296875, 431.2899169921875, -422.98553466796875, -776.2463989257812, -617.5494384765625, 722.6122436523438, 853.4234619140625, -158.29489135742188, 298.7760009765625, 4.803245544433594, -110.40409851074219, 23.873367309570312, 413.31494140625, 431.4234313964844, 159.06040954589844, 2195.574462890625, 328.34271240234375, 607.0656127929688, -235.73207092285156, 834.2847900390625, 930.1196899414062, 896.348876953125, -192.36534118652344, -548.72607421875, 328.0459289550781, 513.3272094726562, -157.12290954589844, 452.37225341796875, -893.6109008789062, -774.115966796875, -687.3851318359375, 654.8740844726562, 1180.8870849609375, 1409.760986328125, -126.5538558959961, 345.5218200683594, 446.73699951171875, -40.012935638427734, 480.4943542480469, -8.311187744140625, -357.47125244140625, 133.7225341796875, 197.01950073242188, 160.41571044921875, -26.798412322998047, 93.21709442138672, -19.29922866821289, 632.2160034179688, 1110.5606689453125, 514.5875854492188, -216.5646209716797, 1164.16064453125, 877.8157348632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000480.npy"} +{"epoch": 0.7048458149779736, "step": 481, "batch_size": 64, "mean": 452.87890625, "std": 568.56494140625, "min": -893.3746337890625, "p10": -192.19943695068358, "median": 374.5646057128906, "p90": 1019.4062988281252, "max": 2047.3262939453125, "pos_frac": 0.765625, "sample": [2047.3262939453125, -893.3746337890625, 587.8292846679688, 352.29681396484375, -221.9533233642578, 28.184158325195312, -108.63491821289062, 384.19140625, 460.2706298828125, 1542.473388671875, 132.410400390625, -197.22366333007812, 143.38038635253906, -322.093994140625, 465.05120849609375, 986.915771484375, -148.80545043945312, 774.4332885742188, 1485.4310302734375, -37.78672790527344, -36.65875244140625, 842.1437377929688, 296.4341125488281, 760.5442504882812, 430.6062927246094, 199.2540283203125, 185.38894653320312, 945.4893188476562, 786.0391845703125, -358.0657043457031, 153.82427978515625, 805.238525390625, 402.45074462890625, -177.22406005859375, 820.2333374023438, 801.8796997070312, -255.90399169921875, -59.29487991333008, 931.4415283203125, 515.7666625976562, -224.5909423828125, 135.9245147705078, 876.1507568359375, 903.1710815429688, 68.9994888305664, 113.94878387451172, 1684.9649658203125, 351.5162658691406, 1689.2808837890625, 1033.330810546875, 294.50030517578125, -8.781295776367188, 348.4557800292969, 464.5290222167969, 508.23681640625, 130.63818359375, 200.49172973632812, 872.5934448242188, 775.6637573242188, 785.9600830078125, 1674.9178466796875, -180.4762420654297, 364.93780517578125, 669.976806640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000481.npy"} +{"epoch": 0.7063142437591777, "step": 482, "batch_size": 64, "mean": 214.26510620117188, "std": 631.9312133789062, "min": -2088.77392578125, "p10": -480.8723052978515, "median": 254.37477111816406, "p90": 811.1164367675782, "max": 1972.06201171875, "pos_frac": 0.6875, "sample": [248.11416625976562, -65.46495056152344, 329.3621520996094, -824.7781982421875, 423.39031982421875, 628.78173828125, -1102.219482421875, 609.4700317382812, 699.624267578125, 469.192626953125, 382.9808044433594, 652.0361938476562, -12.356292724609375, 311.739013671875, 659.4376831054688, 1243.7030029296875, 229.04815673828125, -356.74957275390625, 338.9092712402344, 314.647705078125, -29.852161407470703, 396.6894226074219, 460.85076904296875, 792.3154907226562, -943.4692993164062, 426.976806640625, 180.17742919921875, 990.2064208984375, 260.6353759765625, 515.8038330078125, 762.4148559570312, 597.3546142578125, 812.8213500976562, 233.66726684570312, -317.8692932128906, 171.86404418945312, 934.6727905273438, -503.2504577636719, 1399.5404052734375, -428.6566162109375, 609.6892700195312, 8.433145523071289, -204.96951293945312, 38.06043243408203, 146.26991271972656, 80.09002685546875, -2088.77392578125, 332.2476806640625, 1972.06201171875, -9.489112854003906, -3.1309814453125, -67.47883605957031, 23.19915771484375, 508.52532958984375, -1028.452880859375, 329.5414123535156, 163.326416015625, 1336.9879150390625, -214.60791015625, 88.46690368652344, -204.69808959960938, -138.80380249023438, 807.1383056640625, -662.4266357421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000482.npy"} +{"epoch": 0.7077826725403817, "step": 483, "batch_size": 64, "mean": 478.5213623046875, "std": 600.3779907226562, "min": -547.7156372070312, "p10": -222.47664947509762, "median": 385.6799774169922, "p90": 1222.3652099609376, "max": 3016.7958984375, "pos_frac": 0.796875, "sample": [-158.62184143066406, 50.767852783203125, 1361.868896484375, 841.5892944335938, -283.9205322265625, 784.2369384765625, 395.3675537109375, 375.9924011230469, 664.5888061523438, 655.2699584960938, 409.08306884765625, 807.26318359375, 501.3653564453125, 117.54994201660156, 240.75729370117188, 262.11468505859375, 1038.4287109375, 1014.4921875, -547.7156372070312, 536.89990234375, 319.2554931640625, -39.44060516357422, 790.3623046875, 917.3433837890625, 45.42552185058594, -363.8243713378906, 1291.075927734375, 712.7857055664062, 51.48637390136719, 1083.772705078125, -234.6663055419922, 1362.00830078125, 16.893943786621094, 910.4647827148438, -8.153945922851562, 1186.3599853515625, 272.6152038574219, 1064.1630859375, 197.41998291015625, 1162.492431640625, 772.8613891601562, 635.1377563476562, 496.0830078125, -362.925537109375, 193.73379516601562, 342.6788330078125, 451.2315368652344, 288.46661376953125, 1237.7960205078125, -71.75636291503906, 219.9798583984375, -543.8336181640625, 170.81472778320312, 3016.7958984375, -147.28123474121094, 174.77220153808594, 1424.8040771484375, 273.14971923828125, 462.8450927734375, -289.5693054199219, 672.2891845703125, 1303.7984619140625, 292.31182861328125, -194.03411865234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000483.npy"} +{"epoch": 0.7092511013215859, "step": 484, "batch_size": 64, "mean": 435.53656005859375, "std": 531.379150390625, "min": -681.354736328125, "p10": -111.22025604248044, "median": 359.74501037597656, "p90": 1066.8930419921876, "max": 1806.1072998046875, "pos_frac": 0.78125, "sample": [460.69744873046875, -129.3426971435547, 1782.05517578125, 119.83326721191406, -123.85334777832031, -228.77052307128906, 564.407958984375, 92.14403533935547, 1806.1072998046875, 206.30999755859375, 94.19174194335938, 476.453125, 579.4776000976562, 540.7613525390625, 987.2880859375, 688.5042114257812, 401.2516174316406, 40.148624420166016, -222.769775390625, -681.354736328125, 381.7144470214844, 11.836780548095703, 1073.2308349609375, 948.0015869140625, 1779.328125, 60.19560241699219, 363.138916015625, 799.7561645507812, 33.53030014038086, -72.84591674804688, -37.20229721069336, -14.20489501953125, 909.6102294921875, 1308.98828125, 45.71137237548828, 162.79592895507812, 240.4613037109375, 109.3874282836914, 148.69496154785156, -186.13597106933594, -26.375144958496094, 347.19207763671875, 810.0017700195312, 356.3511047363281, 1325.3968505859375, -81.7430419921875, 682.6235961914062, 1048.9559326171875, 441.3725891113281, -211.41061401367188, -51.52140808105469, 785.2147827148438, 1052.1048583984375, 861.4716186523438, 82.60124206542969, 1096.152587890625, 1045.6346435546875, 403.9217529296875, 507.84759521484375, 98.9875259399414, 828.2344970703125, 72.92018127441406, -65.8089370727539, 944.6788940429688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000484.npy"} +{"epoch": 0.71071953010279, "step": 485, "batch_size": 64, "mean": 411.17791748046875, "std": 596.7046508789062, "min": -947.8231811523438, "p10": -149.37587966918943, "median": 292.6755676269531, "p90": 1121.03232421875, "max": 2232.052490234375, "pos_frac": 0.75, "sample": [706.9376831054688, 701.2390747070312, -35.77763366699219, 260.05609130859375, 422.66583251953125, 171.40542602539062, 1140.425537109375, 191.43057250976562, 502.98980712890625, 470.3866882324219, 204.57318115234375, -35.58301544189453, 1568.998291015625, 1106.6927490234375, 138.46282958984375, 1016.1126098632812, -521.4024047851562, -93.8622817993164, 992.167724609375, 1094.4151611328125, 1037.98291015625, 1251.0338134765625, 140.15328979492188, 557.5857543945312, 2232.052490234375, 1180.272216796875, -34.53575897216797, 39.71051788330078, 834.7752075195312, -111.59029388427734, -65.2179946899414, -80.95523071289062, -632.203857421875, 164.52145385742188, 257.51373291015625, 911.98193359375, 481.0751953125, 120.64197540283203, 115.5548095703125, 707.0892333984375, -81.36627197265625, 138.55722045898438, 2062.002197265625, 479.8978271484375, -165.5697021484375, 377.1483459472656, 392.742919921875, 647.7224731445312, 325.2950439453125, -668.1494140625, 1127.1778564453125, 1020.6719970703125, 62.73813247680664, -300.2514343261719, 42.23042297363281, -194.95396423339844, 930.4451904296875, 159.76133728027344, 336.3362121582031, 190.06646728515625, -7.698173522949219, -947.8231811523438, 886.8806762695312, 391.7491760253906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000485.npy"} +{"epoch": 0.7121879588839941, "step": 486, "batch_size": 64, "mean": 466.4476013183594, "std": 714.4720458984375, "min": -1220.11328125, "p10": -425.7793212890624, "median": 367.0062713623047, "p90": 1365.6197265625, "max": 2914.916259765625, "pos_frac": 0.796875, "sample": [1124.4315185546875, 596.1891479492188, 73.80195617675781, 637.2374877929688, 301.65728759765625, -954.9949951171875, 247.86880493164062, 123.61185455322266, 994.3882446289062, -184.44564819335938, 618.2901000976562, 1419.2047119140625, 661.1533813476562, -925.2283325195312, -483.82952880859375, 1539.7628173828125, 973.9014892578125, 938.4747314453125, -297.4568786621094, 1198.58056640625, 1367.150634765625, -362.19091796875, 1567.5802001953125, 1440.59228515625, 214.52491760253906, -226.32736206054688, 569.0574340820312, 368.1381530761719, 93.65182495117188, 1488.4012451171875, 1290.8203125, 365.4407958984375, -453.031494140625, -850.5272216796875, 339.65570068359375, 365.8743896484375, 96.42181396484375, 1298.5396728515625, 886.3512573242188, 801.3321533203125, 308.8543701171875, 823.1832275390625, 467.554443359375, -84.7601318359375, 601.2564086914062, 924.57861328125, 870.3741455078125, 592.4685668945312, 893.6683959960938, 1362.047607421875, 123.0630111694336, 340.42608642578125, 56.661529541015625, 364.3889465332031, 663.2078857421875, 155.94044494628906, 6.806432723999023, 545.3858032226562, -1220.11328125, 145.30250549316406, 317.6066589355469, -54.104339599609375, -530.1222534179688, 2914.916259765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000486.npy"} +{"epoch": 0.7136563876651982, "step": 487, "batch_size": 64, "mean": 346.4443664550781, "std": 538.9556884765625, "min": -1708.74951171875, "p10": -186.4023223876953, "median": 317.78981018066406, "p90": 1089.293298339844, "max": 1737.89599609375, "pos_frac": 0.765625, "sample": [396.513427734375, 1211.396240234375, 201.6607666015625, -265.6168518066406, 1176.849853515625, 1110.134033203125, 724.0637817382812, 483.4542541503906, 1443.9385986328125, -39.67980194091797, 203.65252685546875, -436.3489990234375, 1111.1123046875, -189.75534057617188, 105.34850311279297, 1737.89599609375, 373.6568603515625, -275.8085021972656, 765.2570190429688, 803.7380981445312, 440.7548828125, 630.7576904296875, -1708.74951171875, 176.53085327148438, -110.26311492919922, 583.9318237304688, -12.860511779785156, 962.000244140625, 1318.84912109375, 43.439659118652344, 174.4509735107422, 330.26312255859375, 326.563232421875, 468.1146240234375, 161.50411987304688, -613.7740478515625, 252.90011596679688, -545.4049072265625, 587.8202514648438, 265.9549865722656, 407.30316162109375, 90.22161865234375, 635.79296875, -163.3096466064453, 473.09930419921875, 161.15512084960938, 77.53520202636719, 309.0163879394531, 406.32659912109375, 288.6125793457031, 140.78587341308594, 608.3046264648438, 287.33642578125, 471.6849365234375, -6.5958099365234375, -178.57861328125, 182.74520874023438, 1040.6649169921875, 794.4107666015625, 713.3336181640625, -16.516265869140625, 530.74267578125, -92.76028442382812, 636.88232421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000487.npy"} +{"epoch": 0.7151248164464024, "step": 488, "batch_size": 64, "mean": 367.892578125, "std": 462.5895690917969, "min": -529.285400390625, "p10": -123.26457748413085, "median": 335.75201416015625, "p90": 1055.6404113769534, "max": 1613.155517578125, "pos_frac": 0.734375, "sample": [-4.215888977050781, -7.961391448974609, 1401.88720703125, 279.10174560546875, 1005.1482543945312, 174.58921813964844, 748.92138671875, -40.425148010253906, -125.73941040039062, 327.2068786621094, 473.2830810546875, 247.24900817871094, 561.3157348632812, 1230.945068359375, 350.25146484375, 767.7535400390625, -47.522430419921875, 34.539649963378906, -529.285400390625, -57.7454833984375, 499.0083923339844, -148.66848754882812, 483.41265869140625, 358.5811462402344, 1.5847015380859375, -284.8433837890625, 57.553035736083984, 655.6409301757812, 1613.155517578125, 264.09710693359375, 501.77740478515625, 643.6611328125, -351.6435241699219, 1077.2799072265625, 730.4081420898438, 49.83134078979492, 1256.6083984375, 712.9495849609375, 356.9349670410156, 133.39886474609375, 638.2955932617188, 966.8077392578125, 299.29852294921875, 313.57379150390625, 192.44117736816406, 344.2971496582031, 646.0597534179688, 1180.1943359375, -117.4899673461914, -81.3470230102539, -132.6007537841797, 489.6820983886719, 465.2779235839844, 51.527870178222656, 496.361083984375, 502.1286315917969, -74.24177551269531, 461.8394470214844, -0.3030281066894531, -76.21027374267578, -495.395263671875, 547.602294921875, 259.9710693359375, 1267.3275146484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000488.npy"} +{"epoch": 0.7165932452276065, "step": 489, "batch_size": 64, "mean": 357.419677734375, "std": 526.283935546875, "min": -562.9900512695312, "p10": -162.31460647583003, "median": 201.0049819946289, "p90": 1160.4672485351564, "max": 2056.43896484375, "pos_frac": 0.734375, "sample": [804.899658203125, -13.456283569335938, 226.60618591308594, 112.00892639160156, 10.246267318725586, 145.62164306640625, 190.53065490722656, -260.1254577636719, 101.96693420410156, 205.76937866210938, -250.19493103027344, 166.67379760742188, 576.9466552734375, 780.17724609375, 463.7330627441406, 192.3219451904297, -180.35047912597656, 219.6510009765625, -77.75110626220703, 960.6896362304688, 10.308305740356445, -71.74740600585938, 243.0179901123047, 247.15606689453125, -82.80868530273438, 149.8320770263672, 191.74815368652344, -68.84857177734375, 196.24058532714844, 600.89453125, -45.55377197265625, 47.15992736816406, -29.918214797973633, -265.35137939453125, 1292.5201416015625, 714.7982788085938, 58.46580123901367, 108.11454772949219, 341.75439453125, -562.9900512695312, 650.2610473632812, -210.34808349609375, 1278.1600341796875, -390.138427734375, 1174.7850341796875, 1127.05908203125, 662.0802001953125, 2056.43896484375, 331.30816650390625, 564.3614501953125, 1831.5556640625, 528.4525146484375, 523.1058349609375, 1500.398193359375, -105.21175384521484, -120.23090362548828, 745.048828125, 496.4465637207031, 524.26123046875, 411.6082763671875, 0.601776123046875, 1294.684814453125, 558.1995849609375, -8.784912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000489.npy"} +{"epoch": 0.7180616740088106, "step": 490, "batch_size": 64, "mean": 367.3455810546875, "std": 655.9293212890625, "min": -1208.7618408203125, "p10": -399.1303070068359, "median": 218.6532745361328, "p90": 1210.8778198242187, "max": 1912.5665283203125, "pos_frac": 0.765625, "sample": [75.00357055664062, -1110.9432373046875, 1197.36376953125, -283.9510803222656, 150.10910034179688, -53.864158630371094, 168.93505859375, 904.4970092773438, 855.2095336914062, -545.9915161132812, 156.0389404296875, -62.47265625, 1408.3170166015625, 52.751739501953125, 621.1883544921875, -419.8836364746094, -205.95925903320312, 1081.2586669921875, 656.8406372070312, -682.91748046875, 29.980510711669922, 141.95172119140625, -607.2373657226562, 61.34730529785156, -1208.7618408203125, 1305.0107421875, 87.87686157226562, 439.03387451171875, 536.8939208984375, 780.125, 212.00311279296875, 1912.5665283203125, 86.03265380859375, 625.4707641601562, 32.751976013183594, -208.900390625, 1685.7369384765625, 884.4345092773438, 1714.28564453125, 21.569385528564453, 288.40911865234375, 225.30343627929688, 1216.6695556640625, 671.0750732421875, -137.58558654785156, 789.4238891601562, -350.70587158203125, 833.8013305664062, 42.940765380859375, 508.71697998046875, 335.82269287109375, -531.4097900390625, 1746.8916015625, -22.0079345703125, 969.723876953125, 947.771484375, 80.08331298828125, 126.93670654296875, 622.1439208984375, 960.9614868164062, 57.52326583862305, 849.534423828125, 512.0529174804688, 272.3360595703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000490.npy"} +{"epoch": 0.7195301027900147, "step": 491, "batch_size": 64, "mean": 254.20652770996094, "std": 475.7027282714844, "min": -820.6328735351562, "p10": -283.1089874267578, "median": 172.2318344116211, "p90": 801.0976928710938, "max": 1617.4498291015625, "pos_frac": 0.734375, "sample": [339.15667724609375, -276.1139221191406, 790.1064453125, 507.1961669921875, 1450.482177734375, 228.8348846435547, 340.0973205566406, -140.6645050048828, 435.4339294433594, 94.198974609375, 323.6880798339844, 113.79951477050781, 59.118568420410156, -534.768798828125, 504.2454528808594, 116.74250793457031, 704.1650390625, 458.4407043457031, -13.555343627929688, 520.0321044921875, 160.37979125976562, 63.95502471923828, 961.0047607421875, -476.8663024902344, 231.3570098876953, 238.90704345703125, 691.2496948242188, -243.99183654785156, 805.8082275390625, 39.178977966308594, 1055.8082275390625, 416.9743957519531, -820.6328735351562, 129.984375, -206.71546936035156, 267.1622009277344, 4.206840515136719, 1096.5960693359375, 268.99432373046875, 360.75665283203125, 662.8276977539062, -578.16943359375, 1617.4498291015625, 39.831451416015625, -182.0942840576172, 153.83895874023438, -286.10687255859375, 184.08387756347656, -50.595252990722656, 549.2985229492188, 96.51478576660156, -372.03778076171875, 706.6487426757812, -90.00068664550781, -55.036048889160156, 137.8804168701172, 360.6930847167969, 769.767333984375, 117.64500427246094, -47.138275146484375, 1300.677001953125, 135.5687255859375, 480.8616943359375, -447.9432067871094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000491.npy"} +{"epoch": 0.7209985315712188, "step": 492, "batch_size": 64, "mean": 247.95816040039062, "std": 552.0293579101562, "min": -958.4644165039062, "p10": -396.3013031005859, "median": 159.65695190429688, "p90": 1037.4760986328126, "max": 1383.373046875, "pos_frac": 0.734375, "sample": [163.12091064453125, 266.45086669921875, 485.6094055175781, 38.093955993652344, 245.84872436523438, 35.79402160644531, 17.645782470703125, 703.6533813476562, -517.67333984375, -168.7209930419922, 158.42044067382812, 611.5607299804688, 671.147705078125, -251.38095092773438, -958.4644165039062, 997.9013671875, -110.41898345947266, 1196.58544921875, -167.39083862304688, 1359.3433837890625, -371.8240966796875, 1379.0057373046875, 292.7289733886719, -308.6147766113281, 396.55645751953125, 379.65228271484375, 262.7877502441406, -492.40283203125, 66.39799499511719, 335.055908203125, 1.49017333984375, 25.47768211364746, 160.89346313476562, -837.3897094726562, -163.40576171875, 1196.9931640625, 871.897216796875, 299.90545654296875, -709.2680053710938, 92.74040985107422, 1023.18505859375, -706.467041015625, 1189.701416015625, 17.223487854003906, -147.2744598388672, 808.19873046875, 205.33544921875, 84.63856506347656, 971.2584838867188, 69.63833618164062, 502.6297302246094, -133.8728485107422, 153.48187255859375, -406.7915344238281, 1043.600830078125, 873.2901611328125, 501.49493408203125, 40.88581848144531, 76.88078308105469, 485.7531433105469, -280.29052734375, 1383.373046875, 47.73194885253906, 409.9139404296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000492.npy"} +{"epoch": 0.7224669603524229, "step": 493, "batch_size": 64, "mean": 277.2387390136719, "std": 529.29248046875, "min": -1861.5814208984375, "p10": -196.6875961303711, "median": 195.32789611816406, "p90": 1028.033447265625, "max": 1350.5870361328125, "pos_frac": 0.765625, "sample": [997.9462890625, 68.2094955444336, 539.548583984375, 246.79965209960938, -90.70751953125, 1214.0355224609375, 909.5325927734375, 18.794097900390625, 94.5234603881836, -361.19708251953125, 462.1800537109375, 93.16169738769531, 1217.985107421875, 473.82061767578125, 363.38165283203125, 136.13943481445312, -297.1416931152344, 289.9044494628906, 133.40072631835938, 44.21153259277344, 131.78736877441406, 293.8910217285156, 432.37005615234375, 400.18896484375, -231.36187744140625, 458.6378173828125, 651.6512451171875, -188.6810302734375, -178.77313232421875, -130.5870361328125, 24.83905029296875, -1861.5814208984375, 341.8564758300781, 30.048538208007812, 789.3683471679688, 22.204309463500977, 1350.5870361328125, 1251.0914306640625, 172.19073486328125, -197.59739685058594, -124.46047973632812, 372.5645751953125, 1191.5474853515625, 865.1568603515625, 345.6659240722656, 1024.3326416015625, -480.0765075683594, 325.54461669921875, 1348.131103515625, 1029.6195068359375, 83.69341278076172, 597.179443359375, -281.0248718261719, 209.1403350830078, 206.88568115234375, -194.56472778320312, -97.81950378417969, 256.2532653808594, 689.1719970703125, 183.77011108398438, 147.87557983398438, 2.09033203125, -80.34097290039062, 6.2856597900390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000493.npy"} +{"epoch": 0.723935389133627, "step": 494, "batch_size": 64, "mean": 436.12835693359375, "std": 483.8360900878906, "min": -595.6873779296875, "p10": -88.1373229980468, "median": 410.8778533935547, "p90": 1106.0121215820313, "max": 1756.781005859375, "pos_frac": 0.859375, "sample": [-2.630247116088867, 337.560302734375, 511.018798828125, 392.1076965332031, 1756.781005859375, 1407.0714111328125, -595.6873779296875, 732.3110961914062, 707.9981079101562, 111.93365478515625, 434.7063903808594, -12.116596221923828, 848.2973022460938, 364.24053955078125, -289.7044677734375, 1109.9305419921875, 663.6904296875, -230.66070556640625, 417.0910339355469, 793.3244018554688, 121.2468032836914, -437.8417053222656, 258.8114318847656, 904.5305786132812, 835.9349365234375, 41.89148712158203, 746.7677001953125, 1096.869140625, 250.80787658691406, 373.0455017089844, -442.916015625, 49.28790283203125, 241.3224639892578, -292.443115234375, 1681.3140869140625, 411.9189147949219, 442.43707275390625, 69.97127532958984, 409.8367919921875, 1248.7904052734375, 1139.26123046875, 502.58111572265625, 8.861618041992188, 50.65215301513672, 478.56317138671875, 624.8341064453125, 745.432373046875, 921.9614868164062, 386.63006591796875, 612.4197387695312, -120.71763610839844, 74.51594543457031, 528.7503662109375, 130.97457885742188, 154.42581176757812, 434.86688232421875, 826.7272338867188, 653.8948364257812, 230.4892578125, 428.609619140625, 1170.8968505859375, 56.36753463745117, 349.2045593261719, 53.164306640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000494.npy"} +{"epoch": 0.7254038179148311, "step": 495, "batch_size": 64, "mean": 380.0621337890625, "std": 567.6135864257812, "min": -970.7493286132812, "p10": -162.38106689453124, "median": 310.9029083251953, "p90": 1053.513073730469, "max": 2380.029052734375, "pos_frac": 0.78125, "sample": [359.5145263671875, 109.84199523925781, -20.2198486328125, -166.43304443359375, 229.1536865234375, 310.87640380859375, -534.1321411132812, 157.8682861328125, 300.5074462890625, 32.95491027832031, 175.8250274658203, 731.31396484375, 916.8261108398438, -619.6673583984375, 370.10693359375, 313.6772766113281, -279.6150207519531, 816.511474609375, 82.44210815429688, 665.123779296875, 156.1732177734375, 1.404449462890625, 295.3491516113281, 81.36578369140625, 369.4809265136719, 1011.1436767578125, 925.0404663085938, 738.6167602539062, 208.47842407226562, 541.9190673828125, 284.12762451171875, 325.809326171875, 141.55706787109375, 428.45428466796875, 1075.6497802734375, 1071.67138671875, 836.2734985351562, -120.46138000488281, 2380.029052734375, 1371.609619140625, 365.3862609863281, -22.765670776367188, 131.68228149414062, 635.5548095703125, 351.5146789550781, -565.0882568359375, -29.684341430664062, 343.2960205078125, 62.216514587402344, -285.1101989746094, -152.92645263671875, -8.13094711303711, 414.37445068359375, 504.92236328125, 1361.480224609375, 571.8526000976562, 310.9294128417969, 852.1087036132812, -970.7493286132812, 1133.85009765625, 985.1876831054688, 300.09185791015625, 2001.78369140625, -43.967552185058594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000495.npy"} +{"epoch": 0.7268722466960352, "step": 496, "batch_size": 64, "mean": 205.62347412109375, "std": 443.10955810546875, "min": -901.3890991210938, "p10": -342.20185852050776, "median": 166.8455047607422, "p90": 871.730126953125, "max": 1122.986328125, "pos_frac": 0.71875, "sample": [99.39254760742188, -482.7886962890625, 978.6121826171875, 856.279541015625, -765.0285034179688, 711.57421875, 132.6781005859375, 288.02520751953125, 878.351806640625, 178.60818481445312, 1122.986328125, 278.3011779785156, 391.2510681152344, 245.54229736328125, -273.9625244140625, -25.571651458740234, -154.7025146484375, 400.083984375, -730.5203857421875, 1108.2230224609375, 918.0323486328125, 402.4209289550781, 785.96630859375, 413.75634765625, 883.2147827148438, 261.8057861328125, -150.85777282714844, -54.76216125488281, 72.416259765625, -360.2340393066406, 184.70526123046875, 73.62657165527344, 92.42733764648438, 428.2395935058594, 63.49449920654297, -88.24815368652344, 479.847900390625, 155.67916870117188, -548.4933471679688, 342.4595947265625, -464.3330078125, -300.12677001953125, 643.4937133789062, 64.8406753540039, 164.4722442626953, 551.0010986328125, 169.21876525878906, 436.0646667480469, 594.0591430664062, 448.6352233886719, 78.97146606445312, -162.6283416748047, 435.86151123046875, -25.691415786743164, 204.83389282226562, -901.3890991210938, 117.86128234863281, 961.5513305664062, -0.5159072875976562, 16.808082580566406, 446.8497619628906, -3.840923309326172, 79.83135223388672, 11.24139404296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000496.npy"} +{"epoch": 0.7283406754772394, "step": 497, "batch_size": 64, "mean": 205.537109375, "std": 462.5643310546875, "min": -876.115234375, "p10": -298.54652404785156, "median": 163.23263549804688, "p90": 851.2255249023438, "max": 1338.739013671875, "pos_frac": 0.6875, "sample": [-301.6589050292969, 907.2845458984375, 621.6254272460938, 662.1111450195312, 10.301538467407227, -308.9118957519531, 489.657470703125, -229.73304748535156, -106.01853942871094, -199.8565216064453, 239.7641143798828, 1020.02001953125, 1146.02587890625, 402.75445556640625, 47.122955322265625, 180.83172607421875, 390.7514343261719, 193.6685333251953, 611.9741821289062, 439.5954284667969, -121.73619079589844, -76.82573699951172, 590.353759765625, 856.5540771484375, 9.382400512695312, 45.836212158203125, 838.792236328125, -116.52943420410156, 20.337020874023438, -614.2681274414062, 461.0447998046875, 64.57063293457031, -16.956218719482422, 353.76837158203125, 34.40024185180664, 1274.0819091796875, 48.14569091796875, 1338.739013671875, 68.19361877441406, 72.78533935546875, 186.05984497070312, 306.9655456542969, 313.6539306640625, 657.7244262695312, -876.115234375, 127.94515991210938, 554.9647827148438, -445.5069885253906, 329.4165954589844, 145.633544921875, 395.4674377441406, 276.0924072265625, 960.8513793945312, -160.91709899902344, -174.1536102294922, 219.15646362304688, -128.8105926513672, -291.2843017578125, -87.46034240722656, -784.73046875, 644.85986328125, -505.053955078125, 331.450439453125, -189.81333923339844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000497.npy"} +{"epoch": 0.7298091042584435, "step": 498, "batch_size": 64, "mean": 339.7348327636719, "std": 556.2342529296875, "min": -1228.3848876953125, "p10": -217.4216384887695, "median": 348.4901428222656, "p90": 989.3061462402346, "max": 1873.61181640625, "pos_frac": 0.75, "sample": [-825.4866333007812, 172.78762817382812, 651.1806640625, 252.88552856445312, -185.82321166992188, 548.6484985351562, 665.9170532226562, 1014.335693359375, 384.3860778808594, 621.4564208984375, 405.5318908691406, 713.146240234375, 258.7655944824219, 885.1658935546875, -1228.3848876953125, 673.7623901367188, 930.9038696289062, 1414.68310546875, -127.59465789794922, 626.4456176757812, 119.52837371826172, 696.3704833984375, -150.60757446289062, 846.0397338867188, -165.94381713867188, 370.8954772949219, 326.0848083496094, 111.67316436767578, -307.77752685546875, 9.097694396972656, 296.0212707519531, 372.31671142578125, -995.483642578125, 1176.41552734375, 424.382568359375, 10.8934326171875, -150.91329956054688, 474.72467041015625, 552.6380004882812, 546.8463745117188, 1264.8795166015625, -35.91948699951172, 21.2386474609375, 90.61701965332031, 678.9072265625, 621.1131591796875, -397.9060363769531, 38.28955078125, 1327.064208984375, 911.4824829101562, 252.3922882080078, -69.52337646484375, 320.1248474121094, -182.65625, -230.9638214111328, 26.970428466796875, 526.5886840820312, 178.78887939453125, 887.6748657226562, 556.3489379882812, -281.9245300292969, 1074.6195068359375, 1873.61181640625, -124.70559692382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000498.npy"} +{"epoch": 0.7312775330396476, "step": 499, "batch_size": 64, "mean": 396.4712829589844, "std": 523.7645874023438, "min": -951.9523315429688, "p10": -181.1992263793945, "median": 418.40965270996094, "p90": 1043.5861816406255, "max": 2079.344970703125, "pos_frac": 0.796875, "sample": [1097.7052001953125, 68.23481750488281, 201.76388549804688, -951.9523315429688, -69.2022933959961, 555.00439453125, 500.2206726074219, 616.8599853515625, -16.39466094970703, -117.38967895507812, 422.405517578125, 479.20819091796875, 76.71868133544922, 931.2161865234375, 559.3168334960938, 151.02920532226562, 651.2499389648438, -565.9146728515625, 2079.344970703125, 285.428466796875, 198.40652465820312, 713.822265625, 672.579345703125, 35.49266815185547, -482.52630615234375, 414.4137878417969, 280.5216369628906, 912.5416259765625, -280.183837890625, 1091.7447509765625, 210.37982177734375, 3.75347900390625, 590.1596069335938, -93.5569839477539, 306.6559753417969, 33.54346466064453, 556.6884765625, 926.420166015625, 828.0250244140625, -207.9963836669922, 29.909696578979492, 547.1384887695312, 604.25, 583.969970703125, 650.5153198242188, 122.18709564208984, 676.011474609375, 1591.4453125, 1193.3248291015625, 92.61994171142578, 481.3385314941406, 751.7689208984375, 401.56689453125, 780.757080078125, -40.997806549072266, 778.4755859375, 1167.4981689453125, 351.24603271484375, -556.90576171875, -160.9998321533203, -189.85610961914062, 641.842529296875, 10.678548812866211, 1200.638427734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000499.npy"} +{"epoch": 0.7327459618208517, "step": 500, "batch_size": 64, "mean": 340.6584167480469, "std": 531.899169921875, "min": -1026.7510986328125, "p10": -378.69358520507797, "median": 269.9417419433594, "p90": 917.3555114746094, "max": 1739.6341552734375, "pos_frac": 0.796875, "sample": [274.3171081542969, -214.52101135253906, -598.0132446289062, 1739.6341552734375, -517.5682373046875, 515.5653686523438, 929.3179931640625, 475.0749206542969, 785.2230834960938, 129.2860107421875, 815.3759765625, 604.389892578125, 710.4026489257812, -448.2946472167969, 7.431943893432617, 475.8482360839844, 32.43965148925781, 96.59124755859375, 258.9822082519531, 1389.0286865234375, 616.9945678710938, -26.917587280273438, 689.6818237304688, 1243.515380859375, -16.219337463378906, 740.5479736328125, -474.40728759765625, 889.4430541992188, 806.6338500976562, -657.9625854492188, 767.2747192382812, 803.748779296875, 6.6800384521484375, 136.3608856201172, 89.45783233642578, 826.8297119140625, -682.2689208984375, 142.276611328125, 977.0280151367188, -1026.7510986328125, -173.2637481689453, 101.48661804199219, 144.7633056640625, 674.5728759765625, 32.594337463378906, -2.7333431243896484, 556.060546875, 79.58397674560547, 132.0030517578125, 265.5663757324219, 1182.2440185546875, 615.1176147460938, 685.5345458984375, 204.9163818359375, 628.8699340820312, 414.2856140136719, 662.2283935546875, 380.474365234375, 34.408897399902344, 606.8109741210938, 142.5956268310547, 1226.152587890625, -216.29110717773438, 111.69715881347656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000500.npy"} +{"epoch": 0.7342143906020558, "step": 501, "batch_size": 64, "mean": 368.9128112792969, "std": 557.3740234375, "min": -817.5323486328125, "p10": -315.9778732299804, "median": 362.382568359375, "p90": 1127.8998779296876, "max": 1844.3668212890625, "pos_frac": 0.765625, "sample": [-115.38907623291016, 420.5760498046875, 264.7581787109375, -363.9725036621094, -38.680912017822266, 1256.8319091796875, 242.1348114013672, 912.1203002929688, 251.99510192871094, 4.642547607421875, 185.3515625, -175.02645874023438, 325.685791015625, 159.1385955810547, 1844.3668212890625, 751.2545776367188, 972.0869140625, 415.9063415527344, 448.9272766113281, 920.3206787109375, 1187.5189208984375, -67.58012390136719, 250.4485321044922, 141.64730834960938, 877.4838256835938, -444.2042541503906, -596.9141845703125, 499.80682373046875, 1079.1917724609375, 518.5880126953125, -687.1594848632812, 214.36465454101562, 466.2607421875, 378.8013610839844, 522.8592529296875, 1276.7374267578125, -230.33226013183594, 525.0696411132812, 367.64398193359375, 932.5265502929688, 1148.7747802734375, 16.316856384277344, -93.62614440917969, -352.6831359863281, 637.0260009765625, 37.60650634765625, 21.332416534423828, 548.5721435546875, 76.28665161132812, -817.5323486328125, 357.12115478515625, 573.8143920898438, -195.9941864013672, -90.26956176757812, 14.652774810791016, -465.665771484375, 1073.5909423828125, 758.4441528320312, 1225.99560546875, 436.60382080078125, 1805.07275390625, 529.1204833984375, 445.70751953125, 24.365619659423828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000501.npy"} +{"epoch": 0.73568281938326, "step": 502, "batch_size": 64, "mean": 361.4590759277344, "std": 419.40155029296875, "min": -594.37158203125, "p10": -65.39839172363276, "median": 315.8251953125, "p90": 956.8492980957034, "max": 1847.9150390625, "pos_frac": 0.875, "sample": [636.5403442382812, -262.497314453125, 814.4312133789062, 485.5806579589844, 66.38018798828125, 602.93603515625, -86.67151641845703, 39.84423828125, 1217.7369384765625, 358.17431640625, 387.1773681640625, 289.7814636230469, 356.8509521484375, 1081.0489501953125, 908.0254516601562, 35.6041259765625, 4.7296142578125, 262.9692687988281, -594.37158203125, 104.0035400390625, 635.2517700195312, 992.51123046875, 110.98052215576172, 416.9625549316406, 35.2158203125, 172.9313201904297, 57.52754211425781, 317.35595703125, 444.40869140625, 371.3002014160156, -234.66038513183594, 1061.2122802734375, 85.42183685302734, 487.5244445800781, 463.1815185546875, 797.0645141601562, -119.79948425292969, 1847.9150390625, 27.181198120117188, 12.926897048950195, 729.8465576171875, 50.42991638183594, 249.61297607421875, 306.32373046875, 394.4516906738281, 584.9949340820312, 1171.0584716796875, 43.68415069580078, 661.1407470703125, 428.06072998046875, 758.8038330078125, 14.771224975585938, -15.761102676391602, 113.99099731445312, 977.7738037109375, -147.9405059814453, 289.66448974609375, 43.575347900390625, 726.66650390625, 322.59844970703125, -89.92252349853516, 329.07415771484375, 185.50086975097656, 314.29443359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000502.npy"} +{"epoch": 0.737151248164464, "step": 503, "batch_size": 64, "mean": 332.10968017578125, "std": 437.67864990234375, "min": -333.416015625, "p10": -100.05692138671876, "median": 238.76656341552734, "p90": 774.6872253417969, "max": 1973.5906982421875, "pos_frac": 0.78125, "sample": [2.599895477294922, -63.693626403808594, 1010.5792846679688, 10.250656127929688, -190.49330139160156, -196.423828125, 459.4969482421875, 241.73890686035156, 115.37667083740234, -46.219993591308594, -333.416015625, 340.18780517578125, -100.6479263305664, 175.1581268310547, 987.4873657226562, 768.584228515625, 346.36993408203125, 718.48681640625, 81.5074462890625, 682.178955078125, -67.88182067871094, 102.47041320800781, 10.419967651367188, 85.07062530517578, 406.20660400390625, -85.15324401855469, -98.67790985107422, 561.3662109375, 235.79421997070312, 170.1732177734375, 469.48699951171875, 2.33685302734375, 110.86741638183594, 605.8089599609375, 20.735366821289062, -300.8048095703125, 1973.5906982421875, 1654.2520751953125, 571.2003173828125, 398.10394287109375, 117.94406127929688, 678.6526489257812, 725.1329956054688, 94.65624237060547, -74.771484375, 593.1986694335938, 167.3697967529297, 766.2085571289062, 204.13168334960938, 377.197998046875, 158.5247802734375, 517.28125, 438.77557373046875, 259.70721435546875, 962.9660034179688, 508.94207763671875, -125.7776870727539, 737.7381591796875, -193.13302612304688, 1107.1788330078125, 261.5309753417969, 420.15313720703125, 777.3027954101562, -60.367279052734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000503.npy"} +{"epoch": 0.7386196769456681, "step": 504, "batch_size": 64, "mean": 398.95245361328125, "std": 553.2864379882812, "min": -658.9725341796875, "p10": -324.59171447753897, "median": 326.43994140625, "p90": 1039.4508422851563, "max": 1695.787841796875, "pos_frac": 0.765625, "sample": [-133.38967895507812, 156.8521728515625, 917.0464477539062, 390.04351806640625, 116.84339904785156, 282.3158264160156, 666.7881469726562, -230.66168212890625, 1664.768310546875, 1330.5355224609375, 1695.787841796875, -618.0576782226562, 571.5191650390625, 685.0697021484375, -50.74192810058594, -85.50343322753906, 622.7373046875, 208.86135864257812, -14.629585266113281, 18.302215576171875, 56.35533905029297, 15.580955505371094, 295.66168212890625, -214.50108337402344, 238.7620086669922, 211.96559143066406, -515.023193359375, 493.685546875, 170.60867309570312, 436.94769287109375, -364.8474426269531, 1042.6826171875, -470.3276062011719, 1349.9207763671875, 872.9720458984375, 705.760986328125, 807.720458984375, 581.8873291015625, 250.86404418945312, 319.1790771484375, 179.29388427734375, 1273.617431640625, 729.8699951171875, 484.0852355957031, 954.2670288085938, -173.76278686523438, -473.3396301269531, 841.2090454101562, 38.86567687988281, 960.4532470703125, 193.97103881835938, 682.817138671875, 1031.9100341796875, 712.0785522460938, 347.309326171875, 328.871337890625, -455.60894775390625, 908.9237670898438, 1615.8896484375, 324.008544921875, -658.9725341796875, 508.44476318359375, 708.5075073242188, -10.093994140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000504.npy"} +{"epoch": 0.7400881057268722, "step": 505, "batch_size": 64, "mean": 219.14529418945312, "std": 440.14556884765625, "min": -745.8865356445312, "p10": -233.44024353027342, "median": 173.38536071777344, "p90": 865.4058654785156, "max": 1418.120361328125, "pos_frac": 0.6875, "sample": [6.995811462402344, 537.7227172851562, -237.85223388671875, 565.3704833984375, -49.71705627441406, 902.3966674804688, 74.70643615722656, -40.98381042480469, 99.51141357421875, 31.342300415039062, 790.71923828125, 825.31396484375, 1418.120361328125, -15.0035400390625, 567.5537719726562, 549.6226806640625, -186.46839904785156, 351.24371337890625, 162.5849609375, -202.1100616455078, 285.8525085449219, 579.4262084960938, 202.111083984375, 85.09579467773438, -485.20745849609375, -638.2508544921875, 130.46263122558594, 255.72802734375, 909.6454467773438, 267.09954833984375, -48.19789505004883, 380.35760498046875, 429.1827392578125, -223.14559936523438, 214.68649291992188, 723.6898193359375, 30.522499084472656, 672.3191528320312, -81.83016204833984, 517.1406860351562, -670.6408081054688, -745.8865356445312, 185.9272918701172, 207.1968536376953, 12.38873291015625, 102.01226043701172, 938.2753295898438, 439.7900695800781, -198.79234313964844, 862.86328125, 171.2581024169922, -187.53558349609375, -368.99456787109375, 882.9779052734375, 175.5126190185547, -88.79045104980469, 866.4955444335938, -427.529296875, -140.0888671875, 49.14411926269531, 879.5833129882812, 231.5489501953125, 530.6763916015625, -39.849971771240234], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000505.npy"} +{"epoch": 0.7415565345080763, "step": 506, "batch_size": 64, "mean": 361.67413330078125, "std": 496.19549560546875, "min": -773.3080444335938, "p10": -139.54901580810548, "median": 342.88665771484375, "p90": 1039.2372314453125, "max": 1939.2320556640625, "pos_frac": 0.734375, "sample": [-423.40679931640625, 36.58514404296875, 708.7373046875, -7.5093536376953125, 251.03504943847656, 336.2564697265625, -203.1980438232422, 598.9994506835938, 740.8015747070312, 36.798519134521484, 615.8346557617188, 266.235595703125, 300.70404052734375, 154.73072814941406, -703.4614868164062, 1040.594482421875, 464.5948486328125, -31.186264038085938, 302.6951904296875, 633.5006713867188, 450.3265380859375, 1351.60302734375, 375.1163635253906, 63.68981170654297, 1939.2320556640625, -138.6791534423828, -139.92181396484375, 463.7685546875, 654.588134765625, 287.154296875, 378.31695556640625, 1157.0477294921875, 423.13525390625, 105.80563354492188, 532.6569213867188, -624.6109008789062, 359.293701171875, 184.05938720703125, 1061.1973876953125, 860.54296875, 550.9302368164062, 647.060546875, -123.44313049316406, 438.00347900390625, 151.42930603027344, -773.3080444335938, 305.5876770019531, -25.978248596191406, 1036.0703125, 901.244140625, 1034.2508544921875, -4.385341644287109, 349.516845703125, 743.3534545898438, 722.1194458007812, 1081.8236083984375, -4.3278656005859375, 1048.6160888671875, 189.58335876464844, -25.82941436767578, -302.8936767578125, -13.343963623046875, 437.7492980957031, -80.34906768798828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000506.npy"} +{"epoch": 0.7430249632892805, "step": 507, "batch_size": 64, "mean": 314.0374755859375, "std": 605.41552734375, "min": -1088.2271728515625, "p10": -359.6368194580078, "median": 291.5054931640625, "p90": 844.6380249023439, "max": 2411.1865234375, "pos_frac": 0.71875, "sample": [3.734029769897461, 481.96649169921875, -49.94404602050781, -1088.2271728515625, 1194.0111083984375, -137.98199462890625, 60.76231384277344, 384.3133239746094, 39.84876251220703, 133.46136474609375, -76.48249816894531, 425.64483642578125, 12.155410766601562, 414.880859375, 656.222412109375, 721.031982421875, -458.7060546875, 323.7967529296875, 176.3345489501953, 2043.0081787109375, 492.90948486328125, 621.0928955078125, 35.07944869995117, -236.57174682617188, 450.66729736328125, 313.90008544921875, 652.04296875, 858.7783813476562, -378.3077087402344, 336.74896240234375, 348.517578125, -65.67257690429688, -316.0714111328125, -39.28392028808594, 32.054908752441406, 661.68701171875, 596.2840576171875, 1407.7882080078125, 655.0011596679688, 62.65984344482422, 271.2626953125, 19.925918579101562, -40.857635498046875, 377.7416076660156, 2411.1865234375, 939.4231567382812, 184.22454833984375, 298.26568603515625, -437.3600769042969, 729.2079467773438, 788.0701904296875, -550.5297241210938, -26.25646209716797, 674.8837890625, 1921.628662109375, 628.7052001953125, 811.6438598632812, 450.5632629394531, 284.74530029296875, -25.8160343170166, 30.934974670410156, -140.92697143554688, -418.9162902832031, -832.4874267578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000507.npy"} +{"epoch": 0.7444933920704846, "step": 508, "batch_size": 64, "mean": 266.6474609375, "std": 621.9771728515625, "min": -1508.0501708984375, "p10": -399.3205535888672, "median": 238.8148193359375, "p90": 943.4833129882816, "max": 2259.099609375, "pos_frac": 0.6875, "sample": [202.15109252929688, 169.4642333984375, -681.134765625, 736.7457885742188, 559.4208374023438, -54.82757568359375, 117.93014526367188, 1941.730712890625, 434.6089172363281, 651.8135375976562, 424.6153259277344, 1027.2276611328125, 275.4785461425781, 2259.099609375, 574.0980834960938, 301.36767578125, 197.05300903320312, -675.2545166015625, 1256.4197998046875, 873.283447265625, -523.9200439453125, -254.51011657714844, 330.25213623046875, -1080.0107421875, 973.5689697265625, -239.85726928710938, 88.8046875, -202.84133911132812, -110.34074401855469, 325.30450439453125, -48.47163391113281, 362.9059753417969, 48.33515930175781, -30.143047332763672, 1630.6214599609375, -1508.0501708984375, 92.5527572631836, 135.7863311767578, 176.20556640625, 763.6673583984375, 404.6020202636719, 126.07965850830078, 110.36312866210938, 457.290283203125, 566.0270385742188, 277.29425048828125, 516.6465454101562, 609.98046875, 743.8178100585938, 513.6071166992188, 658.2494506835938, -255.61447143554688, -257.94244384765625, 1000.2053833007812, 577.76708984375, -234.240478515625, -114.3516845703125, -46.18701171875, -434.32806396484375, 650.1898193359375, -404.4462890625, 446.87054443359375, 19.767614364624023, -387.3605041503906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000508.npy"} +{"epoch": 0.7459618208516887, "step": 509, "batch_size": 64, "mean": 292.65716552734375, "std": 459.6500244140625, "min": -647.9981079101562, "p10": -262.52355499267577, "median": 245.3778533935547, "p90": 938.4312622070313, "max": 1716.238037109375, "pos_frac": 0.78125, "sample": [-48.71448516845703, -77.01138305664062, -566.5128173828125, 897.300537109375, -131.53643798828125, 524.8711547851562, 69.78009796142578, 111.11048126220703, 504.5955810546875, 343.49090576171875, 660.6602172851562, 122.26605987548828, 290.45611572265625, 94.73776245117188, 194.58680725097656, 152.7113800048828, 934.6029663085938, -481.36602783203125, -428.1878662109375, 394.3762512207031, 1152.078125, 475.7182922363281, 151.16676330566406, 249.46966552734375, 420.47906494140625, 727.1764526367188, 156.53240966796875, 121.02645111083984, 301.3982849121094, 421.7455139160156, 560.0807495117188, -312.4195251464844, 91.21818542480469, -199.5973663330078, 293.2058410644531, 940.0719604492188, 761.6361694335938, 60.92927551269531, 209.06362915039062, -229.5585174560547, -44.340576171875, 444.98992919921875, 306.3380126953125, 124.28659057617188, 47.55464172363281, -647.9981079101562, 1081.7998046875, 1276.9649658203125, -92.66957092285156, 1716.238037109375, -276.65142822265625, 178.11981201171875, -363.4795837402344, 34.326507568359375, 267.3929138183594, 354.91455078125, 96.08708953857422, 268.6726379394531, 241.28604125976562, 495.14837646484375, 694.0463256835938, 1172.415771484375, 428.8956298828125, 1012.0813598632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000509.npy"} +{"epoch": 0.7474302496328928, "step": 510, "batch_size": 64, "mean": 386.8134765625, "std": 424.6536560058594, "min": -544.45947265625, "p10": -156.32365722656246, "median": 391.5742950439453, "p90": 861.7651977539062, "max": 1379.748291015625, "pos_frac": 0.78125, "sample": [258.3844909667969, 875.6951904296875, 1208.8424072265625, 441.2361145019531, -172.29629516601562, 867.431396484375, 751.7836303710938, 346.46075439453125, 85.53605651855469, 758.8546142578125, 401.6313171386719, 563.7468872070312, -262.5110778808594, 538.29150390625, 1111.7940673828125, 828.6639404296875, 678.181396484375, -544.45947265625, -56.76781463623047, 840.028076171875, 136.09347534179688, 381.51727294921875, 425.02099609375, 177.99948120117188, 280.0002746582031, 682.79931640625, 589.5237426757812, 848.5440673828125, 1098.255859375, -264.14361572265625, 819.2093505859375, 1379.748291015625, -119.05416870117188, 138.671142578125, 674.0662841796875, 145.95352172851562, 310.7001647949219, 1247.68603515625, -302.06976318359375, 327.72637939453125, 263.32891845703125, 112.2160415649414, 685.9512939453125, 276.839599609375, 487.03857421875, -52.808753967285156, 516.6451416015625, 412.57415771484375, 37.48118591308594, 405.56683349609375, 723.5466918945312, -49.98949432373047, 126.525390625, 721.9248657226562, -69.14649963378906, 519.3426513671875, -338.8794250488281, 829.52490234375, -58.41216278076172, -187.35723876953125, -22.88075828552246, 680.2276611328125, 182.56478881835938, 55.461971282958984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000510.npy"} +{"epoch": 0.748898678414097, "step": 511, "batch_size": 64, "mean": 305.0107421875, "std": 559.1774291992188, "min": -1448.028564453125, "p10": -186.04444274902332, "median": 203.71514892578125, "p90": 967.8306579589847, "max": 1716.729248046875, "pos_frac": 0.765625, "sample": [-60.30443572998047, 1261.4547119140625, 198.45962524414062, 178.2852325439453, 376.21002197265625, 94.09629821777344, 1002.9166870117188, 1469.3961181640625, 514.4116821289062, 164.50924682617188, 36.62012481689453, 640.2913208007812, 654.536376953125, 664.774658203125, 93.26271057128906, 1127.029052734375, -1448.028564453125, 439.5709228515625, -368.4600830078125, 663.3948364257812, 1406.587158203125, 772.8029174804688, 74.16651916503906, 436.50634765625, 236.90277099609375, 19.81928253173828, -709.9124755859375, 373.566650390625, -45.605682373046875, 627.9255981445312, 435.54388427734375, -5.2286834716796875, -17.482364654541016, 296.61065673828125, 322.20037841796875, 148.4530792236328, 885.9632568359375, -245.37930297851562, 5.296661376953125, 183.59561157226562, 801.63818359375, 177.32351684570312, 26.157733917236328, -358.6958923339844, -56.02129364013672, 725.6742553710938, 758.783203125, 71.6383056640625, -1.5869731903076172, 97.73756408691406, -239.93301391601562, 455.2762756347656, 497.810546875, -60.157630920410156, 208.97067260742188, 449.56353759765625, 232.74716186523438, -12.852165222167969, 1716.729248046875, 92.57534790039062, 447.8565368652344, -1205.197998046875, 131.920654296875, 1657.9722900390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000511.npy"} +{"epoch": 0.750367107195301, "step": 512, "batch_size": 64, "mean": 414.32196044921875, "std": 422.2722473144531, "min": -374.234130859375, "p10": -77.06082000732417, "median": 372.9373474121094, "p90": 991.2586303710939, "max": 1760.5247802734375, "pos_frac": 0.84375, "sample": [1112.9320068359375, 961.63671875, 705.0360717773438, -314.98907470703125, -150.46311950683594, 50.367156982421875, 1760.5247802734375, 246.23211669921875, 67.37599182128906, -20.66922950744629, 119.19911193847656, 777.831787109375, 4.001031875610352, 427.68670654296875, -200.93783569335938, 44.464229583740234, 490.41943359375, 492.2033386230469, 307.12139892578125, -2.220792770385742, -193.499267578125, 682.1996459960938, 137.08489990234375, 526.1234130859375, 1175.5701904296875, 783.6392822265625, 249.1050262451172, 413.8323974609375, 100.43199920654297, 756.9940185546875, 345.10101318359375, 315.2676086425781, 514.1777954101562, 480.42523193359375, 220.7421112060547, 237.88035583496094, 1097.2640380859375, 292.88580322265625, -101.22864532470703, 868.65576171875, 150.15658569335938, 1054.85498046875, 413.0345458984375, 1003.9537353515625, 35.622955322265625, -374.234130859375, 407.5185546875, 17.348773956298828, 281.43157958984375, 435.0244140625, 1069.3311767578125, -17.61841583251953, 287.9001159667969, 663.6416625976562, 260.3713073730469, 526.861083984375, 703.6008911132812, 224.89163208007812, 400.773681640625, 831.253662109375, -201.0638885498047, 927.117919921875, 877.3228149414062, 757.105712890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000512.npy"} +{"epoch": 0.7518355359765051, "step": 513, "batch_size": 64, "mean": 301.99615478515625, "std": 487.93682861328125, "min": -690.7075805664062, "p10": -213.18499298095702, "median": 227.2471160888672, "p90": 1077.456787109375, "max": 1343.076171875, "pos_frac": 0.765625, "sample": [714.205322265625, 350.6400146484375, 281.08538818359375, 145.88919067382812, 255.11648559570312, 192.2020721435547, 295.1914978027344, 200.32028198242188, -193.87242126464844, 1037.6324462890625, 629.6205444335938, -125.73870849609375, 693.1864013671875, 37.89690399169922, 64.0167465209961, 737.7167358398438, 29.821775436401367, 55.83551025390625, 191.3198699951172, 365.475830078125, 947.2548828125, 1178.21044921875, 1253.970947265625, 254.1739501953125, 615.2218627929688, 30.27341079711914, 343.29815673828125, 187.63409423828125, 325.203857421875, -199.52069091796875, 49.518218994140625, 1217.86376953125, 539.4293823242188, 399.1265869140625, 262.59722900390625, -56.450775146484375, -31.485397338867188, -589.7417602539062, 264.37396240234375, 1343.076171875, -28.831008911132812, 152.22091674804688, 58.14739990234375, -690.7075805664062, 608.6564331054688, 7.634521484375, -453.6287536621094, -22.505685806274414, 1156.0411376953125, 940.875, -649.8278198242188, 9.344741821289062, 1079.216796875, 1248.920654296875, 302.4990234375, 197.89193725585938, -345.5802001953125, 595.0254516601562, -86.0922622680664, -219.04112243652344, -384.583251953125, 1073.35009765625, 140.0267333984375, 347.1097717285156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000513.npy"} +{"epoch": 0.7533039647577092, "step": 514, "batch_size": 64, "mean": 303.3856506347656, "std": 505.4642639160156, "min": -785.5114135742188, "p10": -201.4287399291992, "median": 183.4027099609375, "p90": 1007.5641296386722, "max": 1815.375732421875, "pos_frac": 0.6875, "sample": [-521.7512817382812, -184.31991577148438, 98.42671203613281, -68.80017852783203, 1092.1318359375, 186.70932006835938, 604.8616943359375, 568.4607543945312, 1815.375732421875, 928.1642456054688, 153.9138946533203, 671.0927734375, -157.99932861328125, 162.27911376953125, 386.10772705078125, -73.11759948730469, -309.4903869628906, 159.551513671875, 1159.5908203125, 1107.450927734375, -208.76109313964844, -62.66596221923828, 385.2278747558594, -111.52687072753906, 37.38050079345703, 353.5763854980469, 360.926513671875, 161.937744140625, 537.5440063476562, -97.33521270751953, 883.6828002929688, 556.7454223632812, -155.57037353515625, -14.14373779296875, 180.09609985351562, 325.98406982421875, 1234.1033935546875, -149.09414672851562, -23.06726837158203, -222.72988891601562, 317.21710205078125, -508.8048095703125, -124.00983428955078, 496.00067138671875, 209.60348510742188, 128.85569763183594, -785.5114135742188, -174.5963897705078, -275.8328857421875, 809.769775390625, 228.49765014648438, 489.1942138671875, 467.2296142578125, 653.0538330078125, 1041.5926513671875, 150.14059448242188, 300.41912841796875, 665.030029296875, 726.9679565429688, 139.9995574951172, 802.6925048828125, 45.73857498168945, 134.07528686523438, 1728.409423828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000514.npy"} +{"epoch": 0.7547723935389133, "step": 515, "batch_size": 64, "mean": 224.7902374267578, "std": 448.4010925292969, "min": -1588.1658935546875, "p10": -216.68487701416015, "median": 209.55753326416016, "p90": 741.2733642578128, "max": 1351.3326416015625, "pos_frac": 0.75, "sample": [-103.08500671386719, -98.69026184082031, 573.3716430664062, 838.9771728515625, 604.408935546875, -1588.1658935546875, 495.2304992675781, 408.42559814453125, 128.30938720703125, -554.9630737304688, -352.73577880859375, 34.99385070800781, -15.248931884765625, 170.02972412109375, 310.9964599609375, 289.656005859375, 185.45799255371094, 515.211181640625, -407.1138916015625, 37.28045654296875, 368.0500793457031, -202.07997131347656, 165.94798278808594, 349.502685546875, 186.2164764404297, 145.27403259277344, 681.9487915039062, -186.42657470703125, 517.7606811523438, 43.508934020996094, 766.6981811523438, 31.856882095336914, -222.94412231445312, 591.6307373046875, 271.57550048828125, 347.80712890625, 583.8185424804688, 12.95635986328125, 1223.8037109375, -167.39199829101562, 157.71768188476562, 1080.4732666015625, -368.37799072265625, -6.248374938964844, -369.1723937988281, 234.78875732421875, 106.96903228759766, 773.2115478515625, -18.656841278076172, 71.53326416015625, 22.989280700683594, 1064.4893798828125, 232.89859008789062, 123.60436248779297, 340.3938903808594, 580.04541015625, 466.6069641113281, -116.70761108398438, 310.73321533203125, 377.35662841796875, 337.80767822265625, 258.7668151855469, 1351.3326416015625, 392.15936279296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000515.npy"} +{"epoch": 0.7562408223201175, "step": 516, "batch_size": 64, "mean": 342.5379943847656, "std": 421.19232177734375, "min": -673.2535400390625, "p10": -191.70615539550775, "median": 327.9738464355469, "p90": 868.5500305175783, "max": 1198.033935546875, "pos_frac": 0.765625, "sample": [236.41366577148438, 239.80618286132812, 791.572265625, -113.97467803955078, 553.5574340820312, 751.9182739257812, 454.0545959472656, -126.99928283691406, 262.16552734375, 440.8201904296875, 1198.033935546875, 504.0527648925781, 522.2657470703125, 477.182861328125, 153.8649139404297, -135.36502075195312, 571.5787963867188, -32.51203918457031, 111.736572265625, 1188.62646484375, -673.2535400390625, 99.17779541015625, -215.85235595703125, -411.6431579589844, 959.7613525390625, 496.923095703125, -502.6408386230469, 885.8800659179688, 1139.260986328125, 170.77284240722656, 266.4472961425781, 788.6898193359375, 1073.16455078125, 226.98394775390625, 578.2844848632812, 618.7950439453125, 309.47625732421875, -4.069950103759766, 444.97503662109375, 269.2442321777344, 152.77037048339844, -59.5338134765625, 828.11328125, 285.0483703613281, 141.48220825195312, 1095.650390625, 156.42994689941406, 157.11590576171875, -17.431434631347656, 463.57177734375, 433.04345703125, 485.87017822265625, 90.43441772460938, 570.7283935546875, 696.4788208007812, -238.78964233398438, -255.95668029785156, -426.8656005859375, 495.33709716796875, 671.0997314453125, 523.5833740234375, 811.005859375, 346.471435546875, -52.40077590942383], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000516.npy"} +{"epoch": 0.7577092511013216, "step": 517, "batch_size": 64, "mean": 394.68890380859375, "std": 537.0808715820312, "min": -668.8670043945312, "p10": -176.79386901855466, "median": 356.9472198486328, "p90": 866.9944885253907, "max": 2343.83349609375, "pos_frac": 0.78125, "sample": [573.8851318359375, 745.8698120117188, 108.69192504882812, 378.31854248046875, 843.3380737304688, 206.54580688476562, -464.18780517578125, 847.3283081054688, 502.68572998046875, -217.5865936279297, 551.3358154296875, 1946.8824462890625, 1302.26025390625, 377.83599853515625, 556.7052001953125, -103.36913299560547, 278.7374572753906, 508.86907958984375, 66.44353485107422, 747.031005859375, 1374.466796875, 632.413330078125, 26.10001564025879, 227.29849243164062, -668.8670043945312, -285.82318115234375, 119.27950286865234, 383.4380798339844, -409.1744079589844, 2343.83349609375, 328.20819091796875, -30.602981567382812, -161.50636291503906, 875.4228515625, 109.692626953125, 611.1376953125, -469.6524658203125, 426.0242614746094, -67.298828125, 340.8360290527344, 753.832275390625, -114.2419662475586, 314.5352478027344, 297.3775329589844, 615.7964477539062, 535.5121459960938, 743.8638305664062, 375.0122375488281, 25.45355987548828, 1372.0423583984375, 128.61166381835938, 1128.531982421875, 373.05841064453125, 810.9664916992188, 94.492919921875, 749.517333984375, -183.3456573486328, 225.40078735351562, 217.3644561767578, 434.490478515625, -149.64422607421875, 820.1921997070312, 250.52865600585938, -22.104286193847656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000517.npy"} +{"epoch": 0.7591776798825257, "step": 518, "batch_size": 64, "mean": 324.5028076171875, "std": 416.84844970703125, "min": -832.964111328125, "p10": -164.0851364135742, "median": 277.9253845214844, "p90": 881.2076354980469, "max": 1213.65234375, "pos_frac": 0.796875, "sample": [103.21786499023438, 1059.395263671875, 507.3958740234375, 375.23114013671875, 225.1790008544922, 277.86334228515625, 492.1708984375, 587.7155151367188, 53.1950569152832, 238.94564819335938, 865.6121215820312, 654.7269287109375, 146.3148193359375, -247.31480407714844, 496.1772155761719, 249.8029022216797, 357.31207275390625, 95.13593292236328, 57.435882568359375, 493.6397705078125, 877.3634033203125, -35.729774475097656, -262.2424621582031, 138.86416625976562, -4.406009674072266, 318.0357360839844, 777.102783203125, 104.88677978515625, 685.077392578125, 911.98193359375, 843.5164794921875, 25.288253784179688, 1196.5074462890625, 1213.65234375, 766.1153564453125, 708.5, 147.8140869140625, -107.84115600585938, 481.3829650878906, 628.8104248046875, 277.9874267578125, -104.62249755859375, 480.1250305175781, -174.00137329101562, 437.83819580078125, -407.0811767578125, -368.14825439453125, -832.964111328125, 333.5197448730469, 422.3194274902344, 36.19743347167969, 209.236572265625, -140.94725036621094, 184.67715454101562, -342.0260009765625, 1038.95849609375, 962.2861938476562, 311.8324279785156, -104.86276245117188, 882.8551635742188, 272.8556823730469, 129.92225646972656, 528.7442626953125, 229.6444854736328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000518.npy"} +{"epoch": 0.7606461086637298, "step": 519, "batch_size": 64, "mean": 238.39837646484375, "std": 431.2276306152344, "min": -906.4351806640625, "p10": -291.03977966308594, "median": 249.06765747070312, "p90": 813.7243408203127, "max": 1212.6063232421875, "pos_frac": 0.671875, "sample": [-40.972747802734375, 559.9146728515625, 365.4709777832031, -365.7731018066406, 1021.1005249023438, -279.8383483886719, 98.36698913574219, 476.2901611328125, 1025.84326171875, 584.0471801757812, 101.58116149902344, 581.3392333984375, -189.7376708984375, 833.3198852539062, 373.3324279785156, -800.4478149414062, 1175.49072265625, 583.6439208984375, 82.34691619873047, -12.31947135925293, 290.941162109375, -108.56239318847656, 49.671478271484375, 336.3828125, 522.486083984375, 131.66909790039062, 274.70599365234375, 529.1383056640625, 499.6285400390625, 43.258567810058594, -29.96588134765625, 669.017333984375, -131.07791137695312, -100.255126953125, 336.7912902832031, 83.66138458251953, 524.6340942382812, -2.7158279418945312, -316.8563232421875, -16.989700317382812, 334.6925048828125, -253.97848510742188, 945.8272094726562, 419.025390625, -906.4351806640625, 551.215087890625, 1212.6063232421875, 40.92723846435547, -59.69122314453125, -16.558792114257812, 176.418212890625, 871.7313232421875, -313.5516662597656, 592.2330322265625, -295.84039306640625, 223.4293212890625, 768.0014038085938, 17.56417465209961, 382.9076232910156, 543.3019409179688, 276.5872802734375, -28.353404998779297, 325.1293640136719, -308.2544860839844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000519.npy"} +{"epoch": 0.762114537444934, "step": 520, "batch_size": 64, "mean": 300.2342529296875, "std": 470.6655578613281, "min": -1022.561279296875, "p10": -164.7825942993164, "median": 207.77989959716797, "p90": 919.8718017578125, "max": 1997.6778564453125, "pos_frac": 0.75, "sample": [-90.4458999633789, -226.4208984375, 1002.378173828125, 148.16465759277344, 432.9857482910156, 154.17129516601562, 358.2278747558594, -76.58587646484375, -165.50042724609375, 189.43426513671875, 17.087051391601562, -55.200775146484375, 644.87548828125, -66.83979797363281, -42.610389709472656, 514.6027221679688, 1165.253173828125, 1997.6778564453125, -210.44442749023438, 353.3175354003906, 1095.111572265625, 1264.100341796875, -401.6649169921875, 169.798828125, -43.806182861328125, 709.6419067382812, 126.2660140991211, 603.2789306640625, 473.84906005859375, 317.4398193359375, 255.46951293945312, 249.04664611816406, 1263.30224609375, 109.33351135253906, 57.742279052734375, 466.1555480957031, 528.013916015625, 434.7480773925781, 718.5999755859375, -1022.561279296875, 477.7033386230469, 152.69703674316406, 199.4171905517578, 305.7647705078125, 50.523162841796875, 907.1442260742188, 48.61748504638672, 914.14892578125, 449.61163330078125, 90.94818115234375, 198.83348083496094, -250.38693237304688, -229.68038940429688, 292.1588134765625, 216.14260864257812, 533.240478515625, -119.76983642578125, 922.324462890625, -163.10765075683594, 488.8022766113281, 268.764404296875, -96.81639099121094, 6.441497802734375, 133.4771728515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000520.npy"} +{"epoch": 0.7635829662261381, "step": 521, "batch_size": 64, "mean": 327.1440734863281, "std": 436.8957214355469, "min": -754.3360595703125, "p10": -105.43234176635741, "median": 248.70413970947266, "p90": 959.8477844238283, "max": 1605.49169921875, "pos_frac": 0.78125, "sample": [457.7850341796875, 750.917724609375, 239.07395935058594, 26.53948974609375, -37.143226623535156, -754.3360595703125, -155.46322631835938, -19.2353515625, -84.2591552734375, 520.3048095703125, 47.64897155761719, -176.44198608398438, 5.865848541259766, 481.80865478515625, 442.3562927246094, 1037.2593994140625, 90.35247802734375, 12.56955337524414, -252.02662658691406, 915.3209228515625, 195.7595977783203, -71.08177185058594, -90.87645721435547, 235.89674377441406, 517.016845703125, 611.4857788085938, 561.127685546875, 258.3343200683594, 110.1486587524414, 17.48175048828125, 377.954833984375, 557.3749389648438, -41.09400177001953, 997.59765625, 183.80514526367188, 598.037109375, 521.7479248046875, 176.79336547851562, 531.6536865234375, -117.95439910888672, 309.9302062988281, 350.4217529296875, 295.682373046875, 68.61112976074219, 227.67864990234375, 1091.94970703125, -111.67057800292969, 1079.482177734375, -16.761322021484375, -553.39599609375, 563.95703125, 383.33392333984375, 657.9982299804688, 28.456771850585938, 679.6282348632812, 978.9307250976562, 170.0455780029297, 1443.336181640625, 26.110862731933594, 1605.49169921875, 446.2208557128906, 64.48924255371094, 702.4774169921875, 764.708984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000521.npy"} +{"epoch": 0.7650513950073421, "step": 522, "batch_size": 64, "mean": 291.8514709472656, "std": 459.6984558105469, "min": -775.175048828125, "p10": -146.8851776123047, "median": 215.33358001708984, "p90": 1036.589538574219, "max": 1344.6593017578125, "pos_frac": 0.8125, "sample": [219.20481872558594, 668.6227416992188, 839.7688598632812, -9.775779724121094, 1054.065673828125, 139.724853515625, 1344.6593017578125, 251.12985229492188, 60.34444808959961, 13.096019744873047, -141.09225463867188, 367.6535339355469, 750.882568359375, 42.40069580078125, 453.6529846191406, -64.42880249023438, 289.669677734375, -88.38609313964844, 385.00567626953125, 211.46234130859375, 617.8331909179688, 588.9210815429688, 133.67572021484375, 282.02569580078125, -639.749267578125, 1094.5858154296875, 336.6429443359375, 149.4221649169922, 488.47625732421875, 578.4570922851562, 328.66259765625, 152.74021911621094, 98.32893371582031, 39.163299560546875, 236.12384033203125, -775.175048828125, 533.3579711914062, 124.32476043701172, 176.84344482421875, -374.92071533203125, -149.36785888671875, 396.8583984375, 995.8118896484375, 114.99970245361328, -426.791015625, 517.2696533203125, 560.6229248046875, 202.6610107421875, -686.2192993164062, 1077.6871337890625, 1073.9827880859375, 26.600128173828125, 1243.075439453125, 94.6010513305664, 135.50584411621094, -490.56390380859375, 46.361671447753906, -77.8078842163086, 607.2328491210938, 234.43136596679688, 939.2957153320312, 81.36798095703125, 1105.2852783203125, 98.19117736816406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000522.npy"} +{"epoch": 0.7665198237885462, "step": 523, "batch_size": 64, "mean": 316.2938537597656, "std": 531.8327026367188, "min": -946.4376831054688, "p10": -290.5767883300781, "median": 286.3076477050781, "p90": 1010.0177124023438, "max": 1580.076416015625, "pos_frac": 0.703125, "sample": [-157.5538787841797, 1145.6951904296875, 105.43049621582031, 291.48834228515625, 378.5599365234375, -293.03155517578125, 382.42010498046875, 528.5198974609375, 863.78369140625, 337.33685302734375, -744.5036010742188, 694.2874145507812, 3.7416858673095703, -11.864105224609375, -387.5995178222656, 65.71946716308594, 259.0289306640625, 808.08984375, -509.1053771972656, 213.36981201171875, 495.56170654296875, 998.8731689453125, -116.64312744140625, 4.861345291137695, 485.69769287109375, 119.45237731933594, 213.82102966308594, 100.70010375976562, -49.459999084472656, 1580.076416015625, 496.2807312011719, 117.20957946777344, -77.74276733398438, -946.4376831054688, 1014.7939453125, 805.4105834960938, -438.5771179199219, -28.80732536315918, 489.84515380859375, -95.48103332519531, 635.6173095703125, -65.42720031738281, 1575.5587158203125, 361.1246032714844, 677.6920776367188, -17.934173583984375, 372.1143493652344, 281.126953125, 1557.213623046875, 392.6451110839844, 590.914306640625, 154.08004760742188, 531.9337768554688, 130.52978515625, 714.6420288085938, 1174.8819580078125, 808.054931640625, -83.99839782714844, 625.0482177734375, 1105.877197265625, 583.8084716796875, -676.2114868164062, -284.8489990234375, -44.88558578491211], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000523.npy"} +{"epoch": 0.7679882525697503, "step": 524, "batch_size": 64, "mean": 331.36962890625, "std": 487.57525634765625, "min": -1446.7333984375, "p10": -83.89690322875975, "median": 402.09141540527344, "p90": 860.809851074219, "max": 1724.6036376953125, "pos_frac": 0.75, "sample": [57.31108093261719, 963.0281982421875, 415.267578125, 464.79925537109375, 365.0636291503906, 296.169921875, 361.0087890625, 11.202522277832031, 532.693115234375, 287.44354248046875, 448.15087890625, 104.75497436523438, 418.3742370605469, 520.8283081054688, 676.9427490234375, 408.1316833496094, -11.138702392578125, 879.7081298828125, 290.1712646484375, 570.7625122070312, -1446.7333984375, 415.340576171875, 512.2404174804688, 1724.6036376953125, -18.247650146484375, -38.712860107421875, 739.5152587890625, 370.1566467285156, 504.1092529296875, 9.364559173583984, -23.79513931274414, 315.0504150390625, 486.25885009765625, -184.64891052246094, 687.5615234375, 245.79013061523438, 1172.4718017578125, 1403.59765625, 646.5862426757812, 442.19915771484375, -31.251426696777344, -826.4942626953125, 21.85688018798828, -95.85216522216797, 663.2813110351562, -87.60528564453125, 366.075927734375, 6.428047180175781, 469.619384765625, 437.0698547363281, 1030.325439453125, 986.4872436523438, 565.4285888671875, -789.41845703125, -75.24401092529297, -13.044723510742188, 816.7138671875, 642.0269775390625, -54.765342712402344, 399.9449768066406, 573.949951171875, 404.23785400390625, -154.18426513671875, -71.31265258789062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000524.npy"} +{"epoch": 0.7694566813509545, "step": 525, "batch_size": 64, "mean": 422.78076171875, "std": 508.2586975097656, "min": -785.1712036132812, "p10": -112.19880065917968, "median": 316.63587951660156, "p90": 1262.7388183593757, "max": 1599.6221923828125, "pos_frac": 0.796875, "sample": [317.28924560546875, 1042.031982421875, 337.5355224609375, 87.3991928100586, -13.906744003295898, 429.90185546875, 1349.4232177734375, 756.2666015625, 381.1684265136719, -153.41677856445312, 1373.159423828125, -278.906494140625, 820.9473266601562, 243.71397399902344, 241.13369750976562, -142.6362762451172, 167.73841857910156, 325.5757751464844, 889.5460205078125, 687.63134765625, 606.8651733398438, 112.67190551757812, -114.43524169921875, 184.45071411132812, -133.11212158203125, 332.1822509765625, 497.356689453125, 524.6246337890625, 378.3186340332031, 1008.8670654296875, 1037.3690185546875, 67.3582992553711, 1458.33740234375, -14.914924621582031, 1599.6221923828125, 312.55572509765625, 126.69408416748047, 248.97323608398438, 365.771240234375, -106.98043823242188, 1519.2490234375, -785.1712036132812, -49.65093994140625, -7.81794548034668, 231.84060668945312, 253.66635131835938, 479.2501525878906, 222.81430053710938, 553.3566284179688, 173.79736328125, 748.8978271484375, 1360.858154296875, -67.66830444335938, 29.89971923828125, 706.6762084960938, 244.7948760986328, 315.9825134277344, 135.390380859375, -463.30621337890625, 1060.4752197265625, 237.65939331054688, 1527.305419921875, 394.2328796386719, 881.2646484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000525.npy"} +{"epoch": 0.7709251101321586, "step": 526, "batch_size": 64, "mean": 413.08404541015625, "std": 511.3519287109375, "min": -800.5677490234375, "p10": -193.2379959106445, "median": 452.4098205566406, "p90": 1095.0435180664062, "max": 1415.6788330078125, "pos_frac": 0.765625, "sample": [1103.36865234375, 711.8162841796875, 103.85371398925781, 182.10107421875, 327.9171142578125, 291.68218994140625, -147.2572784423828, -23.06220054626465, 488.225341796875, -654.21533203125, 592.7713012695312, 664.73828125, 790.7596435546875, 240.03921508789062, 1249.9571533203125, 981.2232666015625, -108.81167602539062, 689.9887084960938, 299.4004211425781, 583.7362670898438, 344.24151611328125, 513.4544677734375, 243.43739318847656, 1069.53955078125, 492.44866943359375, 560.4257202148438, 1415.6788330078125, 60.37904357910156, 454.91717529296875, -200.3927459716797, -214.6545867919922, 631.061767578125, -380.8511657714844, 1303.7220458984375, -176.5435791015625, 290.9914245605469, 994.0946044921875, 819.2491455078125, 186.54989624023438, -800.5677490234375, 809.6973266601562, 583.2734375, 449.9024658203125, 1098.111328125, -431.3633728027344, 715.7691650390625, 669.3316650390625, 1090.1673583984375, -79.38402557373047, 511.54095458984375, -83.208984375, 259.78460693359375, 317.4760437011719, 1097.13330078125, 973.0599365234375, -19.84561538696289, 1121.154296875, 668.0303344726562, 417.31011962890625, 8.485366821289062, -149.7427978515625, 74.38007354736328, -726.8246459960938, 1087.7252197265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000526.npy"} +{"epoch": 0.7723935389133627, "step": 527, "batch_size": 64, "mean": 282.3432312011719, "std": 415.23419189453125, "min": -569.6371459960938, "p10": -189.05686492919918, "median": 243.23734283447266, "p90": 760.3781921386719, "max": 1186.994873046875, "pos_frac": 0.765625, "sample": [-553.1806640625, 536.8192138671875, 387.9982604980469, 349.3760070800781, 651.2440185546875, 167.33029174804688, 1186.994873046875, -122.45549011230469, -129.09927368164062, 763.7156372070312, -137.98143005371094, -155.76455688476562, 633.1315307617188, 279.6668395996094, -569.6371459960938, -532.6928100585938, 567.9945678710938, -52.23822784423828, 587.5016479492188, 311.37591552734375, -401.02001953125, 984.2718505859375, 151.1839599609375, 27.161556243896484, 150.83114624023438, 718.6439819335938, -18.904922485351562, 55.86518096923828, 949.1351318359375, 666.8314208984375, 553.42626953125, 1028.031494140625, 419.4903564453125, 663.4173583984375, 1145.6988525390625, 999.196044921875, 752.5908203125, -45.157508850097656, 85.93791198730469, 238.01535034179688, 645.4901123046875, 195.440185546875, 296.03631591796875, 248.45933532714844, -128.95248413085938, 56.60498046875, 265.302978515625, 366.9440002441406, 134.46502685546875, -495.22467041015625, 289.5273742675781, 170.07675170898438, 669.3757934570312, 216.0518798828125, 18.065704345703125, 75.40049743652344, 685.3844604492188, 549.091552734375, 512.4608154296875, 223.7331085205078, 190.7697296142578, -249.611083984375, 43.6531982421875, -203.3249969482422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000527.npy"} +{"epoch": 0.7738619676945668, "step": 528, "batch_size": 64, "mean": 358.5581970214844, "std": 562.3563232421875, "min": -600.9801025390625, "p10": -347.31591796875, "median": 294.6216735839844, "p90": 1174.8984619140629, "max": 1857.431884765625, "pos_frac": 0.71875, "sample": [-3.6351699829101562, 1247.659912109375, 303.29571533203125, 359.9427795410156, 176.25482177734375, -440.364501953125, -38.27075958251953, 703.2715454101562, 625.2940673828125, 1235.9202880859375, 350.1016845703125, -125.11875915527344, 415.9482421875, -469.435302734375, 113.1301040649414, 441.36419677734375, 301.91729736328125, -124.44921875, 1068.173583984375, 1857.431884765625, -70.4173355102539, 968.350830078125, 425.18231201171875, 702.1782836914062, 504.6599426269531, 922.2241821289062, -552.356201171875, 1297.00634765625, -251.40512084960938, 1034.7890625, -530.2913208007812, 287.3260498046875, 413.2421875, 243.34298706054688, 326.779541015625, 172.66929626464844, 551.6635131835938, 192.35902404785156, 506.1371765136719, 113.5125503540039, -169.6992645263672, 892.8654174804688, -150.40188598632812, -600.9801025390625, 1430.26904296875, 268.7784729003906, 604.6187744140625, 1727.494140625, 127.60835266113281, 43.58074951171875, -544.21337890625, -358.8065185546875, 724.8447265625, 1220.6376953125, 114.42623901367188, 866.8724975585938, 574.464599609375, 172.97348022460938, -320.5045166015625, -15.277740478515625, 85.88037109375, -119.01728820800781, 1017.1864624023438, 98.7397689819336], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000528.npy"} +{"epoch": 0.775330396475771, "step": 529, "batch_size": 64, "mean": 244.05267333984375, "std": 540.3975219726562, "min": -1316.924560546875, "p10": -415.1270416259766, "median": 255.9566192626953, "p90": 875.0877807617189, "max": 1493.1700439453125, "pos_frac": 0.65625, "sample": [260.0087890625, -197.96275329589844, 1493.1700439453125, -55.00750732421875, 150.0079803466797, 521.1974487304688, 493.9058532714844, 748.22265625, 581.548095703125, 1438.3465576171875, -288.8548278808594, 504.37823486328125, 134.59860229492188, -399.5232238769531, 494.3603210449219, -117.02198028564453, 138.01565551757812, 55.925071716308594, 693.8150024414062, -76.7000503540039, -348.5086975097656, 324.42083740234375, 1111.517822265625, 782.9811401367188, 952.5321655273438, -644.5049438476562, -35.94178009033203, 375.1580505371094, -482.888916015625, 631.6480712890625, 251.90444946289062, 492.1279602050781, -129.72592163085938, 370.4105224609375, -43.261077880859375, 49.22004699707031, 732.5025634765625, 147.32115173339844, 569.5712280273438, 698.3722534179688, -1316.924560546875, 126.06340789794922, 982.7916259765625, -498.19189453125, 576.5751342773438, -894.2567749023438, 136.89437866210938, 542.9136352539062, 895.3790893554688, -10.813072204589844, -679.1033325195312, 442.5999755859375, -62.864845275878906, 572.8621826171875, -421.81439208984375, 454.0222473144531, 226.12356567382812, 827.7413940429688, 497.56787109375, 1221.2393798828125, -328.1725158691406, -86.38652038574219, -290.7207946777344, 328.5588073730469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000529.npy"} +{"epoch": 0.7767988252569751, "step": 530, "batch_size": 64, "mean": 382.5972900390625, "std": 645.4879760742188, "min": -2000.821044921875, "p10": -184.49969024658202, "median": 269.96055603027344, "p90": 1287.190075683594, "max": 1986.138671875, "pos_frac": 0.734375, "sample": [285.5620422363281, -348.8468933105469, -12.7275390625, 154.88980102539062, 268.3919982910156, -161.66903686523438, 1696.1951904296875, 614.2518310546875, 305.5815124511719, -570.1636962890625, 190.85365295410156, -54.39049530029297, -4.65362548828125, -93.12625122070312, 703.744873046875, 707.7337646484375, -17.102766036987305, 60.62945556640625, -342.10906982421875, 1181.8531494140625, 1621.0816650390625, 500.9061584472656, 950.6145629882812, 691.5626831054688, -73.81605529785156, 811.3409423828125, -107.105224609375, 219.19992065429688, -194.2842559814453, 651.7120361328125, -2.3673324584960938, 1589.5802001953125, 1171.9334716796875, 495.59246826171875, 364.0762023925781, 45.0327262878418, 858.3846435546875, 145.74630737304688, -297.7543640136719, -400.4933776855469, 200.4508514404297, 1707.1470947265625, 636.6036376953125, 1986.138671875, 76.596923828125, 315.8100280761719, 310.8710021972656, 271.52911376953125, 1510.552001953125, 170.22097778320312, 383.5251159667969, 18.320348739624023, 818.5599365234375, -2000.821044921875, 1265.971923828125, 55.66749572753906, -12.3387451171875, 1296.2835693359375, 502.3303527832031, 285.588623046875, 80.35841369628906, 218.85028076171875, 545.0986938476562, 237.07308959960938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000530.npy"} +{"epoch": 0.7782672540381792, "step": 531, "batch_size": 64, "mean": 329.83197021484375, "std": 541.7294311523438, "min": -1009.7593994140625, "p10": -258.4589767456054, "median": 275.5526885986328, "p90": 1009.1005065917972, "max": 1808.4969482421875, "pos_frac": 0.78125, "sample": [592.776611328125, 127.29119110107422, 726.3770751953125, 1042.9810791015625, 746.7888793945312, -475.22540283203125, 131.52328491210938, 213.19186401367188, 1680.33251953125, -196.79953002929688, -226.4503936767578, 429.7344055175781, 9.429044723510742, 385.8694152832031, 279.29144287109375, 180.28334045410156, 418.0731201171875, 214.8727569580078, -1009.7593994140625, 338.11199951171875, 930.0458374023438, 693.5508422851562, 351.09893798828125, 80.81489562988281, 831.1497802734375, 188.189697265625, 417.7082824707031, 245.1844024658203, 473.4039306640625, -54.774925231933594, 1253.447998046875, -272.17694091796875, 652.8519897460938, 632.4296875, 60.24278259277344, 271.8139343261719, -32.29297637939453, -567.3812255859375, -16.180784225463867, 180.736083984375, 1808.4969482421875, 327.1463623046875, 72.2861557006836, 7.5357208251953125, 1651.458740234375, -407.66339111328125, 512.30615234375, 1457.9310302734375, 281.8287353515625, 866.2880249023438, 1158.9656982421875, -16.781959533691406, 587.220947265625, -196.97555541992188, 292.9745178222656, 139.08596801757812, 434.6729736328125, 117.37800598144531, 405.5441589355469, 9.344192504882812, 215.154296875, 494.974853515625, -753.5184936523438, -284.96209716796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000531.npy"} +{"epoch": 0.7797356828193832, "step": 532, "batch_size": 64, "mean": 361.05938720703125, "std": 657.469970703125, "min": -1334.456298828125, "p10": -421.58934326171874, "median": 287.59149169921875, "p90": 1284.9066528320318, "max": 1909.6993408203125, "pos_frac": 0.75, "sample": [1169.4229736328125, 467.2064208984375, 1602.9317626953125, -191.89767456054688, 1381.8272705078125, 236.05905151367188, 828.6094970703125, 812.227783203125, -1334.456298828125, 288.7621765136719, -334.38287353515625, 1357.1064453125, 512.1929931640625, 676.2833251953125, -425.7490234375, 666.3355712890625, 555.62939453125, -1181.0472412109375, 229.38682556152344, 286.4208068847656, 421.7725830078125, 1707.26708984375, 1909.6993408203125, 878.2803344726562, 641.6634521484375, 682.1851806640625, -151.52320861816406, 1119.6754150390625, 1334.399658203125, 157.3104248046875, -74.18897247314453, 289.7071838378906, -151.0853271484375, 1031.2734375, 771.3453369140625, 174.96560668945312, 197.30862426757812, 1076.15185546875, -64.31632232666016, 296.1657409667969, 56.29729461669922, 839.3046875, 145.31658935546875, 582.2845458984375, 218.06715393066406, -75.85596466064453, 3.7725906372070312, 680.5311889648438, 182.7559814453125, 40.71471405029297, -486.9562072753906, -540.5106811523438, 271.998779296875, 1599.8184814453125, 265.63140869140625, -54.434814453125, 308.6672668457031, -952.2546997070312, 45.9755859375, 232.86195373535156, -411.8834228515625, 625.80126953125, -617.0643920898438, 296.03369140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000532.npy"} +{"epoch": 0.7812041116005873, "step": 533, "batch_size": 64, "mean": 258.5040283203125, "std": 552.2969360351562, "min": -1503.364013671875, "p10": -317.6853454589844, "median": 243.83811950683594, "p90": 915.8158752441407, "max": 2089.2587890625, "pos_frac": 0.703125, "sample": [378.8831481933594, -359.19476318359375, 713.517333984375, -520.2552490234375, 426.5504150390625, 172.02755737304688, 1151.19775390625, 586.6150512695312, 276.3494873046875, 331.05645751953125, 762.031005859375, 639.9660034179688, 630.0161743164062, 737.5643920898438, -189.51943969726562, 412.1318359375, 903.4552612304688, 151.40597534179688, 313.3570556640625, -799.1295166015625, 1273.621337890625, 260.81463623046875, 43.22467041015625, 533.0916748046875, 62.54423904418945, 594.0459594726562, -51.95447540283203, -7.9790191650390625, 226.86160278320312, 2089.2587890625, 416.78985595703125, 413.9728698730469, -104.11285400390625, 47.452980041503906, 34.25539779663086, -1503.364013671875, -330.7845764160156, 387.4222106933594, -224.52850341796875, 380.8053283691406, 149.9241180419922, 215.66868591308594, 1204.7926025390625, 1202.8780517578125, -196.23068237304688, 28.492294311523438, -308.8045654296875, -4.1074981689453125, 296.6363830566406, 290.1319274902344, 311.2635803222656, -70.64750671386719, 921.11328125, 102.49901580810547, -321.49139404296875, -282.2308044433594, -753.6974487304688, 177.8607177734375, 145.10760498046875, 1105.9342041015625, -193.36520385742188, -4.405853271484375, 770.531982421875, 496.9402770996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000533.npy"} +{"epoch": 0.7826725403817915, "step": 534, "batch_size": 64, "mean": 348.64691162109375, "std": 525.1646118164062, "min": -893.8709716796875, "p10": -192.01328735351564, "median": 266.32556915283203, "p90": 922.2540893554689, "max": 1901.4661865234375, "pos_frac": 0.78125, "sample": [395.817626953125, 928.636962890625, -40.91419982910156, 1901.4661865234375, 56.29805374145508, 774.9592895507812, -479.0343017578125, 311.6721496582031, 360.59490966796875, -468.9155578613281, 76.82229614257812, 1221.937255859375, -192.64804077148438, 185.11294555664062, 1096.72119140625, 881.2493286132812, 833.8202514648438, -190.53219604492188, 335.963623046875, 906.10791015625, 250.77284240722656, -141.80404663085938, 369.1322937011719, 468.34051513671875, 281.8782958984375, 356.61676025390625, 982.658935546875, -118.12191772460938, 399.3546142578125, 182.11880493164062, 138.38375854492188, 818.3192138671875, 234.8904571533203, 178.03549194335938, 663.33447265625, -563.7745361328125, -117.52249145507812, 1594.1268310546875, 764.1416625976562, 907.3607177734375, 155.67816162109375, 146.0735321044922, -45.6864013671875, 152.34884643554688, 765.470947265625, 244.6505584716797, 58.06330490112305, 896.26171875, 1425.724609375, 903.7682495117188, 718.3455200195312, 313.7392883300781, 619.532470703125, 43.12456130981445, 348.7341003417969, 160.1737060546875, 303.7822570800781, 217.05938720703125, 242.93075561523438, 40.29931640625, -156.57553100585938, -893.8709716796875, -293.08477783203125, -596.5193481445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000534.npy"} +{"epoch": 0.7841409691629956, "step": 535, "batch_size": 64, "mean": 396.780029296875, "std": 537.9991455078125, "min": -854.108642578125, "p10": -225.5595733642578, "median": 378.2223205566406, "p90": 1018.092236328125, "max": 2183.694580078125, "pos_frac": 0.796875, "sample": [-341.4013671875, 2183.694580078125, -299.56121826171875, 497.48828125, -234.97474670410156, 216.2791748046875, 97.49200439453125, 970.6635131835938, 147.98040771484375, 619.3370971679688, 229.8002471923828, 21.905860900878906, 256.4176330566406, 945.585205078125, 454.7432861328125, 418.335205078125, 94.59037017822266, 1341.535888671875, 245.29489135742188, 410.73602294921875, 905.0324096679688, 507.97235107421875, 538.104736328125, 457.51702880859375, -20.738876342773438, 1483.0367431640625, 1185.695556640625, 517.1328125, -52.535308837890625, 333.1309509277344, 1513.4368896484375, 673.8948364257812, 1015.603271484375, -363.8713684082031, -321.1799011230469, -854.108642578125, 485.7068176269531, 717.6597290039062, 139.16299438476562, 451.8658447265625, 345.7086181640625, 1019.158935546875, 793.2733154296875, 210.49896240234375, 460.0212707519531, -88.21452331542969, 27.48711395263672, 692.2218017578125, -203.59083557128906, -515.18603515625, 576.8367919921875, 52.85796356201172, 418.37103271484375, 36.65093231201172, -139.5122528076172, -78.75885009765625, 956.16943359375, 139.40719604492188, 1490.992919921875, 252.41278076171875, 29.755685806274414, 522.2162475585938, 323.66156005859375, 483.02093505859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000535.npy"} +{"epoch": 0.7856093979441997, "step": 536, "batch_size": 64, "mean": 485.10693359375, "std": 675.7999267578125, "min": -1175.79833984375, "p10": -56.10823287963867, "median": 297.19915771484375, "p90": 1172.5166137695312, "max": 3862.385009765625, "pos_frac": 0.828125, "sample": [90.8951644897461, 809.1957397460938, 296.63702392578125, 985.5801391601562, 1225.872802734375, 23.444351196289062, 180.03463745117188, 135.724609375, 73.09307861328125, -27.712615966796875, 891.68115234375, 347.5636901855469, 1169.4154052734375, 842.5475463867188, -181.19544982910156, 532.4952392578125, 1311.439208984375, 104.67416381835938, 918.166015625, 766.0474243164062, 2076.76513671875, 590.971923828125, -94.6231918334961, 799.2711181640625, -48.372596740722656, 1173.845703125, 24.568260192871094, 592.1119384765625, 627.6751098632812, -52.0828857421875, 3862.385009765625, 768.4339599609375, 298.04718017578125, 233.19161987304688, 533.6359252929688, 724.8716430664062, 128.4680633544922, 1064.3343505859375, -321.7752380371094, 100.06060791015625, 127.93150329589844, -1175.79833984375, 1589.31494140625, 948.8792724609375, 721.1411743164062, -142.46092224121094, 121.25125122070312, 236.50714111328125, 261.4117431640625, 131.855712890625, 297.76129150390625, -57.83338165283203, 228.2969970703125, 219.7138671875, 246.03823852539062, 762.633056640625, 120.55726623535156, -39.61919021606445, -342.17034912109375, 35.94927978515625, 899.383544921875, 718.728759765625, 367.02630615234375, 1192.963134765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000536.npy"} +{"epoch": 0.7870778267254038, "step": 537, "batch_size": 64, "mean": 315.5430603027344, "std": 551.7247314453125, "min": -1292.76025390625, "p10": -305.2695556640625, "median": 261.96263122558594, "p90": 1063.087048339844, "max": 1343.551025390625, "pos_frac": 0.796875, "sample": [-1292.76025390625, -777.4710693359375, 28.464431762695312, 122.02017974853516, -417.8280029296875, 272.27813720703125, 24.76887321472168, 903.3817749023438, 11.129104614257812, -49.979373931884766, 453.3975524902344, 24.497756958007812, 506.9180603027344, 336.29864501953125, -306.78277587890625, 850.086669921875, 1119.074951171875, -50.25396728515625, 151.0845947265625, -18.99309539794922, 6.866731643676758, 765.0379028320312, 46.861305236816406, -774.1895141601562, 921.912353515625, -301.73870849609375, 1297.5865478515625, 1129.0528564453125, 337.8877868652344, 608.8134765625, 1004.38427734375, 306.809814453125, 1040.932373046875, 311.65155029296875, 772.493896484375, 23.028053283691406, 95.33470153808594, 555.9805908203125, 251.64712524414062, -458.04718017578125, 207.25643920898438, 387.9020690917969, -53.51176452636719, -868.5283203125, 40.0283317565918, 1072.5819091796875, -80.41586303710938, 172.32643127441406, 328.6527099609375, 835.67236328125, 156.49124145507812, 1343.551025390625, 124.60790252685547, 1217.5955810546875, 566.908935546875, 100.30220794677734, 1291.0972900390625, 123.70613098144531, 913.5929565429688, 370.67352294921875, 494.66644287109375, 674.2569580078125, 37.764373779296875, 905.9381713867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000537.npy"} +{"epoch": 0.788546255506608, "step": 538, "batch_size": 64, "mean": 372.283447265625, "std": 506.0456848144531, "min": -544.698486328125, "p10": -218.8054000854492, "median": 384.0868835449219, "p90": 944.5154785156252, "max": 1988.9552001953125, "pos_frac": 0.734375, "sample": [147.2898712158203, 465.18768310546875, 743.0931396484375, 1122.71728515625, 549.0198974609375, -173.1973876953125, 894.2175903320312, 559.7501220703125, 536.6629028320312, 403.5022888183594, -58.40563201904297, 1647.2901611328125, 508.58380126953125, 485.78271484375, 262.6871032714844, 872.0368041992188, -544.698486328125, -27.65912628173828, 362.23663330078125, 1127.21337890625, 277.15582275390625, 483.1634826660156, 108.62356567382812, 661.648193359375, 514.0892333984375, 105.49226379394531, -42.336181640625, 1505.109619140625, -29.785110473632812, -377.8954162597656, -518.3202514648438, 781.7225952148438, 159.2376251220703, -99.11222839355469, 1988.9552001953125, 674.48388671875, 687.5807495117188, 515.2808837890625, 364.6714782714844, 704.2816162109375, 193.8453369140625, -134.15509033203125, 13.303913116455078, 621.0968627929688, 291.7861022949219, 713.5670776367188, -500.5455627441406, 538.6590576171875, -221.84902954101562, 108.41130065917969, 553.6314697265625, 417.2862243652344, 266.0958251953125, 633.610107421875, -211.70359802246094, -50.786888122558594, 300.53125, 27.432445526123047, 1120.6668701171875, 966.0717163085938, -362.05963134765625, -269.4805908203125, 551.6268310546875, -88.25836944580078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000538.npy"} +{"epoch": 0.7900146842878121, "step": 539, "batch_size": 64, "mean": 347.94537353515625, "std": 462.8359680175781, "min": -822.2125244140625, "p10": -144.34913787841793, "median": 317.1759033203125, "p90": 892.8924133300785, "max": 1568.848388671875, "pos_frac": 0.78125, "sample": [288.43170166015625, 345.92010498046875, 99.49556732177734, -10.048295974731445, 361.5869140625, 442.86346435546875, -237.6935272216797, 30.12842559814453, -16.889495849609375, 1324.4818115234375, -322.413330078125, -25.460275650024414, 1568.848388671875, 164.06654357910156, 938.07275390625, 589.4007568359375, 741.1832885742188, 668.4091186523438, 52.9036865234375, 787.4716186523438, 1503.1552734375, 142.59930419921875, 277.176025390625, 700.3861083984375, 135.98333740234375, 200.14437866210938, 45.82585144042969, 472.69952392578125, -66.42939758300781, 687.0103759765625, -368.1435241699219, 587.2735595703125, -822.2125244140625, 522.5499877929688, 602.5008544921875, 76.6336669921875, 48.089820861816406, 519.987548828125, 187.21578979492188, -106.94882202148438, -111.29457092285156, 1325.740478515625, -199.34616088867188, 672.266845703125, -59.0002326965332, -158.515380859375, 979.9348754882812, -280.8507080078125, 43.610313415527344, 30.958839416503906, 595.743408203125, 184.95492553710938, 771.6548461914062, 445.925537109375, 446.63623046875, 347.80596923828125, 765.8799438476562, 542.640380859375, 404.3408203125, 539.2449951171875, 26.469038009643555, 1214.2105712890625, 539.9266357421875, 63.30851745605469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000539.npy"} +{"epoch": 0.7914831130690162, "step": 540, "batch_size": 64, "mean": 461.1107177734375, "std": 636.3046875, "min": -741.3790283203125, "p10": -279.51945037841784, "median": 366.1020812988281, "p90": 1428.9562133789063, "max": 2530.441650390625, "pos_frac": 0.75, "sample": [465.4664611816406, 585.464111328125, 738.0836181640625, -403.3931884765625, 1886.9940185546875, -123.95703125, 294.6344299316406, 106.92181396484375, -741.3790283203125, -478.6680908203125, -106.22864532470703, 536.127685546875, -145.32154846191406, 285.8150634765625, 2530.441650390625, -123.59583282470703, 249.82473754882812, 103.19906616210938, 625.459228515625, 517.0883178710938, -17.888702392578125, 177.50213623046875, 426.7146301269531, 763.8726196289062, 783.7288818359375, 1330.5946044921875, 595.9721069335938, 1445.0423583984375, 1464.9600830078125, 424.461181640625, 623.0120849609375, 411.94757080078125, 636.8419799804688, 101.39942169189453, 320.256591796875, 447.1512756347656, -51.781219482421875, -15.954803466796875, 1572.266357421875, 307.1485290527344, 1071.445068359375, 1810.3707275390625, 1474.963623046875, 1248.604248046875, -412.7009582519531, 676.8839111328125, 228.73977661132812, 258.49462890625, 636.2333374023438, 313.2637939453125, -337.0328369140625, 292.7913513183594, 674.5819702148438, 774.1190185546875, 163.82345581054688, 282.62158203125, 775.8087158203125, 179.52023315429688, 1391.421875, -49.49958038330078, -37.229637145996094, 659.7310791015625, -713.9684448242188, -402.1252136230469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000540.npy"} +{"epoch": 0.7929515418502202, "step": 541, "batch_size": 64, "mean": 243.34228515625, "std": 560.9125366210938, "min": -1181.81640625, "p10": -408.38843383789055, "median": 208.36014556884766, "p90": 928.2657592773443, "max": 1862.882568359375, "pos_frac": 0.71875, "sample": [-843.317138671875, 185.24398803710938, 638.297119140625, 88.71377563476562, 169.02053833007812, 67.64816284179688, 376.8235168457031, 92.85102844238281, 227.80108642578125, -112.7398681640625, -104.53231811523438, 1258.30810546875, -832.6580200195312, -6.8634490966796875, 1004.327392578125, 48.214210510253906, 267.0911560058594, 1228.7384033203125, 85.84505462646484, 771.4954833984375, 1551.886474609375, 800.6141967773438, -0.230499267578125, -528.0142822265625, 220.4332275390625, -269.2791748046875, 402.7799072265625, 307.0099182128906, -434.77490234375, 672.1240234375, 669.8370361328125, 62.80333709716797, 750.2535400390625, 181.62388610839844, 174.20883178710938, 144.37179565429688, 148.79644775390625, 151.77281188964844, 603.2274169921875, 422.96759033203125, -134.98606872558594, 196.7809295654297, 704.2371215820312, 982.9735717773438, 591.2987060546875, 288.36572265625, -1181.81640625, -548.1343994140625, 440.1781311035156, 630.165283203125, -319.9785461425781, 405.3799133300781, 297.6109924316406, 1862.882568359375, 1127.37646484375, -346.82000732421875, -892.2590942382812, 506.6162414550781, -241.07809448242188, -260.9356994628906, -141.74090576171875, 219.93936157226562, 467.7738037109375, 277.356689453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000541.npy"} +{"epoch": 0.7944199706314243, "step": 542, "batch_size": 64, "mean": 453.1556091308594, "std": 631.8525390625, "min": -1174.165771484375, "p10": -284.3477310180663, "median": 372.9001922607422, "p90": 1151.4187255859376, "max": 2169.35205078125, "pos_frac": 0.796875, "sample": [1101.0723876953125, 325.12799072265625, 249.80560302734375, 241.73556518554688, 183.61749267578125, 994.0892944335938, -345.56646728515625, 2169.35205078125, 1172.9957275390625, 428.9527587890625, 16.249130249023438, 487.3814697265625, 865.27294921875, -46.425140380859375, -67.04155731201172, 457.0392150878906, 1945.6025390625, 305.06488037109375, -71.29412078857422, 1782.8876953125, 328.6889343261719, 131.5789337158203, 460.3580322265625, 489.71844482421875, -345.6002502441406, 263.82830810546875, -179.59027099609375, -405.2418212890625, -196.2577362060547, 1014.984619140625, -322.1005859375, 900.4818115234375, 932.2742309570312, 170.57229614257812, 1659.010986328125, -62.19620132446289, 670.908935546875, 493.3016052246094, -567.0504760742188, 623.0171508789062, 306.99609375, 399.2326354980469, 694.9031982421875, 166.0479736328125, 1548.330322265625, 177.20693969726562, 252.87826538085938, -1174.165771484375, 726.9771118164062, 163.0102996826172, 646.9982299804688, 348.35089111328125, 104.80165100097656, 397.4494934082031, 1613.8056640625, 1095.03515625, 243.57350158691406, -906.3959350585938, 1058.4163818359375, 240.93209838867188, 480.5727844238281, 619.9910888671875, 681.4251098632812, 858.9783935546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000542.npy"} +{"epoch": 0.7958883994126285, "step": 543, "batch_size": 64, "mean": 327.8123474121094, "std": 755.3506469726562, "min": -1023.5202026367188, "p10": -347.608984375, "median": 144.61444854736328, "p90": 1296.4673339843755, "max": 3413.46728515625, "pos_frac": 0.65625, "sample": [22.416046142578125, -53.78887939453125, 735.0440063476562, -50.8369140625, 2.151205062866211, 2301.818359375, 4.399759292602539, -42.78443908691406, -1023.5202026367188, 831.3065185546875, -7.297035217285156, -780.6921997070312, -3.3405494689941406, 181.7252197265625, 2287.32958984375, 51.93855285644531, 37.16426086425781, 3413.46728515625, -525.5147705078125, 15.215888977050781, 529.9960327148438, -350.91180419921875, -294.578125, 196.4829559326172, -391.9460754394531, -125.2253189086914, -96.86669921875, 529.222900390625, -339.90240478515625, 250.4123077392578, 552.2970581054688, 220.48109436035156, -356.06109619140625, 1338.707275390625, -153.62435913085938, -245.3427276611328, 1938.96875, 577.095458984375, 1197.907470703125, 69.7729263305664, 1562.8118896484375, 142.90589904785156, -176.93223571777344, 361.708740234375, 814.9577026367188, 15.259696960449219, 330.9091796875, 1767.2510986328125, -365.90362548828125, 583.4500122070312, 220.80233764648438, -101.88860321044922, 356.4002685546875, -269.4975891113281, 467.43023681640625, 601.6290283203125, 301.8758850097656, -195.44754028320312, 37.43260192871094, 360.6446838378906, 146.322998046875, 539.0279541015625, 542.4673461914062, 493.28533935546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000543.npy"} +{"epoch": 0.7973568281938326, "step": 544, "batch_size": 64, "mean": 376.4363708496094, "std": 569.7345581054688, "min": -1007.627685546875, "p10": -209.25299835205078, "median": 363.4873504638672, "p90": 1142.582836914063, "max": 1782.491455078125, "pos_frac": 0.75, "sample": [1245.91015625, 641.4193115234375, 352.95220947265625, 548.1907958984375, 1645.9910888671875, -909.8585205078125, 378.8853454589844, -105.57039642333984, 746.1978759765625, -15.147645950317383, 1191.4210205078125, -49.83164978027344, 692.6737670898438, 258.63800048828125, -997.6639404296875, 338.8780822753906, 634.6284790039062, 230.05316162109375, 106.24420166015625, 425.0372009277344, 374.0224914550781, 458.23419189453125, -219.62130737304688, 244.55581665039062, 98.01510620117188, 112.22280883789062, 751.5697631835938, 124.4228515625, -352.66302490234375, -212.04025268554688, 1600.1483154296875, 184.50576782226562, -71.0050277709961, 614.3165283203125, -1007.627685546875, 1028.6270751953125, 473.70245361328125, 385.5986328125, 639.9945068359375, -202.74940490722656, 295.06982421875, -11.531375885009766, 274.22308349609375, 1601.512451171875, 711.3428344726562, -77.12284851074219, 172.13157653808594, 1782.491455078125, 851.6235961914062, 570.4150390625, 29.912960052490234, 156.47958374023438, 751.2333374023438, 1414.6710205078125, 577.6229248046875, 405.2154846191406, 481.2323913574219, -165.7983856201172, 510.5694885253906, 98.84666442871094, -22.301132202148438, 755.2884521484375, -247.05233764648438, 792.57421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000544.npy"} +{"epoch": 0.7988252569750367, "step": 545, "batch_size": 64, "mean": 449.6326904296875, "std": 572.400390625, "min": -669.4094848632812, "p10": -115.39449081420895, "median": 313.7082061767578, "p90": 1409.4041137695312, "max": 1716.2310791015625, "pos_frac": 0.84375, "sample": [-152.49105834960938, -3.240753173828125, -615.8695068359375, 33.205841064453125, 285.1319580078125, 888.432373046875, 224.02926635742188, 69.70018005371094, 560.7819213867188, -443.1504821777344, 440.8944396972656, 699.297119140625, 42.18962860107422, 616.7157592773438, 1225.8321533203125, 102.59375762939453, 1589.2919921875, 1442.8182373046875, 1414.5968017578125, 374.4332275390625, 828.9728393554688, 43.83572006225586, 295.7601318359375, 284.87286376953125, 5.363059997558594, 331.6562805175781, 74.204833984375, 407.92333984375, 847.98974609375, -75.49776458740234, 1397.287841796875, 492.848388671875, 134.9328155517578, -132.4930877685547, 1716.2310791015625, 941.1566772460938, 22.072463989257812, 1466.386474609375, -573.7566528320312, 498.26287841796875, -10.954490661621094, 416.0259704589844, 131.64373779296875, 573.3065185546875, 392.0847473144531, 692.7310180664062, 906.489501953125, 1364.7515869140625, 275.3653259277344, 285.63385009765625, 156.60231018066406, 84.70159912109375, 278.7278747558594, 426.8327331542969, 73.01541137695312, 1636.9176025390625, 1288.997314453125, -271.3075256347656, 359.5113830566406, 195.11846923828125, 713.8516845703125, -669.4094848632812, 182.3842010498047, 1490.268798828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000545.npy"} +{"epoch": 0.8002936857562408, "step": 546, "batch_size": 64, "mean": 301.70965576171875, "std": 496.46099853515625, "min": -1541.517333984375, "p10": -243.1973098754883, "median": 261.12965393066406, "p90": 876.794500732422, "max": 1363.9864501953125, "pos_frac": 0.75, "sample": [-236.39926147460938, 104.52918243408203, 335.9444580078125, 11.95928955078125, 232.42498779296875, 545.8179321289062, 247.84048461914062, 558.2568969726562, 176.17369079589844, 216.993408203125, -178.12850952148438, -246.23623657226562, -215.4935760498047, -188.70916748046875, 764.9134521484375, 112.04087829589844, 41.372840881347656, 380.1271667480469, 965.9915161132812, 252.49713134765625, -58.87999725341797, 847.46875, -295.6146545410156, 1206.2213134765625, 668.7665405273438, 887.1526489257812, 793.4058837890625, 468.61328125, 852.62548828125, 374.2065124511719, 1363.9864501953125, -265.219482421875, -11.533683776855469, 439.4698791503906, 559.3084716796875, -788.4990844726562, 663.2811279296875, 562.0174560546875, 734.1748657226562, 892.1190795898438, -1541.517333984375, 733.873779296875, 260.48846435546875, 277.7813415527344, 465.0060729980469, 802.0952758789062, 68.26632690429688, 774.3382568359375, -62.116127014160156, 144.11720275878906, 60.29218292236328, 24.958059310913086, 761.0640258789062, 261.7708435058594, -241.45375061035156, 576.3572998046875, 159.26263427734375, 1120.424560546875, -243.94454956054688, 92.82527923583984, -116.98332214355469, 427.5423583984375, -319.01312255859375, 1048.995849609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000546.npy"} +{"epoch": 0.801762114537445, "step": 547, "batch_size": 64, "mean": 394.68609619140625, "std": 549.3321533203125, "min": -1178.05126953125, "p10": -165.9199440002441, "median": 369.6005554199219, "p90": 1023.425927734375, "max": 1963.1400146484375, "pos_frac": 0.765625, "sample": [595.69189453125, 50.91321563720703, -1178.05126953125, 957.572509765625, -49.3029899597168, 51.272674560546875, 313.4190673828125, 794.7158813476562, 234.73834228515625, 552.0128784179688, 148.21795654296875, 1616.947265625, 10.849552154541016, 1754.83935546875, -210.8589324951172, -68.05622100830078, 501.962158203125, 612.4823608398438, -187.42576599121094, 1013.1279296875, -28.21910858154297, 88.79193878173828, 434.5853271484375, 284.7328186035156, 671.1011962890625, -250.16314697265625, 535.8350830078125, 182.6880340576172, 219.5498809814453, 371.4236145019531, 1027.83935546875, -112.23136901855469, 508.9006042480469, 685.234619140625, 565.937255859375, 486.6966857910156, -97.19731140136719, 945.2992553710938, 367.7774963378906, 69.40618896484375, 741.3477783203125, 1963.1400146484375, 604.3968505859375, -572.0083618164062, -201.43731689453125, 16.252647399902344, -357.6383056640625, 1147.3963623046875, 116.82356262207031, 634.6490478515625, 1606.2412109375, 217.6809539794922, 616.720458984375, 424.78045654296875, 202.85557556152344, 315.70562744140625, 756.8414306640625, -57.00714111328125, 395.78399658203125, -73.42282104492188, 710.9041748046875, 1270.546630859375, 422.0409851074219, -115.73969268798828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000547.npy"} +{"epoch": 0.8032305433186491, "step": 548, "batch_size": 64, "mean": 348.1575927734375, "std": 579.3407592773438, "min": -970.20703125, "p10": -211.69056396484373, "median": 289.9557189941406, "p90": 834.5145080566408, "max": 2739.76025390625, "pos_frac": 0.734375, "sample": [-14.002792358398438, 802.2655029296875, 353.09808349609375, 320.13916015625, -130.9598388671875, 357.2655944824219, -970.20703125, 1335.2012939453125, 407.676513671875, 400.5210266113281, 422.17962646484375, 539.6346435546875, 1089.291259765625, 241.99005126953125, 704.7322998046875, -253.43206787109375, -122.22328186035156, 352.0167541503906, 797.27880859375, 182.8175048828125, 901.8207397460938, 286.1044616699219, 203.1324462890625, -425.50396728515625, 467.6463928222656, 122.0661849975586, 180.96168518066406, 293.8069763183594, 304.1378173828125, 173.15650939941406, -255.05307006835938, 2739.76025390625, 786.6417236328125, 209.20065307617188, 137.41366577148438, -457.4928894042969, -71.88493347167969, 679.5634765625, -191.72232055664062, 2122.65087890625, -15.848331451416016, -38.09365463256836, -259.27423095703125, 445.1290283203125, -90.9063720703125, 61.182403564453125, 373.33465576171875, 135.42242431640625, 423.9108581542969, -220.24838256835938, 517.1697387695312, 376.4149475097656, -62.34486389160156, 1938.89501953125, 389.8022155761719, 614.871337890625, 112.25004577636719, -35.323150634765625, 198.8983612060547, 251.5040283203125, 23.806114196777344, 558.826171875, 848.3355102539062, 712.681396484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000548.npy"} +{"epoch": 0.8046989720998532, "step": 549, "batch_size": 64, "mean": 269.5531005859375, "std": 534.2130126953125, "min": -1020.0985107421875, "p10": -268.22358703613276, "median": 194.38426208496094, "p90": 918.0298645019533, "max": 1659.8084716796875, "pos_frac": 0.65625, "sample": [302.7396545410156, 821.9788818359375, 48.17396545410156, -31.075538635253906, 140.89634704589844, 128.01263427734375, -163.3743438720703, 545.03662109375, 708.2279052734375, 857.50244140625, -145.87831115722656, 1291.6356201171875, 109.91886138916016, 171.79837036132812, -101.49725341796875, -287.45208740234375, 938.5438842773438, -325.1523132324219, -121.78194427490234, -747.8109130859375, 634.2962036132812, 1054.7171630859375, -349.8836364746094, 221.15740966796875, 304.1446228027344, 220.873779296875, 444.5323486328125, -75.14324188232422, 743.7387084960938, 870.163818359375, -108.45159912109375, 217.14048767089844, 270.6425476074219, -223.35708618164062, 400.66729736328125, -1020.0985107421875, 216.97015380859375, 613.3525390625, 114.8405990600586, 62.25337600708008, -19.351837158203125, -65.3313217163086, 316.16326904296875, 658.58203125, 1402.532470703125, -66.99800109863281, -198.7511444091797, 332.99725341796875, -371.7166748046875, 1643.3262939453125, 1330.449951171875, -746.1065673828125, -44.66276550292969, 713.49755859375, 448.98095703125, -157.79544067382812, 139.90411376953125, 158.19598388671875, 620.24169921875, 1659.8084716796875, 386.01043701171875, 281.65875244140625, 136.1039581298828, -59.340736389160156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000549.npy"} +{"epoch": 0.8061674008810573, "step": 550, "batch_size": 64, "mean": 322.6148376464844, "std": 509.4976806640625, "min": -1061.054443359375, "p10": -278.4778381347655, "median": 308.55914306640625, "p90": 848.1503234863281, "max": 1967.3153076171875, "pos_frac": 0.8125, "sample": [331.3052673339844, 336.7983093261719, 1138.4881591796875, 336.4660949707031, 244.90371704101562, 739.3897094726562, 1049.4996337890625, 305.15240478515625, -140.9923553466797, 258.0875244140625, 584.498779296875, 385.25213623046875, 534.6263427734375, 125.66918182373047, 311.96588134765625, 1766.5406494140625, 84.56005096435547, -332.1145935058594, 79.81895446777344, 204.03363037109375, -1061.054443359375, -13.278533935546875, 534.450439453125, 831.2341918945312, -89.50648498535156, 853.1089477539062, 836.5802001953125, 142.9644775390625, 258.48394775390625, 122.19065856933594, 40.72560119628906, -756.4954833984375, 264.7227783203125, -120.78865814208984, 443.3674621582031, 832.654296875, 58.71766662597656, 530.636962890625, 1967.3153076171875, 442.9591369628906, 295.7491455078125, 642.6286010742188, 51.09608840942383, 41.9385986328125, 1195.3563232421875, 428.0106201171875, 1044.6192626953125, 117.43251037597656, 83.59420013427734, 573.6541137695312, 445.5326843261719, 600.4304809570312, -615.79296875, 563.4008178710938, -153.32540893554688, 315.07476806640625, 243.37840270996094, 487.8172912597656, 355.4890441894531, -377.9486083984375, 488.3541259765625, -388.57916259765625, -365.937255859375, 112.43791198730469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000550.npy"} +{"epoch": 0.8076358296622613, "step": 551, "batch_size": 64, "mean": 537.0025634765625, "std": 531.70263671875, "min": -413.5666809082031, "p10": -56.82378005981443, "median": 498.00140380859375, "p90": 1391.6067260742188, "max": 2011.931640625, "pos_frac": 0.859375, "sample": [15.30807876586914, -29.67303466796875, 495.4585266113281, 790.9732055664062, 621.27490234375, 536.2249145507812, -413.5666809082031, 20.760791778564453, 1358.9652099609375, -261.4112548828125, -37.06885528564453, 292.28955078125, -65.29017639160156, 550.67919921875, 577.4763793945312, 1597.16943359375, 500.5442810058594, 389.82891845703125, 506.54437255859375, 1634.5733642578125, 979.997314453125, 379.81292724609375, 535.0941162109375, 614.8216552734375, 294.07379150390625, 278.62054443359375, 954.4076538085938, 413.032958984375, 586.9393310546875, 335.4765625, 378.3161926269531, 1.0750999450683594, 1528.625, 151.07550048828125, 536.8433837890625, 622.6932373046875, 751.0720825195312, 721.4270629882812, 542.5828857421875, 129.07212829589844, -177.38510131835938, 1143.9298095703125, 1405.595947265625, 459.7474060058594, 27.469921112060547, 1554.6954345703125, -379.9389343261719, -268.9532165527344, 1446.833251953125, 810.4071044921875, -248.09567260742188, 141.15029907226562, 1317.9892578125, 887.014404296875, 428.3531494140625, 590.4212646484375, 782.832763671875, 337.3484802246094, 401.2078857421875, 988.3936157226562, 154.6304473876953, 2011.931640625, 416.058837890625, 320.40570068359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000551.npy"} +{"epoch": 0.8091042584434655, "step": 552, "batch_size": 64, "mean": 586.982177734375, "std": 698.5947875976562, "min": -690.9524536132812, "p10": -80.78665542602535, "median": 506.0690155029297, "p90": 1428.2556396484379, "max": 2675.06591796875, "pos_frac": 0.859375, "sample": [741.8590698242188, 105.66326141357422, 1054.0665283203125, 2609.431396484375, 771.9454345703125, 1164.77392578125, 568.8108520507812, 1006.6298828125, 23.23944091796875, 512.1934814453125, 641.16748046875, 504.3123474121094, 143.88131713867188, 455.45458984375, 167.1819305419922, 600.785888671875, 249.06521606445312, 663.1082153320312, 1307.5428466796875, -405.9273986816406, 74.02096557617188, 684.338623046875, 491.1546630859375, 378.4052429199219, -690.9524536132812, 16.71973419189453, 37.346168518066406, 1479.4473876953125, 188.5369110107422, -236.63531494140625, 1344.9736328125, -596.2922973632812, 816.24365234375, -36.621559143066406, 492.7081298828125, 122.53221130371094, 484.256103515625, 2432.099853515625, 1362.3438720703125, 1814.0482177734375, 545.3001708984375, -157.09994506835938, 507.82568359375, 885.6370849609375, 618.6163330078125, 2675.06591796875, 254.39443969726562, 461.43524169921875, 1456.5035400390625, 2227.33349609375, 530.7739868164062, 125.36598205566406, 454.27471923828125, -415.4388122558594, -32.814056396484375, 553.03662109375, -99.71455383300781, 115.78498840332031, 407.15081787109375, 554.5494384765625, 660.86669921875, 934.6976318359375, 731.1434326171875, 28.31218719482422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000552.npy"} +{"epoch": 0.8105726872246696, "step": 553, "batch_size": 64, "mean": 308.27679443359375, "std": 559.7786254882812, "min": -723.8419799804688, "p10": -160.89324798583982, "median": 217.4594497680664, "p90": 1045.5955383300784, "max": 2547.1787109375, "pos_frac": 0.734375, "sample": [2547.1787109375, 922.7122802734375, -152.4412841796875, 436.1979064941406, 460.2156982421875, -107.91620635986328, 53.397216796875, 40.11431121826172, 148.17210388183594, 248.54437255859375, 99.5982666015625, 213.9416046142578, 106.71871185302734, 78.57656860351562, -63.12421417236328, 261.191162109375, -489.9905700683594, 16.44134521484375, 366.13336181640625, -114.05519104003906, 230.18679809570312, 190.2409210205078, 348.9710998535156, -505.7462158203125, 498.73699951171875, -15.965225219726562, 1130.6541748046875, 89.41424560546875, -14.719474792480469, 305.436279296875, -164.51551818847656, 388.796142578125, -149.7352294921875, -131.35411071777344, 852.6343994140625, 292.96044921875, 361.514892578125, 103.4901123046875, 1070.4573974609375, -49.014122009277344, 419.0456848144531, -723.8419799804688, 615.2815551757812, 506.2861633300781, 226.87954711914062, 111.84326171875, -652.135498046875, 32.9014892578125, 519.2777099609375, 22.00713348388672, 1128.9178466796875, -52.431861877441406, 1311.3607177734375, 530.58935546875, 831.338623046875, 1127.91455078125, 220.977294921875, 987.5845336914062, -489.64752197265625, 1865.327880859375, -277.243896484375, 890.70703125, 65.52765655517578, 607.1983642578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000553.npy"} +{"epoch": 0.8120411160058737, "step": 554, "batch_size": 64, "mean": 411.3079833984375, "std": 720.21435546875, "min": -1548.0126953125, "p10": -454.34450073242186, "median": 390.6107482910156, "p90": 1385.6586059570318, "max": 2303.512451171875, "pos_frac": 0.765625, "sample": [168.1457061767578, -726.2325439453125, 826.1987915039062, 547.309326171875, 233.97702026367188, 938.951416015625, 359.2031555175781, 425.5627746582031, -422.1098937988281, 411.60595703125, 2303.512451171875, 424.2862548828125, 431.0587158203125, 328.5660705566406, -195.28001403808594, 451.0206298828125, 575.0430908203125, 566.2373046875, 571.810791015625, -18.031883239746094, -631.1844482421875, -682.212646484375, -468.1593322753906, 7.536491394042969, 2118.126953125, -173.76043701171875, 225.98837280273438, 426.3752136230469, 461.18719482421875, 864.5079956054688, 571.4049072265625, 1447.0908203125, 1618.7833251953125, 179.0963134765625, 942.8638916015625, 341.2980651855469, 910.3201904296875, 4.575130462646484, 1669.1011962890625, -150.9154052734375, -67.08511352539062, 33.30486297607422, 1123.0634765625, 619.7333984375, 985.381103515625, 88.80448913574219, 322.757568359375, 5.066864013671875, 895.4109497070312, 792.1461791992188, -861.0226440429688, 617.2542724609375, 290.5660705566406, -688.0187377929688, 1121.89990234375, -1548.0126953125, -312.4252624511719, 0.9637508392333984, 369.61553955078125, 1242.3167724609375, 1730.2977294921875, 1750.550537109375, 117.34691619873047, -189.06219482421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000554.npy"} +{"epoch": 0.8135095447870778, "step": 555, "batch_size": 64, "mean": 482.5061950683594, "std": 735.4490356445312, "min": -1135.4232177734375, "p10": -226.7660247802734, "median": 412.6693115234375, "p90": 1563.3124389648438, "max": 2262.47509765625, "pos_frac": 0.78125, "sample": [468.6966552734375, 404.12811279296875, 1399.4525146484375, 282.1488342285156, 699.4784545898438, 1225.8260498046875, 421.21051025390625, 240.23751831054688, -197.96807861328125, 178.58847045898438, -141.58103942871094, 856.4907836914062, 779.417236328125, 533.7931518554688, 21.4228515625, 58.8062744140625, 167.11813354492188, -239.10800170898438, 2262.47509765625, 1190.8121337890625, -1.21759033203125, 2000.8551025390625, 87.18775177001953, 459.6181640625, 488.27227783203125, 94.36477661132812, 780.8148193359375, 38.53468322753906, 450.2962951660156, 112.26493835449219, 731.1075439453125, 1082.10400390625, 630.9755859375, 507.7672424316406, 19.266845703125, 186.7102813720703, 680.3658447265625, -673.4085693359375, 201.23880004882812, -139.25714111328125, -969.0103759765625, 1113.1119384765625, 1765.487060546875, 2255.41015625, -537.8377685546875, 474.7958679199219, 2074.2060546875, 177.02171325683594, 493.1833801269531, 53.204097747802734, 1551.3482666015625, -498.84857177734375, 1568.43994140625, -1135.4232177734375, -187.8594970703125, -137.42410278320312, 1789.2373046875, 547.4234619140625, -160.83267211914062, 198.3145751953125, 1253.3917236328125, 333.7257385253906, -265.8956298828125, 775.92041015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000555.npy"} +{"epoch": 0.8149779735682819, "step": 556, "batch_size": 64, "mean": 294.10113525390625, "std": 440.38470458984375, "min": -495.514892578125, "p10": -187.3568572998047, "median": 239.7291259765625, "p90": 911.5039428710938, "max": 1526.4415283203125, "pos_frac": 0.71875, "sample": [1275.9451904296875, 234.088134765625, 71.90994262695312, -118.7130126953125, 285.3130187988281, 142.3787841796875, 569.4574584960938, 435.2638244628906, -180.93331909179688, 348.1390380859375, 19.347312927246094, 679.1626586914062, 280.6680603027344, -190.10980224609375, 198.28466796875, 547.5515747070312, 1132.056884765625, -10.30218505859375, 692.7508544921875, -155.72308349609375, -495.514892578125, -92.1314926147461, -316.04632568359375, 156.1074981689453, -399.7078857421875, -105.0028076171875, 362.5730895996094, 27.610153198242188, 915.4767456054688, -310.754638671875, 380.3109130859375, 289.3624267578125, 198.14390563964844, 101.28565216064453, 787.9747924804688, -46.261253356933594, 432.9361572265625, 137.5610809326172, 205.67947387695312, -43.75116729736328, -79.9405517578125, 794.6356201171875, 189.6398468017578, 141.77813720703125, -230.44363403320312, 535.8192138671875, 902.2340698242188, 499.81756591796875, 958.1264038085938, 69.4074478149414, 245.3701171875, -139.95968627929688, 830.9840698242188, 323.34735107421875, 1526.4415283203125, -102.22411346435547, 926.3687744140625, 1441.050537109375, 475.7162780761719, 446.01434326171875, 404.67333984375, -318.4945983886719, 283.63916015625, 256.081787109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000556.npy"} +{"epoch": 0.8164464023494861, "step": 557, "batch_size": 64, "mean": 442.61669921875, "std": 711.7261352539062, "min": -1500.56591796875, "p10": -298.71486511230466, "median": 359.02040100097656, "p90": 1300.555981445313, "max": 2706.271484375, "pos_frac": 0.765625, "sample": [-1500.56591796875, 196.56187438964844, -1058.8525390625, 831.2073974609375, 373.2291564941406, 254.21697998046875, 131.34690856933594, 524.8873291015625, 697.4561767578125, 81.26513671875, 952.4544067382812, 1352.47509765625, 1057.05908203125, 66.4365234375, 159.29415893554688, 308.1005859375, 467.91986083984375, 949.8596801757812, -41.416316986083984, 87.61719512939453, 83.62831115722656, 1166.62255859375, 1648.8441162109375, 690.2171630859375, -4.879638671875, 538.1494140625, 43.28790283203125, -310.67669677734375, -293.5022277832031, 845.519775390625, 167.9549560546875, 675.769775390625, 1132.376708984375, 543.2548217773438, 220.27317810058594, 194.63722229003906, 439.7907409667969, -522.3713989257812, 895.0634155273438, 554.2045288085938, 807.4834594726562, -300.9488525390625, 542.5813598632812, 138.81495666503906, 90.13459777832031, -471.47796630859375, 692.3670654296875, 1706.6673583984375, -466.6527404785156, 2706.271484375, -48.83964920043945, 2262.79248046875, 1523.420654296875, 182.73880004882812, 1069.822021484375, -141.4720001220703, -291.9942626953125, 1485.5128173828125, -267.03240966796875, 1179.411376953125, -53.267295837402344, 469.0917663574219, 344.8116455078125, 568.516357421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000557.npy"} +{"epoch": 0.8179148311306902, "step": 558, "batch_size": 64, "mean": 584.4525146484375, "std": 631.5555419921875, "min": -1704.5823974609375, "p10": -116.14519042968747, "median": 504.88079833984375, "p90": 1356.765234375, "max": 1928.3779296875, "pos_frac": 0.8125, "sample": [394.177490234375, 1113.0860595703125, 207.9611053466797, 1371.136962890625, 180.45059204101562, 349.8731994628906, 575.9198608398438, 62.53718566894531, 845.1064453125, -371.6000671386719, -183.31295776367188, 1261.4736328125, 1288.3048095703125, 1063.43994140625, 461.18853759765625, 1106.2811279296875, 637.1328735351562, 392.6020812988281, 1683.9761962890625, 1720.0672607421875, -223.93301391601562, 361.62286376953125, 327.7200927734375, 1224.7132568359375, -61.49427032470703, 700.7255859375, 1323.231201171875, 416.76702880859375, 1431.42724609375, 539.1257934570312, 412.7117004394531, 562.6368408203125, 1014.8228149414062, 1279.95166015625, 1392.3548583984375, 1006.9008178710938, -1704.5823974609375, -92.67276000976562, 452.3456115722656, 173.1165313720703, 400.80816650390625, -0.5188884735107422, 126.28471374511719, 400.33709716796875, 492.7025146484375, -84.18362426757812, -222.1577911376953, 1928.3779296875, 1017.2117309570312, -0.9239883422851562, 770.427001953125, 1056.246337890625, 562.6340942382812, 517.05908203125, 1102.9063720703125, -642.8848266601562, 772.3023681640625, 371.9615173339844, 457.1256103515625, -126.20480346679688, 217.54876708984375, 1455.55615234375, 1087.600830078125, 1047.450439453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000558.npy"} +{"epoch": 0.8193832599118943, "step": 559, "batch_size": 64, "mean": 494.1765441894531, "std": 636.841796875, "min": -851.0772094726562, "p10": -87.81757354736324, "median": 364.3088836669922, "p90": 1261.2559814453125, "max": 2370.71435546875, "pos_frac": 0.8125, "sample": [973.0742797851562, 1244.920654296875, 623.634765625, 654.8351440429688, 71.64575958251953, 50.45611572265625, 140.78175354003906, 158.76808166503906, 1324.647705078125, 2370.71435546875, 1158.049072265625, 421.821044921875, 1129.567626953125, 13.686735153198242, 164.77291870117188, 766.02587890625, 1115.845703125, 1317.666259765625, -851.0772094726562, -442.8284912109375, 1268.2568359375, 306.463623046875, 190.14794921875, 402.70904541015625, 165.1835479736328, 1072.62841796875, -162.18289184570312, -103.34173583984375, 1123.0845947265625, 200.77410888671875, -11.050590515136719, 301.702880859375, 113.02152252197266, 51.147499084472656, 122.97892761230469, -51.59452819824219, 28.24466323852539, -43.873802185058594, 247.18528747558594, 211.8626708984375, 1923.066650390625, 551.5856323242188, 1163.341796875, 105.88028717041016, -727.2195434570312, 658.1229248046875, 633.45947265625, 602.9415283203125, 1135.4385986328125, -49.514793395996094, 496.2749328613281, 328.9513854980469, 399.6663818359375, 1148.26708984375, 1456.709716796875, 1915.252685546875, -462.6384582519531, 735.9534301757812, 1041.9569091796875, 464.1016845703125, -22.013967514038086, 95.85887145996094, -532.5374145507812, 724.0377197265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000559.npy"} +{"epoch": 0.8208516886930984, "step": 560, "batch_size": 64, "mean": 358.7589111328125, "std": 579.1507568359375, "min": -980.9002075195312, "p10": -247.44680633544922, "median": 296.016357421875, "p90": 1233.1477294921876, "max": 2054.414306640625, "pos_frac": 0.734375, "sample": [2054.414306640625, -371.6884765625, -132.89505004882812, 1135.986083984375, -443.3343811035156, 577.0216064453125, 67.03716278076172, -128.03070068359375, 68.336669921875, 416.4681701660156, 239.38726806640625, 140.19265747070312, 225.28863525390625, 554.9109497070312, 1520.011962890625, -249.53973388671875, 767.0536499023438, 1266.89111328125, 574.1310424804688, 1024.10546875, 742.003662109375, -31.33843994140625, 187.90945434570312, 1317.310546875, 890.0802612304688, -606.16650390625, 26.121158599853516, 1296.544921875, 427.876220703125, -165.39804077148438, 814.8439331054688, -242.5633087158203, 26.787822723388672, 547.3587036132812, 436.44134521484375, -524.44482421875, 81.77259063720703, 1408.7623291015625, 257.5299987792969, 535.4579467773438, 552.3282470703125, -145.97308349609375, 429.60205078125, 160.81393432617188, 685.9417114257812, 1256.766357421875, -89.98484802246094, -17.255149841308594, -103.72877502441406, 1178.03759765625, 567.6435546875, 189.16116333007812, 530.1441650390625, 233.66050720214844, 368.75634765625, 334.5027160644531, -235.08404541015625, 510.942626953125, 772.140380859375, 610.7410278320312, -725.61279296875, 99.02074432373047, -980.9002075195312, 46.26788330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000560.npy"} +{"epoch": 0.8223201174743024, "step": 561, "batch_size": 64, "mean": 298.46636962890625, "std": 651.0885009765625, "min": -2304.068603515625, "p10": -287.59631347656244, "median": 230.6715850830078, "p90": 1063.9386779785161, "max": 1762.72705078125, "pos_frac": 0.703125, "sample": [557.5147094726562, 481.82000732421875, 100.80794525146484, -7.256082534790039, -1091.104736328125, 1289.56396484375, 804.8489379882812, 81.53660583496094, 665.592041015625, 1108.841796875, 107.32904052734375, 513.6554565429688, 1762.72705078125, -135.6193389892578, 179.81741333007812, 178.44497680664062, 136.62445068359375, -251.02639770507812, 373.87860107421875, 454.6025390625, 1314.50390625, 168.35562133789062, 590.6078491210938, 320.94573974609375, -244.20745849609375, 119.80052185058594, 873.8148193359375, 437.2054138183594, -64.84656524658203, 1718.292236328125, 782.4708251953125, 1264.76220703125, -0.05112457275390625, 248.57818603515625, 923.8243408203125, -575.4869995117188, -303.2691345214844, 556.2127685546875, 20.516756057739258, 675.4916381835938, 131.4361114501953, -393.5229187011719, -87.65039825439453, 822.5487670898438, 520.97216796875, -179.09576416015625, 138.06463623046875, 797.1316528320312, 959.1647338867188, 212.76498413085938, -2304.068603515625, 1752.82958984375, 2.314960479736328, -122.36309051513672, 401.64599609375, -7.27264404296875, 414.59173583984375, -29.218122482299805, 581.5244750976562, 298.98492431640625, -717.3515625, 295.3641662597656, -355.5617980957031, -171.50717163085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000561.npy"} +{"epoch": 0.8237885462555066, "step": 562, "batch_size": 64, "mean": 423.41015625, "std": 642.0966186523438, "min": -1266.3299560546875, "p10": -220.91692962646485, "median": 351.2316589355469, "p90": 1013.2212829589844, "max": 2740.4228515625, "pos_frac": 0.765625, "sample": [605.3276977539062, 2740.4228515625, 1392.877685546875, 528.4603271484375, 132.18341064453125, 446.8740539550781, 743.0608520507812, 244.1633758544922, 1003.026123046875, 792.6527709960938, 818.3355102539062, 977.1119384765625, 264.56671142578125, 473.5585021972656, 1064.44287109375, -157.47642517089844, 243.7186279296875, 884.6397705078125, 213.09457397460938, 424.0619201660156, 300.6121826171875, 558.329833984375, 445.45892333984375, -338.4030456542969, 26.441062927246094, 796.5833129882812, 968.6193237304688, -50.45013427734375, 972.316650390625, 270.75244140625, -275.11663818359375, -180.1318359375, -58.47511291503906, 721.87255859375, -60.41187286376953, 2597.265380859375, -223.65667724609375, 467.769287109375, 592.0076904296875, 991.7333374023438, 801.877685546875, 198.0883331298828, 638.818115234375, 240.13958740234375, -1266.3299560546875, 240.0096435546875, -214.52418518066406, 852.0423583984375, 401.85113525390625, 1017.5906372070312, -96.83648681640625, -95.22892761230469, 172.63229370117188, 57.99894332885742, 663.966064453125, 277.55706787109375, -709.2241821289062, 55.385154724121094, 171.58641052246094, -540.072998046875, 113.51570129394531, -460.217529296875, 1098.5577392578125, 1120.84716796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000562.npy"} +{"epoch": 0.8252569750367107, "step": 563, "batch_size": 64, "mean": 314.54473876953125, "std": 746.2301025390625, "min": -2902.46728515625, "p10": -497.60447387695314, "median": 388.7348327636719, "p90": 1023.6092407226564, "max": 1780.342041015625, "pos_frac": 0.765625, "sample": [205.97024536132812, -2902.46728515625, -912.021484375, 445.792724609375, 774.7800903320312, 148.568359375, 1413.0140380859375, 11.549346923828125, 319.17645263671875, 678.3857421875, 1149.1429443359375, 166.21954345703125, -487.2375183105469, 966.8369140625, 824.2557983398438, 784.987548828125, 109.24064636230469, 1743.292724609375, 704.1101684570312, -158.33714294433594, 570.623046875, 101.82538604736328, -187.74032592773438, 321.461181640625, 907.7929077148438, -5.090282440185547, 719.342041015625, 336.607177734375, 1036.6158447265625, 921.965087890625, 121.02114868164062, 747.215576171875, -1659.9544677734375, 535.1591186523438, -96.72010803222656, 146.03961181640625, -13.38717269897461, 610.7096557617188, 558.064453125, 377.59588623046875, 787.1697387695312, 594.6705932617188, 1291.4853515625, 291.3057861328125, 993.260498046875, -177.9097900390625, -1240.4713134765625, 1780.342041015625, -516.8857421875, 1343.9990234375, 117.02835083007812, 681.0975952148438, 76.15735626220703, -664.30712890625, 694.5965576171875, -502.0474548339844, 452.2529296875, -300.37811279296875, 105.24745178222656, 558.95751953125, 399.873779296875, 468.8301696777344, 323.5713806152344, 538.611572265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000563.npy"} +{"epoch": 0.8267254038179148, "step": 564, "batch_size": 64, "mean": 402.17816162109375, "std": 532.6932373046875, "min": -541.505859375, "p10": -215.00467834472653, "median": 321.53375244140625, "p90": 1136.3365966796878, "max": 2038.6383056640625, "pos_frac": 0.796875, "sample": [320.646240234375, 475.5238952636719, 760.3238525390625, 142.5481414794922, 672.867431640625, 597.818115234375, 661.8057861328125, -367.1327209472656, 307.6490173339844, -541.505859375, 578.861572265625, 703.9596557617188, -311.26568603515625, 366.3273010253906, 1059.92431640625, 32.03851318359375, -166.93885803222656, 1264.8052978515625, 1169.084716796875, 484.93096923828125, 332.4314270019531, 233.7798614501953, 210.60472106933594, -186.92266845703125, -378.08062744140625, 290.22698974609375, -227.03982543945312, 218.15408325195312, 17.216644287109375, -367.71600341796875, 331.3355407714844, 1944.883544921875, 686.5574340820312, -104.90087127685547, 28.786352157592773, 356.6761474609375, 146.51356506347656, 129.87002563476562, 451.2376708984375, -18.349590301513672, 535.6651000976562, 2038.6383056640625, 63.0213623046875, 570.436279296875, 716.1751708984375, 753.97119140625, 1470.764892578125, 803.9212036132812, 1316.8668212890625, 322.4212646484375, -6.995246887207031, 702.54150390625, 62.275978088378906, 69.48062896728516, 169.35023498535156, 275.19769287109375, 204.43739318847656, 931.109619140625, 326.8551940917969, -88.61293029785156, 1350.695556640625, -312.397216796875, 914.0177612304688, 242.02874755859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000564.npy"} +{"epoch": 0.8281938325991189, "step": 565, "batch_size": 64, "mean": 422.9785461425781, "std": 738.9156494140625, "min": -1230.4656982421875, "p10": -436.9650177001952, "median": 282.35890197753906, "p90": 1232.7968139648438, "max": 2629.958740234375, "pos_frac": 0.65625, "sample": [13.850322723388672, 1148.6793212890625, 1342.493408203125, -1230.4656982421875, 215.73760986328125, 953.849609375, 2054.228515625, -127.09126281738281, -136.47291564941406, -302.76092529296875, 557.678466796875, 1168.587646484375, -272.7705383300781, -970.93212890625, 249.17242431640625, -494.1358642578125, -542.5084228515625, -109.64105224609375, -99.1165771484375, 477.9657897949219, 1101.7366943359375, -5.259422302246094, 267.0816650390625, 1566.9678955078125, -510.13128662109375, -29.88732147216797, 216.54180908203125, 285.9297790527344, -583.167724609375, 997.5029907226562, 622.1600341796875, 291.12225341796875, 644.0078125, 864.184814453125, 817.8106079101562, -1.8854522705078125, 921.8063354492188, 620.5136108398438, -303.5663757324219, -72.35662078857422, -73.84312438964844, 927.1455688476562, 762.1226806640625, 1702.9814453125, 854.8056030273438, 2629.958740234375, 521.19970703125, 2242.622802734375, 1226.4261474609375, -501.7767028808594, -118.0704345703125, 15.958938598632812, 993.1136474609375, 480.2134094238281, 798.12548828125, 284.1854553222656, 1026.425537109375, 280.5323486328125, 79.3284912109375, -6.804039001464844, -264.689697265625, 251.2665557861328, 116.41178894042969, 1235.527099609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000565.npy"} +{"epoch": 0.8296622613803231, "step": 566, "batch_size": 64, "mean": 470.28839111328125, "std": 739.6570434570312, "min": -1315.94921875, "p10": -363.9959411621094, "median": 541.4591674804688, "p90": 1466.3245605468758, "max": 2128.865966796875, "pos_frac": 0.703125, "sample": [-216.49371337890625, -749.411865234375, 607.168212890625, 2128.865966796875, 518.814453125, -185.71755981445312, 610.759765625, 154.21609497070312, -946.0810546875, 498.9757385253906, 606.9005126953125, -204.51646423339844, -24.71356201171875, 635.6631469726562, 875.4823608398438, 403.15478515625, 699.910400390625, 134.02513122558594, 20.477436065673828, -1315.94921875, 1021.4270629882812, 638.2470703125, 508.4839172363281, 22.05572509765625, 1246.46728515625, -474.3319396972656, 564.1038818359375, 1119.5523681640625, -100.45573425292969, 1118.744873046875, 790.3947143554688, 992.9783935546875, 716.7716064453125, 105.40359497070312, 1062.906494140625, -311.8608093261719, -98.47315979003906, 1944.6888427734375, 978.5700073242188, -203.91700744628906, -329.76904296875, 791.27392578125, 573.8465576171875, 1539.8829345703125, 1294.6883544921875, 1797.9102783203125, 2108.81201171875, 692.8299560546875, -804.9168090820312, 347.6678466796875, -617.272705078125, 1574.94140625, 765.8756103515625, 1592.249755859375, -70.73601531982422, -367.80755615234375, 1055.162353515625, 370.72467041015625, 1072.4791259765625, -355.1021728515625, -0.05834197998046875, 39.1387939453125, 671.3328247070312, 462.0156555175781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000566.npy"} +{"epoch": 0.8311306901615272, "step": 567, "batch_size": 64, "mean": 362.74383544921875, "std": 489.8588562011719, "min": -1153.1722412109375, "p10": -68.50524978637694, "median": 287.56842041015625, "p90": 947.5468322753907, "max": 1829.8648681640625, "pos_frac": 0.84375, "sample": [135.987548828125, -12.952432632446289, 1259.6029052734375, 626.323974609375, 386.466552734375, 679.298583984375, 166.72882080078125, -77.51029205322266, 1094.29150390625, 164.75204467773438, 952.3333740234375, 38.37140655517578, 1099.657958984375, 290.09619140625, 201.30862426757812, 276.44842529296875, 1232.413330078125, 608.271728515625, 613.8360595703125, 63.78923416137695, 617.1345825195312, 196.63330078125, -300.0799255371094, 590.8043823242188, 273.404052734375, 736.0543823242188, 670.2024536132812, 1544.48974609375, -47.49348449707031, 645.1934814453125, 65.51177215576172, 293.29669189453125, 331.16387939453125, -1153.1722412109375, 256.3824462890625, -25.121173858642578, 136.95703125, 704.047607421875, 1829.8648681640625, 801.3268432617188, 88.17831420898438, -893.5870971679688, 244.43011474609375, 180.91055297851562, 205.68214416503906, 936.3782348632812, 292.8968505859375, 256.674560546875, 461.33544921875, 379.2487487792969, 182.97691345214844, 790.1992797851562, 17.47472381591797, 501.23199462890625, -132.77540588378906, 316.01434326171875, 142.91622924804688, 285.0406494140625, -157.12197875976562, 198.69140625, 389.3209228515625, 528.4872436523438, -503.99005126953125, 538.8761596679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000567.npy"} +{"epoch": 0.8325991189427313, "step": 568, "batch_size": 64, "mean": 480.6161804199219, "std": 529.9495239257812, "min": -777.3663940429688, "p10": -86.28858451843259, "median": 380.53868103027344, "p90": 1147.0157104492187, "max": 1883.54931640625, "pos_frac": 0.875, "sample": [694.718994140625, -642.9443969726562, 1232.220703125, 850.4682006835938, 471.5050048828125, 351.7468566894531, 616.2574462890625, 772.5867919921875, 392.7652282714844, -777.3663940429688, -309.39208984375, -168.6923370361328, 570.5006103515625, 1796.8154296875, 230.65020751953125, 495.8903503417969, 228.62872314453125, 165.6788330078125, 242.9330596923828, -101.21583557128906, 572.7412109375, 338.5025939941406, 924.4664306640625, 990.0422973632812, 580.9188232421875, 349.7367248535156, 83.11263275146484, 1883.54931640625, 294.8907165527344, 359.5187683105469, 787.4771728515625, 708.656982421875, 1147.3599853515625, 522.7483520507812, 1501.5611572265625, 374.4673156738281, 292.646240234375, 607.2400512695312, 311.4490661621094, 931.9818115234375, 980.7953491210938, 25.17380142211914, 771.2890014648438, 891.5504760742188, 73.11491394042969, 311.3834228515625, 160.19271850585938, -51.45833206176758, 371.7655334472656, 1146.21240234375, 101.41783905029297, 1341.205810546875, 386.61004638671875, -607.5982666015625, 232.5076904296875, -380.56787109375, 1326.42724609375, 1086.7767333984375, 242.27230834960938, 576.035400390625, 198.67999267578125, 728.0679931640625, 67.20623016357422, 103.55155944824219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000568.npy"} +{"epoch": 0.8340675477239354, "step": 569, "batch_size": 64, "mean": 401.8387451171875, "std": 710.9088134765625, "min": -2199.04296875, "p10": -302.5207916259765, "median": 320.5335998535156, "p90": 1251.5045776367188, "max": 2185.700927734375, "pos_frac": 0.78125, "sample": [-610.7169189453125, 108.94924926757812, 1719.2646484375, 185.39996337890625, 348.82080078125, -232.0247039794922, 704.406982421875, 143.27584838867188, -496.65557861328125, 136.6144561767578, 800.8129272460938, 1240.25732421875, 543.5043334960938, 489.1031494140625, 367.7835693359375, -569.875732421875, 248.52586364746094, 876.3985595703125, -222.39459228515625, -46.51268768310547, -1197.010009765625, 1631.2794189453125, 99.75576782226562, -242.96099853515625, 38.195037841796875, 706.876220703125, 1210.9227294921875, 18.332382202148438, -2.948822021484375, 92.11041259765625, 1359.5042724609375, 885.17431640625, 686.7879638671875, -22.421966552734375, 1386.5443115234375, 394.1775817871094, 121.03955078125, 370.4903259277344, 810.9324951171875, -2199.04296875, 598.9578857421875, 176.96946716308594, 21.187498092651367, 1183.91796875, 993.1278076171875, 136.9035186767578, 910.1552124023438, 229.00582885742188, -328.0464172363281, 804.4627685546875, -330.0479736328125, 1857.1644287109375, 81.45995330810547, 544.8828735351562, 1073.9603271484375, -112.36707305908203, 137.32064819335938, 774.50830078125, 292.24639892578125, 96.63983154296875, 543.6361694335938, 706.9324951171875, 1256.3248291015625, 2185.700927734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000569.npy"} +{"epoch": 0.8355359765051396, "step": 570, "batch_size": 64, "mean": 371.16790771484375, "std": 598.2675170898438, "min": -910.0357666015625, "p10": -229.68846740722654, "median": 290.1255645751953, "p90": 1206.9823486328128, "max": 2524.171875, "pos_frac": 0.78125, "sample": [312.50445556640625, 949.9613647460938, 403.81158447265625, 1602.9810791015625, 2524.171875, 109.45790100097656, 768.0586547851562, 1367.74072265625, 1604.6639404296875, 318.38543701171875, 372.0647888183594, 991.988525390625, 269.6423034667969, 461.1092529296875, 12.275777816772461, 397.96112060546875, 923.5791015625, 391.6935119628906, 647.0266723632812, -218.33395385742188, 720.6585083007812, -259.6634521484375, -536.6094360351562, -739.9736328125, 115.72870635986328, 846.00048828125, -585.1284790039062, -194.93392944335938, 319.84368896484375, 379.71002197265625, 492.1638488769531, 253.05503845214844, -234.5546875, -520.330810546875, 228.00222778320312, 211.2630157470703, 242.92645263671875, 448.93487548828125, 120.94322967529297, 1235.855224609375, 529.0845947265625, -141.61419677734375, 239.62338256835938, 145.51239013671875, 1337.862548828125, 286.65814208984375, 293.5929870605469, 707.3372192382812, 1139.6123046875, 733.0126953125, 2.391033172607422, 188.70046997070312, 564.3662109375, 399.23992919921875, -56.81696319580078, -910.0357666015625, 212.212890625, 95.68122863769531, 244.7167510986328, -122.64727783203125, -211.68365478515625, 1360.451171875, 180.26016235351562, -217.40760803222656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000570.npy"} +{"epoch": 0.8370044052863436, "step": 571, "batch_size": 64, "mean": 444.7485656738281, "std": 692.2005004882812, "min": -1345.8837890625, "p10": -401.4549560546875, "median": 377.2711486816406, "p90": 1362.0130493164065, "max": 2189.773193359375, "pos_frac": 0.765625, "sample": [587.9260864257812, 829.3096923828125, 84.95271301269531, 897.768798828125, -159.92410278320312, 590.9507446289062, 73.4755859375, 982.3834228515625, 318.1473388671875, 1127.7403564453125, 2000.5791015625, -408.7386169433594, -230.63986206054688, 1781.8118896484375, 166.0399627685547, 616.126953125, 826.2376708984375, 763.37939453125, 372.7094421386719, 809.1605834960938, 2189.773193359375, 1764.01123046875, 751.4920043945312, 1225.05126953125, -384.4597473144531, -2.451051712036133, 698.1714477539062, 312.3800048828125, 1516.560791015625, 1173.701171875, 268.8236083984375, -75.85614013671875, 381.8328552246094, 1678.6435546875, 853.3677978515625, -64.53147888183594, -618.7012329101562, 148.71331787109375, 1285.4619140625, 534.1063232421875, 863.0289916992188, -479.0683288574219, 1394.8206787109375, 567.0480346679688, 408.9468994140625, 127.01233673095703, 593.6058959960938, 163.9807891845703, 623.9616088867188, 18.806930541992188, -567.25390625, -1345.8837890625, 333.677978515625, 53.690216064453125, -664.275390625, 364.0309753417969, 240.44029235839844, 480.3804626464844, -852.92724609375, 45.91251754760742, -123.69857788085938, 585.89892578125, -62.613494873046875, 28.897705078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000571.npy"} +{"epoch": 0.8384728340675477, "step": 572, "batch_size": 64, "mean": 451.42071533203125, "std": 780.9384765625, "min": -1384.577880859375, "p10": -347.45179748535156, "median": 343.24412536621094, "p90": 1468.7371093750005, "max": 3164.630126953125, "pos_frac": 0.6875, "sample": [-348.4225769042969, -170.0191192626953, -363.38519287109375, 525.216796875, 1958.0037841796875, -67.59088897705078, 61.029083251953125, 24.57697296142578, -37.58938217163086, 196.76443481445312, -45.438087463378906, 1337.636474609375, 952.586181640625, 361.5933837890625, 696.8151245117188, 1117.673828125, 1050.400390625, 1161.3560791015625, 1532.73193359375, 646.3887329101562, 268.5621337890625, 748.5247802734375, -70.44490051269531, 1064.2635498046875, -1060.8004150390625, 77.1836166381836, 301.07855224609375, -319.347900390625, -258.0608825683594, 841.3223266601562, -538.7421875, 537.1812133789062, 488.6915588378906, -7.711517333984375, 184.85568237304688, 3164.630126953125, 97.5056381225586, 375.5404357910156, -250.0238800048828, -654.8810424804688, 1524.923095703125, 60.20255661010742, -345.1866455078125, 603.282470703125, -289.0336608886719, -380.9895324707031, 1092.502197265625, 866.5481567382812, 665.4802856445312, -13.213645935058594, 855.60693359375, 317.71881103515625, 324.8948669433594, 2064.577392578125, 204.4076385498047, 379.4264221191406, 665.9525756835938, 2130.970703125, -23.390380859375, -1384.577880859375, 609.2246704101562, 1111.763916015625, 1669.2279052734375, 600.9520874023438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000572.npy"} +{"epoch": 0.8399412628487518, "step": 573, "batch_size": 64, "mean": 407.0157775878906, "std": 575.2774047851562, "min": -563.1766967773438, "p10": -145.5139625549316, "median": 311.3287811279297, "p90": 965.4431518554688, "max": 2611.062255859375, "pos_frac": 0.796875, "sample": [675.3359985351562, -79.09225463867188, 15.259908676147461, -7.45892333984375, -6.4024810791015625, 2611.062255859375, 942.3621215820312, -105.49834442138672, 90.72498321533203, 107.93315124511719, 471.1823425292969, 237.65724182128906, -563.1766967773438, 2156.11572265625, 424.65802001953125, 479.93865966796875, 1793.74609375, -491.13739013671875, 45.10150909423828, -162.66351318359375, -506.0020751953125, 1126.4608154296875, 740.7194213867188, 555.0009155273438, 30.64380645751953, 259.36334228515625, 876.9560546875, 975.3350219726562, 158.83059692382812, 1161.58642578125, -298.1168212890625, 935.763427734375, 327.3995056152344, 543.7359008789062, 666.5552978515625, -349.0950622558594, 325.84161376953125, 107.880859375, 822.6781005859375, 48.74927520751953, 934.5894775390625, 296.8159484863281, 213.4244384765625, -98.55345916748047, 418.71502685546875, 70.90986633300781, 337.331298828125, 78.60673522949219, 867.62841796875, -267.43280029296875, 467.79443359375, 207.30990600585938, 196.70684814453125, 707.49072265625, 392.98150634765625, 88.96483612060547, -97.9205551147461, 860.750732421875, 563.816162109375, 602.5526123046875, 193.7130126953125, 706.998291015625, 1037.9185791015625, 121.96446228027344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000573.npy"} +{"epoch": 0.8414096916299559, "step": 574, "batch_size": 64, "mean": 444.5045166015625, "std": 608.9181518554688, "min": -968.09326171875, "p10": -296.0772003173828, "median": 409.3670196533203, "p90": 1324.052221679688, "max": 2041.982666015625, "pos_frac": 0.796875, "sample": [285.5440368652344, 766.2100830078125, -183.29025268554688, -245.7315673828125, 1177.69873046875, 43.57434844970703, 119.140625, 1386.775146484375, 1007.3165893554688, 800.7254028320312, -46.54621124267578, 1580.9737548828125, 968.7435302734375, 796.4494018554688, 137.72512817382812, 737.7481079101562, 80.06133270263672, 428.09552001953125, -689.7743530273438, 2041.982666015625, 631.7413940429688, 652.2659301757812, 529.3005981445312, 630.5262451171875, 205.65118408203125, 711.9807739257812, 390.6385192871094, -428.000244140625, 149.5436248779297, -200.08404541015625, 165.81234741210938, -434.6647033691406, 203.818603515625, 1576.6275634765625, -86.08380889892578, 735.4513549804688, -489.5278015136719, 841.3292236328125, 232.8878631591797, 484.8493347167969, 990.1863403320312, -333.912841796875, 724.0077514648438, 816.079833984375, 683.412109375, 476.6111145019531, 481.8417663574219, 19.045883178710938, -968.09326171875, -317.6539001464844, 389.7917785644531, 101.88170623779297, 7.017107009887695, 1536.263916015625, -46.242706298828125, 922.84521484375, 368.8753967285156, 1514.4058837890625, 231.00428771972656, 669.3533325195312, 114.8642807006836, 1821.5625, 542.1987915039062, 5.455942153930664], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000574.npy"} +{"epoch": 0.8428781204111601, "step": 575, "batch_size": 64, "mean": 405.97833251953125, "std": 682.9534912109375, "min": -1260.9619140625, "p10": -308.9338287353515, "median": 370.7556610107422, "p90": 1301.159497070313, "max": 2256.68798828125, "pos_frac": 0.765625, "sample": [1373.368408203125, 249.47499084472656, 891.6384887695312, 359.09771728515625, 592.7094116210938, -834.4427490234375, 540.7056884765625, 547.7719116210938, 248.81161499023438, 142.1016387939453, 76.46302032470703, -14.966041564941406, 397.2253112792969, 1125.427734375, 597.2666625976562, 360.8958435058594, -134.8145751953125, 827.7413940429688, 267.1456298828125, 329.39990234375, 369.9786376953125, 21.0953369140625, 1372.4312744140625, 660.2501831054688, 578.888671875, 314.6962585449219, -1260.9619140625, 254.94613647460938, -700.6337890625, 477.9896240234375, -942.4213256835938, 1216.296630859375, 908.5072021484375, 1337.529296875, 265.45574951171875, -140.6946258544922, -254.7169189453125, -66.27254486083984, 1399.4752197265625, 990.2941284179688, -201.08502197265625, 257.2874755859375, -332.1696472167969, 923.3421630859375, 1204.7730712890625, -900.3179931640625, 529.7940063476562, 542.2698974609375, 813.5052490234375, 2256.68798828125, 371.5326843261719, 871.35595703125, 434.4274597167969, -63.5008544921875, 1504.8892822265625, 168.1148681640625, 655.2762451171875, -1138.7254638671875, 2003.611083984375, 40.51484298706055, -147.03558349609375, 720.35498046875, 172.87025451660156, 549.6846923828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000575.npy"} +{"epoch": 0.8443465491923642, "step": 576, "batch_size": 64, "mean": 310.65863037109375, "std": 573.9830322265625, "min": -938.3682250976562, "p10": -360.83446350097654, "median": 245.25313568115234, "p90": 956.8374572753908, "max": 1762.009521484375, "pos_frac": 0.75, "sample": [259.529296875, 930.8665771484375, -139.40809631347656, 63.62924575805664, -371.54376220703125, -189.6886749267578, -207.9751434326172, 1666.7564697265625, 924.5786743164062, 190.44464111328125, 283.4331359863281, 1631.832275390625, 105.93682861328125, 136.6690216064453, 742.0042114257812, 429.675048828125, 366.8497009277344, -819.756591796875, 317.62322998046875, 537.0697631835938, 541.1124267578125, 1145.8909912109375, 648.320068359375, 889.1431884765625, 46.04069137573242, 1389.212158203125, -117.77458190917969, -402.42626953125, -311.7866516113281, 749.7864379882812, -203.0022430419922, 237.76844787597656, 86.16293334960938, 38.478363037109375, 874.1748046875, 598.26318359375, 1762.009521484375, 320.57659912109375, 281.241943359375, 732.3173828125, 63.808738708496094, 786.6158447265625, 252.73782348632812, 410.71142578125, 169.88876342773438, -794.7338256835938, -57.6627197265625, 675.260009765625, -589.7789306640625, 637.767822265625, 369.9713439941406, 1030.4432373046875, 967.9678344726562, 872.7858276367188, -938.3682250976562, -335.8460998535156, 161.072021484375, 226.91018676757812, 235.93203735351562, 174.6981201171875, -430.34820556640625, 86.42135620117188, -313.56292724609375, 55.424781799316406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000576.npy"} +{"epoch": 0.8458149779735683, "step": 577, "batch_size": 64, "mean": 407.04620361328125, "std": 637.924072265625, "min": -1516.472412109375, "p10": -98.43039703369138, "median": 349.9341125488281, "p90": 1304.7466552734377, "max": 2463.2001953125, "pos_frac": 0.84375, "sample": [-1516.472412109375, 2463.2001953125, 1243.7010498046875, 1592.6502685546875, 167.31198120117188, 35.87834167480469, 344.6399841308594, 151.5131378173828, 159.39501953125, -63.536834716796875, 726.425048828125, 355.2282409667969, 534.9976196289062, 454.06304931640625, 480.6741943359375, 754.7747192382812, 970.772705078125, 1391.7969970703125, -520.7325439453125, 618.2680053710938, 175.3594207763672, 13.499549865722656, -886.2220458984375, 262.65966796875, 259.202392578125, 731.8455810546875, 142.30194091796875, 245.92710876464844, 482.1097717285156, 407.19140625, 154.86434936523438, 456.5251770019531, 253.32052612304688, -54.83623504638672, 1754.8360595703125, 52.498443603515625, 425.2513427734375, -566.439697265625, 229.71693420410156, -406.5238037109375, 381.2401428222656, 220.35183715820312, 333.58795166015625, 404.88800048828125, -26.56781768798828, 618.6726684570312, 75.11894226074219, 636.340087890625, 761.135498046875, 998.161865234375, 106.45127868652344, 369.8142395019531, 1330.9090576171875, -113.38478088378906, 1759.1558837890625, 503.44287109375, 441.2179870605469, -638.887451171875, 74.32106018066406, 1598.7572021484375, 32.71714782714844, 920.4730834960938, 459.4644470214844, 325.941162109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000577.npy"} +{"epoch": 0.8472834067547724, "step": 578, "batch_size": 64, "mean": 436.58270263671875, "std": 554.662109375, "min": -836.02392578125, "p10": -162.22628631591795, "median": 395.93190002441406, "p90": 1144.9868469238288, "max": 1996.439208984375, "pos_frac": 0.796875, "sample": [1996.439208984375, 191.99822998046875, 737.508544921875, 99.8277587890625, 585.0419311523438, 250.73980712890625, 1338.4541015625, 421.4786071777344, 207.49295043945312, -177.8302459716797, 94.9090576171875, 849.0436401367188, 496.023681640625, 239.07925415039062, 135.05581665039062, -510.2876892089844, 184.16416931152344, 19.228355407714844, 1847.7459716796875, 765.8177490234375, 726.2606201171875, 400.39947509765625, 997.5986938476562, -113.14408874511719, -55.21668243408203, 883.1415405273438, 834.8723754882812, -97.74051666259766, -336.9381408691406, 126.3759536743164, -24.151779174804688, -425.04443359375, 437.0281982421875, 391.4643249511719, 117.09931945800781, 549.1708984375, -239.8521728515625, 302.43359375, -836.02392578125, -85.49486541748047, 529.532958984375, 700.7157592773438, 537.9530029296875, 470.8050842285156, 1281.9564208984375, 961.48681640625, 340.07415771484375, 97.73885345458984, 784.08544921875, 188.4135284423828, 692.779296875, 797.5482177734375, 74.86131286621094, 1208.1531982421875, 849.310791015625, 1717.0240478515625, -125.81704711914062, 205.82432556152344, 1385.47119140625, 971.4735107421875, 563.8487548828125, -257.93768310546875, 79.19856262207031, 562.625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000578.npy"} +{"epoch": 0.8487518355359766, "step": 579, "batch_size": 64, "mean": 266.721923828125, "std": 609.9057006835938, "min": -1755.6685791015625, "p10": -448.79501342773426, "median": 285.3831024169922, "p90": 986.9016235351564, "max": 2150.197265625, "pos_frac": 0.734375, "sample": [8.060508728027344, -179.6111297607422, -1755.6685791015625, 606.5274047851562, -67.46646118164062, 107.10647583007812, 844.0319213867188, 359.2262268066406, 713.5232543945312, 416.2561950683594, 4.142509460449219, -104.92558288574219, -791.47705078125, 418.69781494140625, 186.58010864257812, 638.6441650390625, 242.3319549560547, 561.2669067382812, 1121.9180908203125, 208.58091735839844, 1007.4284057617188, 93.88457489013672, 353.1527404785156, 504.31512451171875, 16.860708236694336, 399.80926513671875, 346.9852294921875, 74.69376373291016, 651.9195556640625, 240.1967010498047, 489.23895263671875, -490.23553466796875, 616.73486328125, 657.60888671875, 2150.197265625, 1492.32568359375, 326.7176513671875, -680.0006103515625, -27.26883316040039, 567.9925537109375, 1259.2515869140625, -251.09152221679688, 756.8429565429688, -352.1004638671875, 1326.8330078125, 13.636579513549805, -22.892047882080078, -221.40675354003906, -614.54541015625, 264.0550537109375, 939.0057983398438, 503.6801452636719, -57.09407043457031, 592.4789428710938, 346.2870178222656, -890.3836669921875, 1044.491455078125, 20.724485397338867, 261.9022521972656, -857.89013671875, 167.2234344482422, -176.1597900390625, 306.7111511230469, 380.33990478515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000579.npy"} +{"epoch": 0.8502202643171806, "step": 580, "batch_size": 64, "mean": 508.2029724121094, "std": 704.0264282226562, "min": -1174.638916015625, "p10": -72.43252487182616, "median": 345.8044738769531, "p90": 1343.6454589843752, "max": 2943.6806640625, "pos_frac": 0.8125, "sample": [-191.7686004638672, 1016.9036865234375, 276.60986328125, 541.5423583984375, -129.36346435546875, 109.30512237548828, 190.6844940185547, 2099.724365234375, 449.4093933105469, 280.6848449707031, 483.6722412109375, -37.925537109375, 2943.6806640625, 220.54067993164062, 63.62642288208008, 680.410888671875, 377.89886474609375, 140.9762420654297, 844.9114379882812, 493.32415771484375, 236.9276580810547, 1082.5751953125, 1094.1368408203125, 535.9326171875, 511.9071350097656, 2.9080886840820312, 1159.3057861328125, 1277.2327880859375, 577.3081665039062, 145.11941528320312, 45.314720153808594, 160.21876525878906, 111.99007415771484, 101.10417175292969, 792.9674072265625, 1376.8382568359375, 53.115570068359375, -67.76493072509766, 922.83251953125, 774.6222534179688, 1909.7237548828125, 37.82118225097656, -1174.638916015625, 1081.14697265625, -35.13105010986328, 822.1287231445312, -456.9097595214844, -93.26205444335938, 645.6036987304688, 2852.97314453125, -3.7021007537841797, -74.43292236328125, 1411.15087890625, 150.14266967773438, 362.6733703613281, 366.0122985839844, -56.143653869628906, 328.9355773925781, 25.4437198638916, -326.8421936035156, 375.036865234375, 957.9340209960938, 1372.1080322265625, 297.7771911621094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000580.npy"} +{"epoch": 0.8516886930983847, "step": 581, "batch_size": 64, "mean": 345.31573486328125, "std": 627.6573486328125, "min": -846.57763671875, "p10": -476.9214172363281, "median": 354.0663604736328, "p90": 997.1770141601564, "max": 2955.347412109375, "pos_frac": 0.75, "sample": [192.82601928710938, 148.32469177246094, 169.37576293945312, -128.64671325683594, 380.57427978515625, 418.01593017578125, -432.9635009765625, 118.28838348388672, 958.0179443359375, -598.397705078125, 742.5160522460938, 716.7891845703125, 517.5882568359375, 343.23199462890625, 618.4339599609375, -482.9017639160156, 1365.674072265625, 887.983154296875, 485.49652099609375, 1276.5758056640625, -213.7474822998047, 636.4780883789062, 62.7529296875, 589.9141235351562, 546.8595581054688, 519.0469970703125, 278.7945251464844, 203.24090576171875, 93.1502685546875, 1013.95947265625, -282.43927001953125, 2955.347412109375, -35.82034683227539, 1569.440185546875, 264.15625, 486.260986328125, 848.599853515625, -151.4739227294922, 69.39258575439453, 479.50128173828125, 330.74859619140625, 96.46493530273438, -83.5111083984375, -305.212890625, 1112.7293701171875, 244.78810119628906, 650.3064575195312, 895.8655395507812, 1333.9276123046875, 625.0048828125, -583.43017578125, -846.57763671875, 28.24422836303711, -561.7113037109375, 626.7789306640625, 400.34783935546875, -477.4913330078125, -798.1514892578125, 65.23687744140625, -475.59161376953125, 364.9007263183594, 792.48388671875, 378.11480712890625, 655.7261352539062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000581.npy"} +{"epoch": 0.8531571218795888, "step": 582, "batch_size": 64, "mean": 401.499267578125, "std": 574.64404296875, "min": -1132.9083251953125, "p10": -239.68021392822266, "median": 351.0791778564453, "p90": 1129.4213378906252, "max": 1830.8555908203125, "pos_frac": 0.734375, "sample": [-1132.9083251953125, 238.81988525390625, 1244.8531494140625, 354.2232666015625, 606.7293701171875, -233.408935546875, 292.1761169433594, 471.031005859375, 380.732177734375, -42.649417877197266, 357.96746826171875, 80.95806121826172, -42.91877746582031, 800.0831298828125, 968.9978637695312, 1510.56494140625, -30.54957389831543, -7.5203704833984375, -283.4476318359375, 1151.6220703125, 388.29595947265625, 95.35316467285156, 604.8182983398438, -626.4608154296875, -265.11566162109375, 948.5892333984375, 215.203125, 1072.267822265625, 1519.219970703125, 1830.8555908203125, 894.3765869140625, 398.6197204589844, 1183.873779296875, -45.569862365722656, -381.10540771484375, 239.98770141601562, 55.451393127441406, 185.24635314941406, 651.805908203125, 636.3229370117188, 347.9350891113281, -242.36790466308594, 914.9826049804688, 1056.2567138671875, -15.947372436523438, 1077.61962890625, -31.381568908691406, 769.0408325195312, -190.1840057373047, 243.37586975097656, 1.9030952453613281, 816.3033447265625, 1074.1494140625, 358.24212646484375, -41.63152313232422, -942.43896484375, 285.2035217285156, 251.87088012695312, 707.716796875, 672.6443481445312, 1217.998291015625, 294.3172912597656, 692.7258911132812, 90.22647857666016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000582.npy"} +{"epoch": 0.8546255506607929, "step": 583, "batch_size": 64, "mean": 249.65298461914062, "std": 673.0624389648438, "min": -1280.4891357421875, "p10": -611.0190307617187, "median": 234.35025024414062, "p90": 1004.9978637695312, "max": 2030.7437744140625, "pos_frac": 0.71875, "sample": [172.09255981445312, 244.53350830078125, 325.6985168457031, -643.7404174804688, 995.57666015625, -11.829452514648438, 1009.0355224609375, 378.7288513183594, 2011.8900146484375, 161.4249267578125, -198.94842529296875, 67.90113830566406, -952.1722412109375, 320.62750244140625, 162.6041717529297, -291.1265563964844, 897.0436401367188, 905.359130859375, 350.3451232910156, 1086.2977294921875, 517.2088012695312, 65.87320709228516, -534.6691284179688, -397.94171142578125, -107.15021514892578, -908.2457275390625, 314.95489501953125, 204.87548828125, -511.9703369140625, -810.6237182617188, -87.52922058105469, -269.4563903808594, 538.2451782226562, 550.1349487304688, 427.7219543457031, 2030.7437744140625, 412.2984619140625, 12.570381164550781, 981.9725341796875, 224.1669921875, 2.089569091796875, 367.9423522949219, 372.8809814453125, -96.92268371582031, -1132.7890625, -1280.4891357421875, 1663.637939453125, 467.2982177734375, -429.2371520996094, 717.25390625, 957.4393310546875, 1359.5350341796875, 191.0130615234375, 505.1282653808594, 857.99365234375, -751.1650390625, 130.86647033691406, 515.9238891601562, 56.1463623046875, 271.8784484863281, 1260.3074951171875, 36.07147216796875, 12.085639953613281, 278.3789367675781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000583.npy"} +{"epoch": 0.856093979441997, "step": 584, "batch_size": 64, "mean": 490.79443359375, "std": 604.7607421875, "min": -640.2380981445312, "p10": -158.251513671875, "median": 350.5055694580078, "p90": 1290.2760375976563, "max": 2394.0078125, "pos_frac": 0.828125, "sample": [516.9074096679688, 239.75889587402344, -318.1685791015625, 291.4659729003906, -19.59100341796875, -447.545654296875, 614.164306640625, 463.10784912109375, 1121.6005859375, 35.7484245300293, 31.4742431640625, -140.40826416015625, 114.4305419921875, 159.66055297851562, 386.41094970703125, 1085.86572265625, -165.89862060546875, 1275.230224609375, 878.2994384765625, -405.5901794433594, 281.6703796386719, 598.5240478515625, -32.36433410644531, 261.6307373046875, 340.1198425292969, 232.39767456054688, 1362.0498046875, 123.40946197509766, 208.90875244140625, 226.02017211914062, 1124.4658203125, 603.3294677734375, 291.0267333984375, -640.2380981445312, 736.23046875, 369.22332763671875, 612.406982421875, 512.7056274414062, 272.7760925292969, 49.233184814453125, 462.01068115234375, -328.336181640625, 360.89129638671875, 1746.6844482421875, 888.1972045898438, 338.26153564453125, 1130.7628173828125, 1203.83154296875, -281.0386962890625, 2394.0078125, 1937.1981201171875, 30.854049682617188, 1645.8853759765625, 1296.7242431640625, 411.01007080078125, 1575.166748046875, 206.57667541503906, 701.3740234375, -76.8565902709961, 280.5920104980469, 555.6659545898438, 400.35504150390625, 1129.801513671875, 150.77586364746094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000584.npy"} +{"epoch": 0.8575624082232012, "step": 585, "batch_size": 64, "mean": 442.110595703125, "std": 734.2393798828125, "min": -1668.5323486328125, "p10": -381.0555847167968, "median": 465.81861877441406, "p90": 1377.6603393554692, "max": 2762.061767578125, "pos_frac": 0.75, "sample": [194.168701171875, -221.10997009277344, 658.8006591796875, 330.706787109375, 534.8190307617188, 2762.061767578125, 1022.6080932617188, -649.3662109375, 356.5477600097656, 235.36444091796875, 1046.5377197265625, 23.005386352539062, 577.89306640625, 568.872802734375, -91.6442642211914, 1598.87744140625, 1415.2960205078125, 651.6348266601562, 1513.0628662109375, 551.2330322265625, 464.0590515136719, 1289.84375, 182.633544921875, 495.2181701660156, 1106.636474609375, -56.77986145019531, 780.5677490234375, 1066.3958740234375, 669.1818237304688, 161.78085327148438, 74.72334289550781, 765.402587890625, 1442.51171875, 68.32470703125, 1067.4609375, 224.82601928710938, 1014.7398681640625, -268.0769348144531, -71.38352966308594, 897.2908325195312, 397.4135437011719, -1668.5323486328125, 852.7033081054688, -803.5137939453125, 895.9066772460938, 520.4011840820312, 467.60125732421875, -487.8830871582031, 878.3753051757812, -256.0103454589844, -80.53350830078125, -1151.891845703125, -100.18418884277344, -429.4750061035156, 10.574478149414062, 1575.509765625, 467.57818603515625, -553.7380981445312, 851.3660278320312, -35.05403137207031, 161.35447692871094, 124.30702209472656, 185.55226135253906, 2018.522705078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000585.npy"} +{"epoch": 0.8590308370044053, "step": 586, "batch_size": 64, "mean": 289.494140625, "std": 595.770263671875, "min": -1201.45751953125, "p10": -382.7334045410156, "median": 335.82752990722656, "p90": 971.8864868164065, "max": 1676.7799072265625, "pos_frac": 0.671875, "sample": [1202.2061767578125, -211.9365234375, -229.0386962890625, 510.3953857421875, 602.7681884765625, 318.5869445800781, -262.6316223144531, 339.563232421875, 338.5274353027344, -161.33248901367188, -604.88525390625, 366.6762390136719, 761.5673217773438, 1515.36572265625, -73.38338470458984, -144.7608184814453, 559.15869140625, -730.8465576171875, 204.80514526367188, 217.9802703857422, -138.8806915283203, 5.105432510375977, 1097.345703125, 774.1703491210938, 891.8683471679688, -57.76292419433594, 750.226806640625, -297.60565185546875, -100.56061553955078, 388.4817810058594, 723.2748413085938, -151.74737548828125, 377.13580322265625, -1201.45751953125, 602.9392700195312, 694.6619873046875, 1519.20751953125, -382.8138427734375, 509.23468017578125, -136.3304443359375, 906.768798828125, -521.216796875, 456.1365966796875, 435.3522644042969, -1061.7921142578125, -717.2213134765625, 408.7952575683594, 441.16748046875, 167.31809997558594, 543.7615356445312, -382.54571533203125, 333.12762451171875, 999.7940673828125, 1532.574951171875, 877.8237915039062, 232.5140838623047, 315.7464599609375, 1676.7799072265625, 81.65221405029297, -93.39429473876953, 192.2852783203125, 779.4610595703125, 407.74346923828125, 129.71340942382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000586.npy"} +{"epoch": 0.8604992657856094, "step": 587, "batch_size": 64, "mean": 415.85150146484375, "std": 572.4729614257812, "min": -1396.8052978515625, "p10": -172.3781326293945, "median": 371.62831115722656, "p90": 1294.3538452148441, "max": 1985.3740234375, "pos_frac": 0.78125, "sample": [535.3759765625, 1067.197265625, 452.85064697265625, 147.2445068359375, 336.1346130371094, 429.5535888671875, 662.5956420898438, -62.03229904174805, -0.8197174072265625, 1473.4835205078125, -421.45062255859375, -95.97970581054688, 1985.3740234375, 350.0694885253906, 248.44061279296875, 518.6170043945312, -21.194488525390625, 484.43084716796875, 404.0581970214844, 387.79156494140625, -234.2057647705078, 682.9285888671875, 1389.41943359375, 18.953275680541992, 408.44482421875, 1182.7374267578125, -239.8907928466797, 955.4302368164062, 762.2923583984375, 25.135055541992188, 95.33483123779297, 22.028892517089844, -445.42803955078125, 76.82241821289062, 685.6473388671875, 123.96446228027344, 199.92669677734375, -1396.8052978515625, 1028.8133544921875, 172.177734375, -372.65777587890625, 816.804443359375, 166.49822998046875, 716.9727783203125, -68.89768981933594, 1342.189453125, 711.384765625, -180.26622009277344, 208.62741088867188, 342.5447082519531, 492.8392333984375, 1604.82275390625, 396.435546875, 700.197509765625, 515.3756713867188, 628.69580078125, 962.6951904296875, -153.97259521484375, 310.0478515625, 19.672924041748047, 355.4650573730469, -67.55615234375, 1415.197021484375, 1355.91259765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000587.npy"} +{"epoch": 0.8619676945668135, "step": 588, "batch_size": 64, "mean": 346.6522216796875, "std": 518.298583984375, "min": -912.830322265625, "p10": -283.96333618164056, "median": 313.8847351074219, "p90": 1005.8801330566407, "max": 1428.9361572265625, "pos_frac": 0.75, "sample": [171.4990997314453, 560.9788208007812, -227.16635131835938, 135.95037841796875, 31.560409545898438, 682.071044921875, 708.6807861328125, 55.527984619140625, 866.7720336914062, 687.6226806640625, 118.7501220703125, -384.599365234375, -48.518798828125, 538.5955810546875, -308.3049011230469, -413.0432434082031, 836.0670166015625, 523.157958984375, 187.15155029296875, 259.8909912109375, 645.293701171875, -127.07656860351562, -87.08671569824219, 1137.97265625, 170.88722229003906, 441.434814453125, 1415.7598876953125, -89.5345458984375, -471.91827392578125, 115.92913055419922, 295.6549987792969, -68.91670227050781, 289.32720947265625, 1013.6502685546875, 954.967529296875, 485.4552001953125, 181.18653869628906, -220.14401245117188, 1258.7403564453125, 987.7498168945312, 1240.714111328125, 752.1481323242188, 27.917327880859375, -653.1515502929688, 717.1288452148438, 456.6427307128906, 170.8946075439453, 939.908203125, -912.830322265625, 394.768310546875, -16.82724380493164, 1428.9361572265625, 1210.91015625, 407.92718505859375, 452.0445556640625, 616.3291015625, 597.499755859375, 345.2765808105469, -195.95469665527344, 203.4825439453125, 7.682823181152344, 332.1144714355469, 928.1594848632812, -577.9547729492188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000588.npy"} +{"epoch": 0.8634361233480177, "step": 589, "batch_size": 64, "mean": 461.2321472167969, "std": 581.2136840820312, "min": -846.5496826171875, "p10": -219.83479919433591, "median": 397.1772918701172, "p90": 1340.1416137695314, "max": 2217.64208984375, "pos_frac": 0.828125, "sample": [1610.265625, 302.5161437988281, 349.502197265625, 34.18973922729492, 728.27490234375, 104.10728454589844, -158.68714904785156, 616.056396484375, 391.842529296875, -382.83160400390625, -75.55561828613281, 402.5120544433594, 95.99422454833984, 219.5514678955078, 645.6544189453125, 1360.369384765625, 2217.64208984375, 150.1231689453125, 494.09674072265625, 323.5444030761719, 492.26080322265625, 218.8316650390625, 155.20863342285156, 1371.1610107421875, -29.6910400390625, 522.0389404296875, 292.0408630371094, 667.598876953125, 1599.5179443359375, -210.21319580078125, 565.0260009765625, 678.5405883789062, 819.72509765625, 547.529296875, 1157.7381591796875, -223.95834350585938, -279.88885498046875, 160.41387939453125, 790.2800903320312, 108.2552490234375, 446.8105163574219, -587.4305419921875, 233.77590942382812, 839.87255859375, 588.0701904296875, 547.6283569335938, 1092.35107421875, 205.5220184326172, 629.559814453125, 1481.8404541015625, 611.9305419921875, 302.5693054199219, 374.92626953125, -846.5496826171875, 318.772216796875, -633.6989135742188, 689.1007080078125, 1485.4102783203125, 32.39656066894531, 303.0060729980469, 1292.9434814453125, 668.0992431640625, -479.7996520996094, 1090.1671142578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000589.npy"} +{"epoch": 0.8649045521292217, "step": 590, "batch_size": 64, "mean": 452.76287841796875, "std": 577.2325439453125, "min": -416.7611083984375, "p10": -239.3719757080078, "median": 348.0946960449219, "p90": 1258.4825683593754, "max": 2069.1435546875, "pos_frac": 0.734375, "sample": [556.17041015625, 1580.1268310546875, 827.1237182617188, 300.015625, 990.9315185546875, -283.456298828125, 140.8243408203125, 2069.1435546875, 849.0896606445312, -216.92709350585938, 452.9882507324219, -261.3125, 368.9122314453125, 1450.0789794921875, -175.49224853515625, -90.26887512207031, -151.82415771484375, 50.538429260253906, 868.1683349609375, 233.06393432617188, 107.17977905273438, -353.3362121582031, -121.82504272460938, 293.619384765625, -137.63702392578125, 167.16375732421875, 775.4727172851562, -280.33172607421875, 1405.466796875, -110.4887924194336, 1062.27490234375, 451.52996826171875, -25.884071350097656, 1406.9718017578125, 225.71060180664062, 429.5011291503906, 375.6519775390625, 584.738037109375, 1188.48974609375, 540.5128784179688, 507.76226806640625, -248.9912109375, -95.60064697265625, -416.7611083984375, 327.27716064453125, 67.39353942871094, 249.44606018066406, 969.2794189453125, 268.9041748046875, -376.316650390625, 446.7027282714844, 1288.4794921875, 295.799560546875, -70.48876190185547, 640.2354736328125, 719.6399536132812, 912.47412109375, 1852.538818359375, 178.80661010742188, 1099.0517578125, 1086.3665771484375, 880.3070068359375, 173.0186309814453, 678.826171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000590.npy"} +{"epoch": 0.8663729809104258, "step": 591, "batch_size": 64, "mean": 407.99945068359375, "std": 576.1973876953125, "min": -1005.2146606445312, "p10": -365.83306884765614, "median": 417.767333984375, "p90": 1081.8224731445314, "max": 1788.0003662109375, "pos_frac": 0.84375, "sample": [1.5640945434570312, 1151.937255859375, 1409.6611328125, 677.6277465820312, 988.7962646484375, -140.21434020996094, 26.597822189331055, -1005.2146606445312, 563.9826049804688, 1788.0003662109375, 953.827392578125, 137.78768920898438, 682.8153686523438, 392.9398193359375, 57.352142333984375, 1638.7696533203125, 792.7969970703125, 671.4701538085938, 128.94515991210938, 254.780517578125, -32.15748596191406, 962.8179321289062, 145.38392639160156, 95.67205047607422, 608.3418579101562, 193.94515991210938, -745.19580078125, 687.720947265625, 527.3141479492188, 403.18267822265625, -283.120361328125, 118.55792236328125, 432.35198974609375, -686.08935546875, 1088.37255859375, 167.22813415527344, 179.10452270507812, 66.70112609863281, 661.4025268554688, -401.2813720703125, -441.4404296875, 814.5084838867188, 135.4359893798828, 457.1747131347656, -606.3113403320312, 989.684814453125, 852.1888427734375, 71.83013916015625, 637.7411499023438, 1039.710693359375, 38.647727966308594, 1066.5389404296875, 910.1082763671875, 121.98663330078125, 1312.0350341796875, 493.17840576171875, 472.2745056152344, 1223.10986328125, -732.6856079101562, 697.4066162109375, 128.60398864746094, 505.2265625, 309.48138427734375, 251.05426025390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000591.npy"} +{"epoch": 0.8678414096916299, "step": 592, "batch_size": 64, "mean": 372.1299133300781, "std": 731.6919555664062, "min": -1348.8336181640625, "p10": -507.46376037597656, "median": 376.2057342529297, "p90": 1269.70166015625, "max": 2049.67529296875, "pos_frac": 0.734375, "sample": [325.2906494140625, 1155.952880859375, -90.10238647460938, 416.64910888671875, -906.3650512695312, 205.0647735595703, 610.403076171875, 988.213623046875, 1783.186767578125, 1600.6900634765625, -952.4943237304688, 2049.67529296875, 275.2704162597656, 847.38037109375, -1034.8055419921875, -493.8439025878906, -121.60558319091797, 373.43585205078125, 137.7171630859375, 1278.7576904296875, 124.87720489501953, 1133.4462890625, -38.279258728027344, 1248.5709228515625, 437.1529846191406, 86.60186767578125, 174.51742553710938, 961.7015991210938, 1578.523681640625, 574.3982543945312, 59.92916488647461, -315.0589599609375, 112.856201171875, 689.531982421875, 1056.956787109375, 257.587646484375, -513.3008422851562, 358.5240783691406, -439.83062744140625, 606.7583618164062, 504.2415466308594, -173.12973022460938, 747.4376831054688, 1619.544921875, 291.86566162109375, -339.95501708984375, 528.7958984375, -1348.8336181640625, 852.327880859375, 378.9756164550781, 664.37158203125, 92.07466125488281, 802.7699584960938, 550.1109619140625, -919.8585815429688, 880.0099487304688, 674.5894165039062, 135.00433349609375, 1349.190673828125, -260.0947570800781, -1330.951904296875, 868.98779296875, 646.0751342773438, -1.1714420318603516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000592.npy"} +{"epoch": 0.869309838472834, "step": 593, "batch_size": 64, "mean": 292.0050048828125, "std": 640.7404174804688, "min": -1071.3353271484375, "p10": -487.2212585449218, "median": 200.50360107421875, "p90": 1223.0030761718751, "max": 1955.8447265625, "pos_frac": 0.65625, "sample": [-21.353900909423828, 727.348876953125, -260.9355773925781, 135.59417724609375, -621.9281616210938, -526.1098022460938, 1493.8685302734375, -656.458251953125, 1165.884033203125, 587.389404296875, -123.1710205078125, 1590.3369140625, 1335.9713134765625, 1091.471435546875, -6.120063781738281, -804.3013305664062, 1247.482666015625, 1572.6646728515625, 707.8572387695312, 157.13206481933594, -396.4813232421875, 190.27520751953125, 389.41021728515625, 76.32279205322266, 100.58232116699219, 1955.8447265625, 462.7491455078125, 119.9511489868164, 375.0020751953125, -835.2883911132812, -2.7525501251220703, 507.5074768066406, -348.4525451660156, 222.32534790039062, 724.8303833007812, 852.5767822265625, -126.47714233398438, 1256.6922607421875, -198.99209594726562, 371.82623291015625, -871.731689453125, 607.2698364257812, -339.4231262207031, -1071.3353271484375, -4.389768600463867, -96.568115234375, 0.10827064514160156, 30.86528778076172, 476.8417053222656, 816.9067993164062, 470.1436767578125, 344.7961730957031, 869.8275146484375, 489.0052490234375, 210.73199462890625, 794.0223999023438, 4.330535888671875, 40.33555603027344, -145.24295043945312, 708.948486328125, -112.84259033203125, 447.1849365234375, -221.1617431640625, 749.6213989257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000593.npy"} +{"epoch": 0.8707782672540382, "step": 594, "batch_size": 64, "mean": 371.1469421386719, "std": 615.4995727539062, "min": -917.7213745117188, "p10": -363.20031433105464, "median": 322.06585693359375, "p90": 1174.1113647460938, "max": 1784.0067138671875, "pos_frac": 0.75, "sample": [1005.670166015625, 8.091888427734375, 148.17835998535156, 84.00155639648438, -56.4605712890625, -742.4359741210938, 618.515380859375, 458.34979248046875, 121.85599517822266, 767.1654663085938, 500.64849853515625, 368.0613098144531, 1784.0067138671875, 731.1802978515625, 141.92391967773438, 912.59716796875, 665.8353881835938, 254.24794006347656, 1151.154296875, -427.2435607910156, -579.2528686523438, 1440.1607666015625, -904.72802734375, 1770.4161376953125, -259.54132080078125, 530.9542846679688, -78.95040130615234, 399.1547546386719, 484.2202453613281, -178.63955688476562, 97.56495666503906, 434.8505859375, 1339.8790283203125, 633.7910766601562, 310.65789794921875, 564.9837646484375, 645.8575439453125, 30.577259063720703, 268.6320495605469, -728.4866333007812, -41.41916275024414, 974.6874389648438, 149.66317749023438, -184.56777954101562, 1176.5440673828125, 936.370361328125, 871.4189453125, 606.0053100585938, -377.8862609863281, 464.85821533203125, -917.7213745117188, 39.514190673828125, 259.67950439453125, -203.00933837890625, 333.47381591796875, -41.07834243774414, 1168.43505859375, 915.4976806640625, 1600.674560546875, 244.70240783691406, 1248.5771484375, -328.93310546875, 83.8548583984375, 56.61675262451172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000594.npy"} +{"epoch": 0.8722466960352423, "step": 595, "batch_size": 64, "mean": 413.5597229003906, "std": 585.2753295898438, "min": -563.7857666015625, "p10": -201.22907714843745, "median": 303.1824951171875, "p90": 1042.8740600585938, "max": 2684.977294921875, "pos_frac": 0.796875, "sample": [899.6279296875, 307.0675048828125, 582.3409423828125, 38.36664581298828, -13.271644592285156, 1024.3214111328125, 354.48675537109375, 952.5617065429688, -309.61358642578125, 346.35943603515625, 1554.522216796875, 111.07209777832031, 1213.571044921875, 974.6923828125, -214.24830627441406, 375.39410400390625, 316.6230163574219, 1780.279052734375, 210.45411682128906, -121.27249145507812, 68.57989501953125, -229.20736694335938, 645.499755859375, 380.1827392578125, -217.68661499023438, -236.56228637695312, 202.61325073242188, 92.5076675415039, 299.2974853515625, 287.8550109863281, 163.79220581054688, 237.71014404296875, 273.106689453125, 294.51849365234375, 876.803466796875, 24.69249153137207, 333.8897399902344, 611.13134765625, 1600.347412109375, 317.708740234375, -43.78047180175781, -563.7857666015625, 465.9863586425781, -359.3498229980469, 394.4241638183594, 386.0611572265625, 263.3092346191406, -170.8508758544922, 671.4159545898438, 679.8154296875, 12.135185241699219, 789.8303833007812, 95.01197814941406, -103.88832092285156, 421.53350830078125, 109.21517944335938, 1050.8251953125, 98.62179565429688, -130.6451416015625, 3.9603614807128906, 1922.4022216796875, 531.60009765625, 848.8814697265625, 2684.977294921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000595.npy"} +{"epoch": 0.8737151248164464, "step": 596, "batch_size": 64, "mean": 291.58941650390625, "std": 624.2456665039062, "min": -1153.9522705078125, "p10": -460.46460266113274, "median": 164.13872528076172, "p90": 1244.934912109376, "max": 2043.0533447265625, "pos_frac": 0.75, "sample": [-312.26611328125, 17.77703857421875, 695.5166625976562, 56.16075134277344, 878.3465576171875, 589.3322143554688, 121.80159759521484, 498.61724853515625, 135.29574584960938, 593.6116333007812, 2043.0533447265625, -119.03237915039062, -585.2066650390625, 439.42041015625, 1026.13671875, 1449.81982421875, 110.04762268066406, 158.0444793701172, 485.39019775390625, 647.281494140625, -124.16493225097656, 555.1549072265625, 55.02067184448242, 165.64051818847656, -494.4996337890625, 262.77392578125, 545.9671020507812, 392.8588562011719, 228.6118621826172, 139.87281799316406, -366.416015625, 124.0675277709961, -591.0460205078125, 1552.3612060546875, 35.61235046386719, 1018.100341796875, 1525.905029296875, 531.0587158203125, 16.813413619995117, -88.06087493896484, 278.1348876953125, 548.7186889648438, 140.4508056640625, 326.7093811035156, -195.7138671875, 327.997802734375, 1338.70556640625, 220.87242126464844, -288.8415222167969, -253.02345275878906, 1826.9403076171875, 49.752845764160156, -1153.9522705078125, 162.63693237304688, 280.3246154785156, -685.1963500976562, 135.60939025878906, -606.3668823242188, 676.5333862304688, 143.44223022460938, 1504.776611328125, -381.0495300292969, 447.0028076171875, -597.523193359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000596.npy"} +{"epoch": 0.8751835535976505, "step": 597, "batch_size": 64, "mean": 501.1011047363281, "std": 699.3677978515625, "min": -1190.7113037109375, "p10": -142.0111083984375, "median": 383.52488708496094, "p90": 1472.1178955078126, "max": 2383.426025390625, "pos_frac": 0.796875, "sample": [907.55224609375, 583.5866088867188, 1158.08447265625, 285.55810546875, 434.4228515625, 1542.3145751953125, -143.71536254882812, 1017.6296997070312, 1488.1170654296875, -407.232177734375, -63.525718688964844, -96.75042724609375, 290.8136291503906, 56.067989349365234, 695.0469970703125, 82.9286880493164, 1119.83349609375, 100.17372131347656, 367.3702697753906, 1831.8890380859375, 401.0125427246094, 501.093994140625, 28.386810302734375, 1080.4256591796875, 201.5778045654297, 292.0544128417969, 37.4832878112793, 212.62644958496094, 136.2286834716797, -994.4031982421875, -1190.7113037109375, -45.91510009765625, 1294.72119140625, 399.67950439453125, 605.2633056640625, 2279.083251953125, 1366.471923828125, -53.08350372314453, -417.666015625, 84.04711151123047, 690.3079833984375, 1254.791015625, 26.357032775878906, 272.8927001953125, 407.1424865722656, -138.03451538085938, 594.8178100585938, 900.8588256835938, 52.82914733886719, 1434.7864990234375, 154.8592529296875, -250.71002197265625, -198.87646484375, 2383.426025390625, 1935.1414794921875, 61.90086364746094, 589.8607177734375, 519.3477783203125, 752.8250732421875, 711.9656982421875, 113.88957214355469, 1543.407958984375, -5.034873962402344, 793.1764526367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000597.npy"} +{"epoch": 0.8766519823788547, "step": 598, "batch_size": 64, "mean": 394.01727294921875, "std": 591.6588134765625, "min": -1169.5767822265625, "p10": -158.97265472412107, "median": 412.51173400878906, "p90": 1134.2729614257814, "max": 2201.1884765625, "pos_frac": 0.75, "sample": [448.0111083984375, 633.685302734375, 405.9939270019531, 1069.164794921875, -1169.5767822265625, 322.5455627441406, 1106.8460693359375, 677.9136352539062, 199.24542236328125, 1146.02734375, -31.267425537109375, -701.6063232421875, -907.557373046875, 187.5254669189453, 1203.3642578125, -149.9004364013672, 694.5324096679688, 744.5457153320312, 1294.7998046875, 1757.054931640625, 250.36553955078125, 1262.7353515625, 714.333740234375, -275.068359375, 282.30804443359375, 447.2619323730469, 557.907470703125, 356.05718994140625, -1009.9009399414062, 147.69361877441406, 625.2706298828125, 626.9745483398438, 472.80255126953125, -126.17310333251953, -399.43560791015625, 552.4498901367188, 636.4801025390625, 1220.0419921875, 196.06666564941406, 321.71917724609375, 206.65054321289062, 208.50079345703125, -106.13021087646484, 501.7786560058594, -17.58582878112793, 680.2510986328125, 419.029541015625, 423.34368896484375, -3.8012256622314453, 111.41126251220703, 353.0382995605469, 2201.1884765625, 787.357177734375, 780.14794921875, 554.4114379882812, 293.6723327636719, -59.709716796875, 990.9041748046875, 545.7037963867188, -162.86074829101562, 18.463119506835938, -113.36048126220703, 822.074951171875, -8.611370086669922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000598.npy"} +{"epoch": 0.8781204111600588, "step": 599, "batch_size": 64, "mean": 334.4490051269531, "std": 512.0916137695312, "min": -2036.3336181640625, "p10": -137.8798736572265, "median": 318.95494079589844, "p90": 931.437420654297, "max": 1550.001220703125, "pos_frac": 0.859375, "sample": [432.4333801269531, 827.3602294921875, 319.1585693359375, 465.8375244140625, 797.9978637695312, 208.54766845703125, 1550.001220703125, 264.63507080078125, 215.05303955078125, 365.8104248046875, 449.9862060546875, -682.2843017578125, 6.40557861328125, 87.89935302734375, 0.46076011657714844, 383.603271484375, 906.6297607421875, 942.0692749023438, 548.2816162109375, 1247.8538818359375, 190.41900634765625, 588.2130126953125, 730.43212890625, 441.4924011230469, 350.2371826171875, 46.76323699951172, 128.91558837890625, 308.186279296875, 261.54443359375, -2036.3336181640625, 698.1785888671875, 264.6331787109375, 510.7486572265625, 267.5810546875, 1095.1356201171875, 717.7213745117188, 564.1463623046875, -96.6290283203125, 1162.9342041015625, 180.91989135742188, 447.7296142578125, 120.08859252929688, 900.751708984375, -251.3662567138672, -50.21827697753906, 327.86322021484375, 522.7062377929688, 158.31436157226562, 324.8686218261719, 135.24032592773438, 1075.9495849609375, 229.56333923339844, 107.65491485595703, 15.024238586425781, -326.3965759277344, 33.929866790771484, 280.5202941894531, 424.71466064453125, -294.10260009765625, 1192.65185546875, -155.55880737304688, -305.76300048828125, 318.7513122558594, 458.8396301269531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000599.npy"} +{"epoch": 0.8795888399412628, "step": 600, "batch_size": 64, "mean": 313.2653503417969, "std": 615.0382080078125, "min": -1123.8260498046875, "p10": -537.7569519042968, "median": 209.2383270263672, "p90": 1062.9716796875, "max": 2317.26123046875, "pos_frac": 0.765625, "sample": [165.61888122558594, 647.701171875, 614.3322143554688, -1123.8260498046875, 847.2069702148438, 590.743408203125, -59.433467864990234, 579.655029296875, -618.0671997070312, 505.6534118652344, 81.28487396240234, -60.95444107055664, 1009.1268310546875, -225.84561157226562, 293.7010498046875, 642.860595703125, -713.8639526367188, 1296.1336669921875, 75.9989242553711, 319.0572509765625, 807.5867919921875, 1002.15869140625, -182.1487579345703, 273.2295227050781, 14.128555297851562, -363.77001953125, 703.3697509765625, 1095.8104248046875, 1171.91357421875, 834.5504760742188, 145.78573608398438, 11.661041259765625, 981.5238037109375, 72.55951690673828, 1070.978515625, 625.5198974609375, 29.027450561523438, 250.2084197998047, -455.88519287109375, 1370.8717041015625, 2317.26123046875, 196.61936950683594, -154.77207946777344, 21.62071990966797, 1389.2181396484375, 163.9537353515625, 782.8114624023438, 30.037059783935547, -776.5764770507812, 518.957275390625, 81.69563293457031, 1044.2890625, 330.12420654296875, 9.765556335449219, 221.85728454589844, 786.3657836914062, -591.05859375, 192.77371215820312, 6.357107162475586, -572.8448486328125, -628.0413208007812, 481.094482421875, -210.73550415039062, 82.0467758178711], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000600.npy"} +{"epoch": 0.8810572687224669, "step": 601, "batch_size": 64, "mean": 405.19476318359375, "std": 532.619140625, "min": -1110.34814453125, "p10": -234.9526885986328, "median": 366.0316467285156, "p90": 1135.3258544921878, "max": 1709.3031005859375, "pos_frac": 0.78125, "sample": [660.879150390625, 1190.882080078125, 1709.3031005859375, 715.5115966796875, -54.9166145324707, 1019.7229614257812, 370.26861572265625, 156.08642578125, -204.81036376953125, 256.84051513671875, -238.8753662109375, -275.20880126953125, 1271.1201171875, 449.80328369140625, 274.0904235839844, 653.1769409179688, 361.794677734375, 642.775390625, -225.79977416992188, 1364.40283203125, 679.9334716796875, 593.4434814453125, 779.1557006835938, -57.17869186401367, 566.856689453125, 248.58494567871094, -43.97412109375, 565.0698852539062, 816.4517822265625, 230.66531372070312, 421.4316711425781, 317.08441162109375, -889.308837890625, 613.912109375, -380.9629821777344, 591.0935668945312, 146.233642578125, -38.37455749511719, 158.92098999023438, 252.266845703125, 564.7640380859375, 702.4067993164062, 137.37969970703125, -1110.34814453125, 1169.2652587890625, -453.3001708984375, 43.164947509765625, 1501.8248291015625, 614.1910400390625, -251.41650390625, 838.1840209960938, 894.9513549804688, 196.64549255371094, 356.1606750488281, 194.5740509033203, 972.6663208007812, 249.84107971191406, 1239.83056640625, 1056.1339111328125, 397.833984375, -91.63773345947266, 236.60174560546875, 443.6636962890625, 360.73150634765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000601.npy"} +{"epoch": 0.882525697503671, "step": 602, "batch_size": 64, "mean": 500.39605712890625, "std": 547.3839111328125, "min": -495.4358215332031, "p10": -86.11441192626948, "median": 383.1275177001953, "p90": 1125.6633056640626, "max": 2324.347412109375, "pos_frac": 0.84375, "sample": [444.8145446777344, 156.12677001953125, -139.09205627441406, 334.3108215332031, 38.349891662597656, 160.59661865234375, 975.5265502929688, 734.2977294921875, 190.2393341064453, 262.47393798828125, 431.9442138671875, 69.37208557128906, 677.3761596679688, 979.1334228515625, 1134.6407470703125, 60.08838653564453, 480.05224609375, 325.40240478515625, 434.07720947265625, 1247.925048828125, -18.351150512695312, 224.010498046875, -35.15594482421875, -495.4358215332031, 1073.443359375, 734.18310546875, 295.38397216796875, 527.7708129882812, 132.06878662109375, -104.42900085449219, 97.2922592163086, 1004.61279296875, 883.3948364257812, 145.52210998535156, 757.9891357421875, 1233.724365234375, 247.93789672851562, 482.78753662109375, -431.4715881347656, -292.2138977050781, 797.7603759765625, 1104.7159423828125, 749.0505981445312, 952.8505859375, 747.14111328125, 836.9019775390625, 2324.347412109375, -113.92634582519531, 1178.3009033203125, -402.29315185546875, 1032.356201171875, 829.7449951171875, 231.13674926757812, 171.34500122070312, 1267.452392578125, -43.38037109375, 279.91070556640625, 839.6138916015625, 154.14566040039062, 117.89230346679688, 802.7836303710938, 2185.2822265625, 310.4796447753906, 211.01597595214844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000602.npy"} +{"epoch": 0.8839941262848752, "step": 603, "batch_size": 64, "mean": 453.99981689453125, "std": 660.41259765625, "min": -915.5481567382812, "p10": -313.27838745117185, "median": 361.95921325683594, "p90": 1319.7637207031253, "max": 2498.57177734375, "pos_frac": 0.734375, "sample": [-383.771728515625, -246.10765075683594, 364.97528076171875, 43.67902374267578, 1340.0657958984375, -21.935455322265625, 1144.154052734375, 413.06878662109375, 862.97265625, 1262.939208984375, -74.61663818359375, 29.842063903808594, -641.833251953125, 840.6045532226562, 51.769840240478516, 417.9892272949219, 46.72943115234375, 1420.2906494140625, 1888.782958984375, 488.8296203613281, 980.9922485351562, 904.462890625, 695.973876953125, 54.472599029541016, 266.54949951171875, 301.0816345214844, 435.51495361328125, 135.26242065429688, -15.19277572631836, 253.27801513671875, 1563.01220703125, -915.5481567382812, 1656.816650390625, -195.73544311523438, -5.2594146728515625, 349.974609375, 933.9203491210938, -311.885009765625, 662.0243530273438, 545.3050537109375, 925.99853515625, -313.87554931640625, -88.68681335449219, -38.64542007446289, -125.5925521850586, 550.2673950195312, 776.7457885742188, 138.4629669189453, -342.6831970214844, 437.6064453125, 486.7962646484375, 1272.3922119140625, 923.6002197265625, -366.2525634765625, 243.60711669921875, -459.96453857421875, 2498.57177734375, 358.9431457519531, 1718.966064453125, 43.3736572265625, 852.63916015625, 1104.8616943359375, 904.4666137695312, 10.941043853759766], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000603.npy"} +{"epoch": 0.8854625550660793, "step": 604, "batch_size": 64, "mean": 493.2238464355469, "std": 678.4652709960938, "min": -895.0358276367188, "p10": -313.49714050292965, "median": 439.3319396972656, "p90": 1535.649353027344, "max": 2307.789794921875, "pos_frac": 0.71875, "sample": [-365.61749267578125, 172.90127563476562, -262.3268127441406, -117.42552185058594, 642.7030639648438, 989.7188720703125, 948.2613525390625, 1089.564453125, 932.5780639648438, 2307.789794921875, -895.0358276367188, 922.404541015625, -4.2804412841796875, -293.0589904785156, 855.203125, 1113.8271484375, 887.1531372070312, 87.37493896484375, 383.1335754394531, 990.5184326171875, 152.35882568359375, -589.8610229492188, 76.495361328125, 1497.471923828125, 605.858642578125, -577.8001098632812, 1665.19140625, -230.7788543701172, 140.74960327148438, 171.11962890625, -4.734153747558594, 440.644775390625, 682.9924926757812, 434.5022277832031, -67.78646850585938, 1636.0733642578125, 642.4938354492188, 28.865516662597656, -322.25634765625, 854.4354858398438, 586.8980102539062, 438.01910400390625, 499.4633483886719, 289.95843505859375, 427.1507873535156, 2206.14404296875, -527.6898193359375, 1593.71142578125, 453.6470642089844, -50.21153259277344, 604.885498046875, 1054.3968505859375, 789.7860107421875, -49.29774856567383, 1815.0687255859375, 561.0997924804688, -380.19915771484375, -17.849212646484375, 394.7258605957031, 865.9326782226562, -27.5833740234375, 500.33404541015625, 1552.0111083984375, 364.5019836425781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000604.npy"} +{"epoch": 0.8869309838472834, "step": 605, "batch_size": 64, "mean": 364.563232421875, "std": 619.1599731445312, "min": -1080.8778076171875, "p10": -280.0462249755859, "median": 268.94615173339844, "p90": 1077.9835083007818, "max": 2804.8779296875, "pos_frac": 0.734375, "sample": [-55.34141540527344, -289.3349609375, -21.6883544921875, 195.22933959960938, -3.7275161743164062, 84.87379455566406, 806.4883422851562, 666.2239379882812, 889.0159912109375, 235.28546142578125, 616.6793212890625, -25.077728271484375, 142.04217529296875, -285.9608154296875, 449.58074951171875, 458.5934753417969, 167.33750915527344, 833.3178100585938, 1608.2188720703125, -263.12890625, 2804.8779296875, 114.95669555664062, 572.73486328125, 20.994234085083008, 184.9086151123047, 898.8948974609375, -159.04071044921875, -266.2455139160156, 306.29571533203125, 364.7637023925781, 47.723411560058594, -296.93896484375, 819.2278442382812, -510.6501159667969, -134.22027587890625, 437.4383239746094, 1147.77685546875, 88.26701354980469, 262.80120849609375, 741.6778564453125, 64.89759826660156, 2262.09912109375, -653.8203125, -1080.8778076171875, 491.2506103515625, 427.17449951171875, -299.1505432128906, 1166.8359375, 104.74964141845703, -4.756513595581055, 549.5943603515625, 433.5511474609375, 1195.9844970703125, 275.0910949707031, 241.1620635986328, 405.02752685546875, 421.5736083984375, 1140.0440673828125, 418.6727294921875, 188.2532501220703, 933.175537109375, 394.1478576660156, -161.23883056640625, 763.734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000605.npy"} +{"epoch": 0.8883994126284875, "step": 606, "batch_size": 64, "mean": 406.254150390625, "std": 578.8989868164062, "min": -604.4510498046875, "p10": -307.6711807250976, "median": 307.5107116699219, "p90": 1068.1832275390625, "max": 2125.666259765625, "pos_frac": 0.75, "sample": [-492.88165283203125, 37.690032958984375, 138.55960083007812, -334.6037292480469, 334.2984313964844, 927.8582763671875, 287.6645812988281, 187.1148681640625, 1117.270751953125, 786.7747192382812, 153.0751495361328, 2125.666259765625, 901.281005859375, -34.853271484375, -515.354736328125, 233.05242919921875, 504.228271484375, 232.659423828125, 245.68008422851562, 1323.093994140625, 841.4561767578125, -415.386962890625, 1570.7025146484375, 22.112632751464844, 938.246337890625, 700.8404541015625, 561.2489013671875, 187.47901916503906, -91.4248275756836, -244.8285675048828, 507.64031982421875, 657.0533447265625, -441.51507568359375, 288.4375305175781, -561.3220825195312, 375.30322265625, 210.76992797851562, 692.5697631835938, -105.01212310791016, -105.3511962890625, -604.4510498046875, -126.30896759033203, 2109.659423828125, 1073.4830322265625, 251.85549926757812, 690.0985107421875, 79.93903350830078, 505.177001953125, 941.6738891601562, 322.89544677734375, -22.877365112304688, 863.2105712890625, -38.91133117675781, 292.1259765625, 1055.8170166015625, 1524.1204833984375, 354.000244140625, 670.8212280273438, -3.2524490356445312, 276.31158447265625, 528.5940551757812, 601.5214233398438, 562.0758666992188, 345.3937072753906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000606.npy"} +{"epoch": 0.8898678414096917, "step": 607, "batch_size": 64, "mean": 347.606689453125, "std": 581.8700561523438, "min": -1150.8497314453125, "p10": -226.73369750976556, "median": 307.9254608154297, "p90": 1045.9131164550784, "max": 2569.103515625, "pos_frac": 0.8125, "sample": [468.2796630859375, -123.78273010253906, 529.8599243164062, 227.17625427246094, 149.92718505859375, 173.76002502441406, 1065.0953369140625, 430.13909912109375, 4.314788818359375, 1282.115478515625, -305.0166015625, 724.49853515625, 452.3182067871094, -251.39117431640625, 122.2978515625, 1065.883544921875, -663.5620727539062, 194.8248291015625, 377.02850341796875, 80.19928741455078, -169.1995849609375, 145.0402374267578, 376.7221984863281, 475.60595703125, -468.4803466796875, 616.4624633789062, -130.37261962890625, 1512.0107421875, 716.3009033203125, 126.26454162597656, 587.876708984375, 106.89154815673828, 763.1898803710938, 254.52944946289062, 542.3917846679688, 384.15869140625, 1001.1546020507812, 27.287353515625, 1522.5106201171875, 380.5259704589844, -1072.88720703125, 575.39501953125, -167.98724365234375, 533.41064453125, 681.08203125, 91.99237060546875, 3.8278579711914062, 125.93706512451172, -1150.8497314453125, 268.53179931640625, 531.58935546875, 326.1231384277344, -149.95474243164062, 628.614013671875, 249.91604614257812, 49.42222595214844, 289.727783203125, 2569.103515625, 1369.0555419921875, 707.2258911132812, 289.4372253417969, -347.7000732421875, 589.1575927734375, 481.8223876953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000607.npy"} +{"epoch": 0.8913362701908958, "step": 608, "batch_size": 64, "mean": 394.43609619140625, "std": 487.6206359863281, "min": -508.9827575683594, "p10": -264.8615325927734, "median": 387.0048522949219, "p90": 936.8466552734377, "max": 1691.9454345703125, "pos_frac": 0.8125, "sample": [679.5803833007812, 842.6430053710938, 368.36395263671875, 508.54779052734375, -422.0726623535156, 1528.12158203125, -357.0835876464844, 132.4229278564453, -194.17909240722656, 572.2408447265625, 733.6663208007812, 48.07969665527344, 49.997718811035156, 614.7772827148438, -118.80723571777344, 146.0354766845703, -508.9827575683594, 439.86968994140625, -95.03868865966797, 336.0982360839844, 502.6417236328125, 641.241943359375, 88.46562194824219, 754.416259765625, 47.954505920410156, 408.07696533203125, 653.1060791015625, 487.4891662597656, 956.2774047851562, -265.09857177734375, 77.8162841796875, -45.45817565917969, 253.20545959472656, 323.5086975097656, 714.8521728515625, 299.56646728515625, 481.08355712890625, 405.645751953125, 1197.0113525390625, -297.56890869140625, 1690.763916015625, 219.02569580078125, 649.0408935546875, 1022.2266845703125, 1195.794189453125, 313.29534912109375, 63.476531982421875, 815.373046875, 44.253501892089844, -282.3627014160156, 613.103759765625, 3.4816360473632812, 546.12890625, 792.7034301757812, -264.3084411621094, 891.5082397460938, -344.7373352050781, 99.86697387695312, 1691.9454345703125, 730.3302001953125, 749.0147094726562, 515.2033081054688, 199.20346069335938, 301.0662841796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000608.npy"} +{"epoch": 0.8928046989720999, "step": 609, "batch_size": 64, "mean": 509.67462158203125, "std": 781.104736328125, "min": -1132.5072021484375, "p10": -249.16799468994128, "median": 410.7147216796875, "p90": 1216.559851074219, "max": 3841.43115234375, "pos_frac": 0.78125, "sample": [75.50216674804688, 106.74121856689453, 104.2748794555664, -620.065673828125, 254.5305938720703, 408.35894775390625, 1256.4852294921875, 413.07049560546875, 592.2719116210938, -347.44512939453125, 769.5745239257812, 1785.91162109375, 99.28086853027344, 443.3001708984375, 765.9688110351562, -1132.5072021484375, -93.35134887695312, 513.1996459960938, 1036.768798828125, 397.5000915527344, 695.7838134765625, -100.40899658203125, 700.1193237304688, 634.4799194335938, -324.55841064453125, 588.9271240234375, 1317.99755859375, 219.76358032226562, -493.6918029785156, 675.6321411132812, 1643.2335205078125, 635.6482543945312, 2437.844482421875, 548.1668090820312, 2892.26416015625, 940.68701171875, -9.996284484863281, 714.460693359375, -78.18746185302734, 293.6546325683594, 390.2080383300781, 203.03614807128906, 828.570556640625, 3841.43115234375, 778.998291015625, 92.43243408203125, 437.1611633300781, -113.81535339355469, -658.6927490234375, 177.9501190185547, 336.1983642578125, 279.6111145019531, 1102.5706787109375, -39.58665466308594, -4.691215515136719, 1021.1597900390625, 304.7545166015625, 626.838134765625, 656.4592895507812, 1123.400634765625, 269.91357421875, 443.84783935546875, -307.17626953125, 67.40580749511719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000609.npy"} +{"epoch": 0.8942731277533039, "step": 610, "batch_size": 64, "mean": 313.662353515625, "std": 590.554443359375, "min": -1913.3743896484375, "p10": -402.4262023925781, "median": 300.8416290283203, "p90": 922.6317382812505, "max": 1717.2615966796875, "pos_frac": 0.78125, "sample": [592.727294921875, 713.6580810546875, 104.52973937988281, 209.61228942871094, -388.8856201171875, 691.2636108398438, 80.58436584472656, 565.9742431640625, 574.7135620117188, -577.2609252929688, 17.16663360595703, 568.6644287109375, 20.17536163330078, 1283.623779296875, 622.1207275390625, 349.76055908203125, 166.46823120117188, 188.54714965820312, -545.0983276367188, -1913.3743896484375, 469.96917724609375, 1717.2615966796875, 164.77517700195312, 1014.2200927734375, -257.9698486328125, 370.86883544921875, 138.8544921875, -408.22930908203125, 771.8870239257812, 739.3829345703125, 1605.835205078125, -59.35697937011719, 0.6893157958984375, 713.5413818359375, 294.18426513671875, 1571.24853515625, -482.2277526855469, 204.87374877929688, 322.73126220703125, 642.1701049804688, 699.065185546875, 447.83251953125, -41.7586784362793, 735.8605346679688, 138.54672241210938, 248.61370849609375, 972.4064331054688, 142.57513427734375, -115.64279174804688, 337.2257080078125, 34.62281799316406, 609.441650390625, 265.7818298339844, -95.94841003417969, 654.51220703125, 167.88241577148438, 334.23101806640625, 308.12933349609375, -174.32362365722656, 307.4989929199219, -473.7134094238281, -654.47998046875, 806.4907836914062, 1559.859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000610.npy"} +{"epoch": 0.895741556534508, "step": 611, "batch_size": 64, "mean": 539.230224609375, "std": 747.3662719726562, "min": -783.7963256835938, "p10": -258.3432540893554, "median": 423.71192932128906, "p90": 1484.3809936523437, "max": 2606.8134765625, "pos_frac": 0.765625, "sample": [1421.3038330078125, 431.7895812988281, 2606.8134765625, 1660.573974609375, 1155.3416748046875, 605.2615966796875, -89.59998321533203, 1340.6885986328125, 115.6621322631836, 227.79006958007812, 326.1958312988281, 720.27587890625, 14.909374237060547, -147.3780975341797, -712.5304565429688, -228.88319396972656, -360.39324951171875, -783.7963256835938, 278.6047668457031, 1452.8489990234375, -705.1573486328125, 1234.375732421875, 2120.376708984375, -397.92999267578125, 1474.2908935546875, 567.7706298828125, 1.3872451782226562, 998.8410034179688, 574.1875610351562, 49.69572448730469, -93.58013916015625, 1011.6817626953125, 367.8602294921875, -72.81226348876953, 435.95574951171875, 1116.923095703125, 43.51081085205078, 235.4373779296875, -201.54690551757812, 292.0350341796875, 998.3040771484375, 357.14593505859375, 12.928115844726562, 1842.99560546875, 437.0740966796875, 473.9591064453125, -291.8968200683594, 491.2727966308594, 1003.588134765625, -111.83536529541016, 652.3720092773438, -270.968994140625, 262.8825378417969, 415.63427734375, 794.6087646484375, 496.724609375, 349.7413635253906, 1831.208984375, 465.03704833984375, 1488.705322265625, 1182.32568359375, -7.869300842285156, 2526.341796875, 21.67403793334961], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000611.npy"} +{"epoch": 0.8972099853157122, "step": 612, "batch_size": 64, "mean": 412.41900634765625, "std": 568.640625, "min": -819.7246704101562, "p10": -273.43865966796875, "median": 349.06353759765625, "p90": 1082.3628173828126, "max": 2391.05224609375, "pos_frac": 0.8125, "sample": [975.4953002929688, 306.3393249511719, 305.2430725097656, -359.4724426269531, -60.901004791259766, 26.880884170532227, 1017.0647583007812, 356.5807800292969, 429.62811279296875, 459.0221862792969, -624.33447265625, 870.9335327148438, 193.88308715820312, 1038.359375, 834.0338134765625, 943.5825805664062, 553.0277099609375, 230.24267578125, 509.227294921875, 1207.19580078125, 806.9053955078125, 95.96245574951172, -819.7246704101562, 1302.234619140625, 714.7825317382812, -272.1419677734375, 822.4575805664062, -436.515380859375, 609.599853515625, 638.0133056640625, 504.7740478515625, -789.93701171875, 77.11708068847656, 341.5462951660156, 69.30902099609375, -335.85638427734375, 595.5265502929688, 397.0440979003906, 122.244384765625, -158.8853759765625, 135.82237243652344, 153.5783233642578, 153.49261474609375, 209.9410400390625, 2391.05224609375, 62.002262115478516, 807.3770751953125, 1101.221435546875, 1228.467041015625, 260.6278076171875, 163.64271545410156, 785.8681030273438, 1412.4398193359375, 147.82254028320312, -16.919723510742188, -165.54835510253906, 484.75994873046875, 1.0710601806640625, 971.2689819335938, -273.994384765625, 1135.5587158203125, 683.1022338867188, 955.2660522460938, 110.406494140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000612.npy"} +{"epoch": 0.8986784140969163, "step": 613, "batch_size": 64, "mean": 496.83154296875, "std": 602.9065551757812, "min": -862.5313720703125, "p10": -30.466036224365226, "median": 408.52552795410156, "p90": 1198.5684082031253, "max": 2370.504638671875, "pos_frac": 0.84375, "sample": [575.285400390625, 1224.2889404296875, 1342.61181640625, -181.29100036621094, 1064.728515625, 1138.5538330078125, 43.91865921020508, 1134.1727294921875, 932.355224609375, 30.03397560119629, 1443.8812255859375, 206.87265014648438, 689.2914428710938, 236.416748046875, 1555.7479248046875, -320.852783203125, -5.6096343994140625, 423.914306640625, 377.287353515625, -22.865707397460938, 368.9669189453125, 77.55101776123047, 721.6334838867188, 605.06494140625, 1124.0245361328125, 300.6431884765625, 1114.74755859375, 1015.8660278320312, 7.39068603515625, 503.925048828125, 731.5330200195312, 44.131744384765625, -33.72332000732422, 583.618896484375, 364.1392822265625, 1835.427734375, 666.994873046875, 698.2974853515625, 2370.504638671875, 3.8203353881835938, -856.7022094726562, 996.71337890625, 648.3374633789062, 824.103759765625, 620.1129150390625, -862.5313720703125, 179.6953887939453, 1521.9129638671875, -4.473545074462891, 238.28216552734375, 854.08203125, -219.80535888671875, -486.0658874511719, 160.18927001953125, 1107.36669921875, 523.8475341796875, 161.18576049804688, 393.1367492675781, 51.304527282714844, 567.7225952148438, 7.5076141357421875, 96.8602066040039, 187.53683471679688, 93.5982666015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000613.npy"} +{"epoch": 0.9001468428781204, "step": 614, "batch_size": 64, "mean": 485.2371826171875, "std": 682.4478759765625, "min": -758.361083984375, "p10": -258.980694580078, "median": 459.6297912597656, "p90": 1356.8653808593754, "max": 2577.29345703125, "pos_frac": 0.734375, "sample": [209.75865173339844, 436.328857421875, -593.1375732421875, 953.8035888671875, -87.91716003417969, 1282.5189208984375, 14.504886627197266, 387.8717346191406, 504.57025146484375, -11.36208724975586, 1266.9569091796875, 318.17816162109375, 518.0064697265625, 762.8468627929688, 205.36505126953125, -166.0338134765625, 1429.82861328125, 756.4702758789062, 195.97828674316406, -153.9140167236328, 1115.8837890625, -19.107330322265625, 653.4371337890625, 957.71240234375, -645.6848754882812, -118.89542388916016, 482.93072509765625, -108.93716430664062, 614.5933227539062, -296.7484130859375, 582.1989135742188, 1746.148681640625, -170.85601806640625, 781.2136840820312, -542.87548828125, -146.806396484375, 686.0031127929688, -758.361083984375, -42.44862365722656, 724.20703125, 1551.2503662109375, 379.7981262207031, 1568.814208984375, 263.2566223144531, 1118.3031005859375, 1140.1041259765625, 1270.9930419921875, 263.77880859375, 654.5487060546875, 698.1323852539062, 27.539146423339844, 333.8923034667969, 1388.7281494140625, 105.14968872070312, 687.8037109375, 2577.29345703125, 1899.572998046875, 143.2049560546875, -654.8387451171875, -736.7711791992188, 1219.3919677734375, 147.72027587890625, 505.1399841308594, 778.14404296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000614.npy"} +{"epoch": 0.9016152716593245, "step": 615, "batch_size": 64, "mean": 406.22174072265625, "std": 654.9724731445312, "min": -1847.3687744140625, "p10": -214.1412460327148, "median": 258.9588394165039, "p90": 1132.911572265625, "max": 2489.979736328125, "pos_frac": 0.8125, "sample": [147.51516723632812, 718.9938354492188, 177.83123779296875, 1455.82666015625, 228.41734313964844, 1239.3704833984375, -25.472152709960938, 63.99677276611328, 41.19873046875, 99.28195190429688, 937.6357421875, 273.4111328125, 242.6700897216797, -334.49951171875, 803.4951171875, -446.5713195800781, 965.2835083007812, -1847.3687744140625, 245.18934631347656, 378.6955261230469, 272.72833251953125, 159.73095703125, 2023.7950439453125, 163.40301513671875, 72.68733978271484, 1019.9949951171875, -165.84683227539062, 324.89373779296875, 508.8517761230469, 99.55209350585938, 393.31158447265625, 910.7526245117188, 649.533447265625, 979.2493896484375, 1332.089599609375, 3.7653579711914062, 180.7213592529297, 113.29153442382812, -412.349609375, -115.71601104736328, -831.2781372070312, 1050.5025634765625, 1139.524169921875, 52.07965087890625, 1592.116943359375, 533.5494995117188, 555.4909057617188, 1117.482177734375, 155.279296875, 481.65325927734375, 947.5741577148438, -234.83885192871094, 194.80592346191406, -23.323333740234375, 725.5309448242188, -395.5334167480469, 222.37359619140625, 380.35418701171875, 465.3711853027344, 752.6765747070312, -130.9123077392578, 2489.979736328125, 741.4156494140625, 136.97776794433594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000615.npy"} +{"epoch": 0.9030837004405287, "step": 616, "batch_size": 64, "mean": 337.239990234375, "std": 563.5425415039062, "min": -717.1775512695312, "p10": -348.5673065185547, "median": 300.2670135498047, "p90": 1160.3758422851565, "max": 1729.2728271484375, "pos_frac": 0.765625, "sample": [902.2392578125, 13.269195556640625, 108.00666046142578, 740.7044067382812, 489.4858093261719, 60.813453674316406, 312.44921875, -233.23129272460938, 329.7800598144531, 1188.768798828125, 1333.463134765625, 603.9046020507812, 186.86624145507812, 92.10966491699219, -666.3921508789062, 1191.8516845703125, 421.3175964355469, 288.0848083496094, 122.58983612060547, -99.8091049194336, 1424.3074951171875, 540.03564453125, 483.8152160644531, 458.3050842285156, -323.7012634277344, -613.9149169921875, 448.92022705078125, 279.16009521484375, -61.683311462402344, 1271.2506103515625, 1641.8692626953125, 167.45335388183594, -490.96575927734375, 16.564476013183594, 34.08823776245117, -681.6057739257812, -161.05523681640625, 195.39382934570312, 933.5630493164062, 664.8076782226562, 423.491943359375, 395.37518310546875, 401.86151123046875, 593.0762329101562, -717.1775512695312, 998.6395263671875, 177.0910186767578, -319.83160400390625, 167.28863525390625, 709.455322265625, 384.844970703125, -306.4184875488281, 1729.2728271484375, 380.6539306640625, 757.5927734375, 145.12655639648438, 281.096923828125, 144.1573028564453, -296.6211242675781, 626.5880737304688, 973.114990234375, -359.22418212890625, 1094.1256103515625, -413.1029968261719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000616.npy"} +{"epoch": 0.9045521292217328, "step": 617, "batch_size": 64, "mean": 239.33428955078125, "std": 549.9153442382812, "min": -1120.830810546875, "p10": -401.5051757812499, "median": 264.7053527832031, "p90": 685.8989868164062, "max": 2128.8388671875, "pos_frac": 0.765625, "sample": [334.2320556640625, 141.29241943359375, -216.438720703125, 325.4556579589844, 400.0152282714844, -227.56854248046875, 29.7029972076416, -597.765625, 658.9923095703125, 165.50831604003906, -231.17514038085938, 384.59674072265625, 400.2426452636719, 435.49432373046875, 210.4630126953125, 228.491943359375, 368.8731689453125, 120.67527770996094, 46.37770080566406, 1274.209716796875, 678.758544921875, 572.2413330078125, 173.5426025390625, 34.35314178466797, 399.0122985839844, 444.7196960449219, 103.2883529663086, -228.5889434814453, 333.1059265136719, -1120.830810546875, 55.434173583984375, 396.5604553222656, -301.92718505859375, -297.6949157714844, 687.2484741210938, 328.0941162109375, 1077.47216796875, 682.7501831054688, 556.2569580078125, 449.8536071777344, 547.5557250976562, -444.18145751953125, 847.178466796875, 631.9865112304688, 1828.3756103515625, 465.261962890625, 587.1951904296875, -628.6304931640625, 246.89468383789062, 505.32989501953125, -1060.5538330078125, 125.84526062011719, 37.89861297607422, 228.58343505859375, 282.5160217285156, -123.97467041015625, 833.4881591796875, -169.15155029296875, 16.788827896118164, -447.7467041015625, 2128.8388671875, 303.4302978515625, -738.4503173828125, 37.591339111328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000617.npy"} +{"epoch": 0.9060205580029369, "step": 618, "batch_size": 64, "mean": 517.63623046875, "std": 842.433837890625, "min": -1799.8807373046875, "p10": -461.64458923339845, "median": 434.40679931640625, "p90": 1292.820642089844, "max": 3091.216796875, "pos_frac": 0.78125, "sample": [-1799.8807373046875, 850.16845703125, 248.19720458984375, 321.9898986816406, 94.5259780883789, 261.38519287109375, 138.4156494140625, 66.52226257324219, 2372.493408203125, 929.4354858398438, 246.6401824951172, -956.0972290039062, 838.9257202148438, 205.06068420410156, 1278.69921875, 330.3620910644531, -387.263916015625, 1179.3782958984375, 779.2931518554688, -317.2714538574219, -194.8089599609375, -465.41607666015625, -487.284912109375, 24.700851440429688, -778.8060302734375, -672.1676025390625, 187.99227905273438, 1298.8726806640625, 1047.052978515625, 115.31561279296875, 601.27001953125, 444.89666748046875, 671.1250610351562, 1045.07470703125, 970.9135131835938, -868.1447143554688, -452.8444519042969, 1356.443115234375, 335.5123596191406, 467.6662902832031, 827.3367919921875, 1274.3759765625, 990.1488037109375, 2523.355712890625, 955.9830932617188, -116.71869659423828, 423.91693115234375, 652.585693359375, 330.2161865234375, 3091.216796875, 810.994140625, 41.95256805419922, 1473.2884521484375, 1250.0067138671875, 872.0554809570312, 511.52593994140625, -163.6730194091797, 1118.345947265625, 2489.130615234375, -38.637237548828125, 423.30255126953125, 1047.7628173828125, 654.363525390625, 357.53656005859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000618.npy"} +{"epoch": 0.9074889867841409, "step": 619, "batch_size": 64, "mean": 395.64404296875, "std": 803.299560546875, "min": -1036.502197265625, "p10": -517.3037841796875, "median": 278.3852081298828, "p90": 1407.0930297851564, "max": 3312.255615234375, "pos_frac": 0.671875, "sample": [-1001.739013671875, -615.66162109375, 4.6888427734375, 1165.9327392578125, 1491.5262451171875, -104.2719497680664, 284.3180847167969, 188.25289916992188, 342.08984375, -460.7377014160156, 635.8877563476562, 725.87890625, 406.54150390625, 1813.169189453125, -670.5184936523438, 209.11102294921875, 1204.4560546875, 397.43511962890625, 1136.886474609375, 1058.8902587890625, -762.6334228515625, -728.5419921875, 1298.4576416015625, 1376.9710693359375, -38.22023010253906, -486.4619140625, -68.51534271240234, 560.5504150390625, 638.56005859375, -81.20037841796875, 104.72824096679688, 499.9517517089844, -371.46197509765625, 72.146240234375, 1420.00244140625, -304.5422668457031, -81.1412353515625, -223.3467254638672, 736.522216796875, 582.6229248046875, -58.72341537475586, 1815.119873046875, 580.824951171875, 611.2733154296875, 962.3796997070312, 76.07331848144531, -233.05581665039062, 2386.197998046875, 1560.3009033203125, 272.45233154296875, 98.22614288330078, 229.61807250976562, 404.1402282714844, 3312.255615234375, -1036.502197265625, -530.521728515625, 613.507568359375, -86.626708984375, 1220.609375, -62.61551284790039, 325.0919189453125, 151.133544921875, 7.806585311889648, 345.6692199707031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000619.npy"} +{"epoch": 0.908957415565345, "step": 620, "batch_size": 64, "mean": 367.34222412109375, "std": 607.8278198242188, "min": -1031.25244140625, "p10": -273.6461517333984, "median": 337.4606628417969, "p90": 1327.2459838867187, "max": 1961.2783203125, "pos_frac": 0.765625, "sample": [430.2699279785156, -326.0052490234375, 456.3177490234375, 634.8692626953125, 454.18402099609375, 714.318359375, 193.6746063232422, 246.418212890625, -78.50492095947266, 1355.5078125, -284.5209045410156, 321.17864990234375, -248.271728515625, 1334.3751220703125, 485.4410400390625, 138.67027282714844, 1087.760009765625, 462.2524108886719, -49.700408935546875, 1386.9930419921875, 1961.2783203125, 198.90444946289062, 419.1668701171875, 1041.4583740234375, -525.3696899414062, 195.42025756835938, 600.1824340820312, 222.17105102539062, 172.44175720214844, 80.97421264648438, -134.42919921875, 1612.128173828125, 418.1304931640625, 1699.554443359375, -201.6988525390625, 493.80230712890625, 42.802757263183594, 495.109130859375, 22.793819427490234, 396.33056640625, 413.6600646972656, 469.1828308105469, -217.111572265625, -104.66555786132812, 1310.611328125, 737.2289428710938, 696.7799072265625, -1031.25244140625, 147.35781860351562, 217.41427612304688, -231.3502197265625, -992.1881103515625, 1723.267578125, 328.3575744628906, 236.03330993652344, 346.5637512207031, 462.4892883300781, 128.07339477539062, -486.61993408203125, 436.95379638671875, 273.46160888671875, 961.8853759765625, -637.3980712890625, 394.7880859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000620.npy"} +{"epoch": 0.9104258443465492, "step": 621, "batch_size": 64, "mean": 558.34814453125, "std": 721.8560791015625, "min": -1617.6788330078125, "p10": -243.4786209106445, "median": 495.5881805419922, "p90": 1420.5224609375002, "max": 3028.694580078125, "pos_frac": 0.84375, "sample": [671.4598388671875, -415.6578674316406, -809.596435546875, -221.04054260253906, 1820.9010009765625, 43.055213928222656, 108.21387481689453, 609.9071655273438, 685.4673461914062, 960.8378295898438, 835.7540893554688, 323.4281005859375, 310.05255126953125, 861.2349853515625, 265.81097412109375, -343.8250427246094, -273.2986145019531, 863.5491333007812, -1617.6788330078125, 192.82305908203125, 1561.99658203125, 209.94598388671875, 956.2275390625, 510.067138671875, 1180.1644287109375, 3028.694580078125, 170.11239624023438, 1013.1932373046875, 1100.638671875, -90.11532592773438, 1346.9293212890625, 779.0449829101562, 861.4091796875, 330.00048828125, 844.1320190429688, 1662.8973388671875, 181.12039184570312, 686.8932495117188, 422.1890563964844, 481.1092224121094, -109.82328796386719, 1265.5184326171875, -253.09494018554688, 561.9906005859375, 279.4460754394531, 153.54476928710938, 1452.0623779296875, 314.3838806152344, 1310.0755615234375, 1213.421142578125, -953.5543212890625, 340.11663818359375, 95.95015716552734, 219.28384399414062, 320.37738037109375, 664.1293334960938, 906.5386962890625, 1597.6578369140625, 219.14456176757812, 359.59320068359375, 1825.7236328125, 931.4505615234375, 706.444091796875, 205.84910583496094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000621.npy"} +{"epoch": 0.9118942731277533, "step": 622, "batch_size": 64, "mean": 399.9915466308594, "std": 617.837158203125, "min": -1051.384033203125, "p10": -143.3406066894531, "median": 375.58009338378906, "p90": 865.0925354003906, "max": 2674.97265625, "pos_frac": 0.828125, "sample": [681.3710327148438, 367.8480529785156, -153.46505737304688, 383.97344970703125, 568.9755859375, 372.01019287109375, -701.5899658203125, 379.1499938964844, 2464.942138671875, 593.6321411132812, 74.81214141845703, -296.558349609375, 81.40437316894531, -332.4693603515625, -26.586761474609375, 519.49853515625, 169.3922882080078, 389.2703857421875, 73.90090942382812, 260.182373046875, 1204.8309326171875, -10.2364501953125, 492.635498046875, 2674.97265625, 770.2977905273438, 58.097991943359375, 619.0169677734375, 754.8333740234375, 204.23133850097656, -1036.060546875, 232.00250244140625, 23.003662109375, 866.0392456054688, 201.61985778808594, 272.13616943359375, 862.883544921875, 487.748779296875, -480.16119384765625, 1065.1339111328125, 545.34521484375, 786.4171142578125, 852.911865234375, 15.196388244628906, 727.1640625, 216.04537963867188, -50.89361572265625, 388.3539123535156, -119.71688842773438, 59.80882263183594, 297.4103698730469, 1510.9970703125, 455.98040771484375, 746.290771484375, 650.3068237304688, 613.6019287109375, 681.0652465820312, -1051.384033203125, 26.352149963378906, 126.96998596191406, 1447.14404296875, 659.925537109375, 567.5150756835938, 9.470584869384766, 304.4610595703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000622.npy"} +{"epoch": 0.9133627019089574, "step": 623, "batch_size": 64, "mean": 310.7744445800781, "std": 610.7889404296875, "min": -1160.532958984375, "p10": -247.31089935302734, "median": 235.0264129638672, "p90": 1060.4595336914067, "max": 2687.61669921875, "pos_frac": 0.703125, "sample": [52.20013427734375, 255.85577392578125, -788.2537231445312, 1320.3529052734375, -172.18096923828125, -15.155529022216797, 700.5990600585938, 57.288604736328125, 834.7266845703125, 36.3392333984375, 1420.310546875, 298.7559509277344, 607.5829467773438, 97.34654235839844, 699.8550415039062, -248.03294372558594, 746.6971435546875, 370.00347900390625, -275.55975341796875, 124.6673583984375, -151.3341522216797, 489.8278503417969, 1212.74462890625, -172.94384765625, -1160.532958984375, 117.8956298828125, 332.0989990234375, -694.1296997070312, 422.79913330078125, 593.3983154296875, -245.62612915039062, 417.7452087402344, -80.69344329833984, -90.2745132446289, -160.12808227539062, 96.26651000976562, 941.9410400390625, 689.7399291992188, 840.71533203125, 227.45669555664062, 157.01026916503906, -638.92333984375, 341.0033264160156, 1358.0479736328125, -19.388397216796875, -61.9869270324707, 2687.61669921875, 513.9531860351562, -28.784404754638672, -431.0439147949219, 1111.253173828125, 565.08154296875, 258.09991455078125, 298.56549072265625, 242.59613037109375, 1746.29736328125, 102.55712890625, 117.11112976074219, 268.19140625, 27.413402557373047, 531.7799072265625, 213.0025634765625, 781.988037109375, -2.24041748046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000623.npy"} +{"epoch": 0.9148311306901615, "step": 624, "batch_size": 64, "mean": 469.9930419921875, "std": 565.01025390625, "min": -1042.8865966796875, "p10": -211.3280303955078, "median": 496.0617218017578, "p90": 1157.1246337890627, "max": 1480.5223388671875, "pos_frac": 0.78125, "sample": [603.2628784179688, -355.16839599609375, 676.2255859375, 332.902099609375, 498.96466064453125, 973.212890625, 1098.94482421875, 1480.5223388671875, 1445.392578125, 194.6236572265625, 179.71029663085938, -35.64569091796875, 173.60592651367188, 1182.058837890625, 919.2392578125, -83.98236083984375, 583.5763549804688, 768.9659423828125, 1223.736083984375, -979.2394409179688, 88.95570373535156, 277.6854248046875, 49.32178497314453, 856.0283203125, 379.9863586425781, 191.42111206054688, 818.1830444335938, 815.9992065429688, 1079.0445556640625, -215.97055053710938, 154.7240753173828, 1084.3604736328125, -200.4954833984375, 314.9439697265625, -444.6400146484375, 976.4934692382812, 450.37103271484375, -90.92559814453125, 948.165771484375, 386.902099609375, -1042.8865966796875, 75.0822525024414, -27.1044921875, 681.7864990234375, 1081.357421875, 1045.4986572265625, 288.09698486328125, 765.3643798828125, 943.4393310546875, 493.1587829589844, 189.0484161376953, -432.59912109375, -31.28779411315918, 1184.561767578125, 551.1448974609375, 603.4432983398438, -331.6918640136719, 604.1046142578125, -65.28553771972656, 726.6654052734375, 1307.4752197265625, 201.89385986328125, 1420.387451171875, 1046.437744140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000624.npy"} +{"epoch": 0.9162995594713657, "step": 625, "batch_size": 64, "mean": 511.17987060546875, "std": 543.3054809570312, "min": -349.70208740234375, "p10": -21.310681152343733, "median": 424.667236328125, "p90": 1208.5734497070314, "max": 2218.273681640625, "pos_frac": 0.875, "sample": [93.92330169677734, 88.19876098632812, -270.9192199707031, 929.669921875, 248.81365966796875, 50.5760498046875, 826.6494140625, 1223.101806640625, 1012.47705078125, 948.3060302734375, 54.220458984375, 489.3820495605469, 292.09503173828125, 95.75889587402344, 1681.142822265625, 526.1766967773438, 209.2628936767578, 55.09059524536133, 2218.273681640625, 489.82208251953125, 2005.6119384765625, 224.1295623779297, 152.95401000976562, 624.9058837890625, 132.53237915039062, 745.7637939453125, 1782.684326171875, -29.424240112304688, 217.2412872314453, 320.1279296875, 466.35943603515625, 651.46484375, 456.4030456542969, -349.70208740234375, 561.5845947265625, -97.19000244140625, 83.87812042236328, 647.6220092773438, 310.5745849609375, 746.921875, 1174.6739501953125, 418.5238037109375, 1351.6466064453125, 421.2105712890625, 161.5283966064453, 444.9150695800781, -60.37565612792969, 428.1239013671875, 636.472412109375, 783.956298828125, 89.84049224853516, -2.3790435791015625, 938.4439697265625, 371.03875732421875, 55.28578567504883, 174.85671997070312, 166.51625061035156, 468.7269592285156, 1156.3111572265625, -334.2077941894531, 677.4659423828125, -85.93017578125, 1374.892333984375, 987.5092163085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000625.npy"} +{"epoch": 0.9177679882525698, "step": 626, "batch_size": 64, "mean": 288.66387939453125, "std": 668.381591796875, "min": -2632.54296875, "p10": -395.3630432128905, "median": 273.9818801879883, "p90": 934.4569091796877, "max": 2064.61083984375, "pos_frac": 0.71875, "sample": [588.3182373046875, -14.62030029296875, 504.3626708984375, 348.51800537109375, 126.19522094726562, 233.9840545654297, 715.943603515625, 318.3312072753906, 886.7108764648438, -2632.54296875, 1644.6961669921875, 327.3091735839844, 1485.3892822265625, 2064.61083984375, -621.4680786132812, 192.93698120117188, 244.9019775390625, -465.8954162597656, -4.291290283203125, -154.1996307373047, 18.12298583984375, 246.9864959716797, 456.648681640625, 100.72803497314453, 26.513139724731445, 18.297714233398438, 155.64532470703125, -656.548095703125, 434.7272644042969, 629.3681640625, -125.67192077636719, 641.4161987304688, -543.0438232421875, -694.8964233398438, 587.2951049804688, 1371.68701171875, 862.9049682617188, 357.3697204589844, 365.28277587890625, 657.2705078125, 562.9526977539062, -230.78750610351562, 954.9194946289062, 477.41961669921875, 1768.8349609375, 704.3167114257812, -113.42195892333984, 100.0007553100586, 104.60275268554688, 390.82012939453125, -54.26708984375, 816.7484130859375, 300.9772644042969, 1147.6005859375, -17.141265869140625, 205.83763122558594, 687.6873168945312, 346.7723388671875, -667.9185791015625, -85.50669860839844, 138.98130798339844, -47.003684997558594, 306.9683837890625, -24.199565887451172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000626.npy"} +{"epoch": 0.9192364170337739, "step": 627, "batch_size": 64, "mean": 585.8370361328125, "std": 846.9072265625, "min": -720.9466552734375, "p10": -175.76781005859374, "median": 423.5992431640625, "p90": 1452.641650390625, "max": 4083.74365234375, "pos_frac": 0.75, "sample": [584.64892578125, 247.742919921875, 1164.2430419921875, 481.7475891113281, -167.52285766601562, -68.28987121582031, 118.75870513916016, 639.830322265625, 566.4345092773438, 428.4001770019531, 1254.297119140625, -484.1510925292969, 208.39404296875, 489.5845642089844, -104.73706817626953, 1034.021484375, -145.17251586914062, 839.8618774414062, 950.564697265625, 1129.8133544921875, -718.6619873046875, -179.30136108398438, 621.671142578125, -20.13581657409668, 1745.09326171875, 194.64895629882812, 418.7983093261719, 2497.11083984375, 975.560791015625, 382.40338134765625, 1951.179443359375, 74.00849914550781, 570.2305297851562, 1204.4278564453125, 538.8013305664062, 1721.9776611328125, 1426.6854248046875, 177.81655883789062, 535.572265625, -152.49774169921875, 118.50830841064453, -21.936525344848633, 3324.06494140625, 1463.7657470703125, 880.61767578125, -227.83651733398438, 4083.74365234375, -272.806396484375, 503.8492126464844, -83.88650512695312, 1004.2366333007812, 399.8402404785156, 899.7656860351562, -720.9466552734375, 267.9881591796875, 312.7360534667969, 430.968994140625, 230.3876495361328, -405.8827819824219, 1299.891845703125, -39.713356018066406, 254.6306915283203, 295.4400939941406, 362.2850341796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000627.npy"} +{"epoch": 0.920704845814978, "step": 628, "batch_size": 64, "mean": 441.0691833496094, "std": 646.2359008789062, "min": -798.4420166015625, "p10": -342.1834686279296, "median": 374.0031280517578, "p90": 1329.3171508789062, "max": 2156.11328125, "pos_frac": 0.703125, "sample": [339.4017028808594, -22.77862548828125, 408.60455322265625, 1312.0416259765625, 1497.44482421875, 164.59414672851562, 671.3839111328125, 136.02764892578125, -205.71653747558594, -48.38592529296875, 142.75387573242188, 172.24847412109375, -798.4420166015625, 1054.90625, 1093.9254150390625, 1195.411376953125, -58.06379699707031, -262.166748046875, 1629.747802734375, 504.72052001953125, 2156.11328125, 535.4434204101562, 246.80404663085938, -202.4378662109375, -62.018272399902344, -376.4763488769531, 511.3955078125, 911.7595825195312, -433.889404296875, -438.1736755371094, 959.4857788085938, 280.9152526855469, 794.956787109375, 1313.879638671875, 1282.48583984375, 867.169189453125, 464.1830749511719, 731.4976806640625, 152.76780700683594, 179.71575927734375, -144.94668579101562, 545.58984375, 511.579833984375, 113.03229522705078, 1550.1636962890625, 1349.970947265625, 1335.9332275390625, 240.77896118164062, -392.48748779296875, -748.3157348632812, -107.02169799804688, -168.6269073486328, 1337.1883544921875, 841.01025390625, -702.1393432617188, -32.40399932861328, 1086.311767578125, 430.8770751953125, 525.2048950195312, 131.45330810546875, 1174.892333984375, 564.8997192382812, -40.95472717285156, 23.203845977783203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000628.npy"} +{"epoch": 0.922173274596182, "step": 629, "batch_size": 64, "mean": 401.8076477050781, "std": 540.7566528320312, "min": -1010.6290893554688, "p10": -263.29606781005856, "median": 414.2801208496094, "p90": 1145.1170532226563, "max": 1719.5050048828125, "pos_frac": 0.796875, "sample": [218.1041259765625, 1234.119140625, -338.8468017578125, -456.2355041503906, 375.7115478515625, 415.073486328125, 398.194091796875, 789.2224731445312, 1264.0523681640625, 532.0385131835938, 542.4671020507812, 784.045654296875, -85.9698257446289, 447.6711120605469, 1240.77197265625, -170.27667236328125, 187.35296630859375, 31.300968170166016, 34.59259796142578, 581.2565307617188, 30.9556884765625, 1117.8179931640625, 423.6675720214844, -204.19081115722656, 166.96852111816406, 489.9734191894531, -786.6197509765625, -131.54632568359375, 1104.654052734375, 744.6605224609375, 73.09488677978516, 819.0254516601562, 1544.96533203125, 1719.5050048828125, 273.5823059082031, 135.36045837402344, -1010.6290893554688, 1316.60791015625, 946.9998779296875, -288.62689208984375, -290.97113037109375, -660.0709838867188, 342.22088623046875, 368.78424072265625, 512.386474609375, 1156.816650390625, 305.2991027832031, 506.95025634765625, 699.6610717773438, -27.59722328186035, 813.7218017578125, 369.9530029296875, 150.97265625, 644.91357421875, 207.73577880859375, 523.6707763671875, 33.31330108642578, 655.6342163085938, 897.4722900390625, 515.9534301757812, 413.48675537109375, 543.9052734375, 609.99658203125, -89.39212036132812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000629.npy"} +{"epoch": 0.9236417033773862, "step": 630, "batch_size": 64, "mean": 228.9735565185547, "std": 694.4979248046875, "min": -1351.7347412109375, "p10": -466.42320251464844, "median": 164.32184600830078, "p90": 994.2103881835939, "max": 3232.13671875, "pos_frac": 0.6875, "sample": [1136.8509521484375, -1210.330810546875, -564.8382568359375, 353.3958740234375, -230.17843627929688, 691.6629028320312, 64.95207977294922, -231.83599853515625, 2354.0654296875, -242.66995239257812, 666.5784912109375, -104.56739807128906, 189.66879272460938, 1010.6966552734375, 824.6498413085938, -1351.7347412109375, 177.49514770507812, -587.864013671875, 307.4931640625, 203.51744079589844, 403.4898376464844, -172.4143829345703, 144.17381286621094, 1461.5733642578125, -195.4530792236328, 3232.13671875, 63.26640319824219, 955.742431640625, 131.36282348632812, -235.3642120361328, 34.04718017578125, -319.13494873046875, 285.14593505859375, -541.423583984375, 327.4678955078125, -469.1736755371094, -169.24513244628906, 352.2942810058594, 156.44203186035156, 408.16204833984375, 173.20123291015625, -192.38519287109375, 291.3041687011719, -659.4554443359375, 165.14794921875, 370.54034423828125, 133.87109375, 284.37811279296875, 680.083740234375, -63.96253204345703, 78.33467102050781, 1034.559326171875, 657.6636352539062, 163.49574279785156, -299.3035888671875, 40.64881896972656, 401.9081726074219, 123.18809509277344, 633.4972534179688, -460.00543212890625, 1191.14013671875, 196.19834899902344, 368.044921875, 32.11102294921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000630.npy"} +{"epoch": 0.9251101321585903, "step": 631, "batch_size": 64, "mean": 368.1886291503906, "std": 666.9747924804688, "min": -1875.023193359375, "p10": -419.7190795898437, "median": 354.68621826171875, "p90": 1190.35810546875, "max": 1934.2716064453125, "pos_frac": 0.71875, "sample": [1188.35791015625, 974.5239868164062, -424.4015808105469, 352.41717529296875, -367.99151611328125, 496.7108154296875, -345.1336364746094, -143.51409912109375, -230.02053833007812, -514.709716796875, 1641.197265625, 1238.2806396484375, -474.5953063964844, 534.4418334960938, -558.0241088867188, 152.2227325439453, 572.9387817382812, 919.3751220703125, 33.098243713378906, -408.7932434082031, 156.20724487304688, -33.66636657714844, 282.6677551269531, -649.2046508789062, 1010.0526123046875, 66.46255493164062, 120.8396987915039, -218.2710418701172, 801.364990234375, 792.38037109375, -1875.023193359375, 496.9191589355469, 1934.2716064453125, 711.8433227539062, 356.95526123046875, 606.7335815429688, 333.537353515625, -163.67787170410156, -162.86724853515625, 1191.21533203125, 135.44607543945312, 635.1707763671875, 1575.0936279296875, 924.7788696289062, 621.130859375, -864.1566772460938, 1148.8072509765625, 345.1605224609375, 1495.197998046875, 668.2318725585938, 54.69158172607422, 631.8358764648438, -162.26177978515625, 413.3781433105469, 361.32147216796875, 206.567626953125, 1212.473876953125, 701.2156982421875, 1008.631103515625, -84.11922454833984, 230.971435546875, 873.3966064453125, 263.0690612792969, 772.9176635742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000631.npy"} +{"epoch": 0.9265785609397944, "step": 632, "batch_size": 64, "mean": 392.960205078125, "std": 649.0064086914062, "min": -1274.6002197265625, "p10": -334.2511871337891, "median": 370.2868957519531, "p90": 1146.819812011719, "max": 2326.781982421875, "pos_frac": 0.734375, "sample": [655.0185546875, -763.1083374023438, 287.141357421875, 382.36773681640625, 634.414306640625, -1274.6002197265625, 1166.69140625, 1100.4527587890625, 1557.8895263671875, 998.0347290039062, 770.9824829101562, 680.083251953125, 227.906005859375, 903.5234375, 203.45938110351562, 213.00601196289062, 580.9195556640625, 456.0211486816406, 46.60622787475586, 95.4214859008789, 1631.827880859375, 355.5776062011719, 724.003173828125, 489.09185791015625, -501.91937255859375, 1172.82373046875, 461.1761474609375, -319.2027893066406, 440.35491943359375, -234.6361083984375, -1100.3720703125, 343.8437194824219, 1083.7977294921875, 1081.5115966796875, 580.6038818359375, 772.1907348632812, 1044.1600341796875, 762.4081420898438, 382.2241516113281, -327.6755065917969, 1308.349365234375, -96.12842559814453, -234.4945068359375, -113.51907348632812, 622.5343627929688, 289.8134765625, 691.2138671875, 183.60601806640625, 1172.246826171875, 2326.781982421875, 1000.6231689453125, -118.65267944335938, -500.564697265625, 133.48492431640625, 129.62620544433594, -337.0693359375, 358.3496398925781, 308.5388488769531, -556.283935546875, -2.5679931640625, 89.61485290527344, -226.21470642089844, -117.81819915771484, 1073.9622802734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000632.npy"} +{"epoch": 0.9280469897209985, "step": 633, "batch_size": 64, "mean": 281.1269226074219, "std": 587.009521484375, "min": -1113.125244140625, "p10": -364.37037658691406, "median": 248.6222915649414, "p90": 1121.1532958984378, "max": 1527.3831787109375, "pos_frac": 0.703125, "sample": [-330.1580810546875, 1382.63330078125, 861.89208984375, -719.4965209960938, 472.54248046875, 326.23187255859375, -390.79339599609375, -83.38284301757812, 1227.74462890625, 84.56352996826172, 241.42691040039062, 153.59283447265625, 401.77899169921875, -1113.125244140625, 618.1500244140625, 134.8089141845703, 165.20654296875, 1174.6845703125, 321.8448791503906, -258.0446472167969, -125.01355743408203, 351.2825622558594, 308.072265625, -39.99095916748047, 718.652099609375, 121.13540649414062, -59.54270935058594, -188.91558837890625, -894.1070556640625, 1009.2698974609375, 350.3699951171875, 238.33013916015625, 1161.1319580078125, 503.9486999511719, 822.7189331054688, -66.16517639160156, -330.4901123046875, -1070.8818359375, -372.6436767578125, 118.46399688720703, 1394.783935546875, 453.6856384277344, 249.75897216796875, -803.6199951171875, 600.904052734375, 649.1912231445312, 502.359130859375, 1527.3831787109375, 981.1090087890625, 1027.8697509765625, 123.94827270507812, 250.826171875, -345.0660095214844, 969.7202758789062, 247.48561096191406, -27.865463256835938, 161.75453186035156, 451.91552734375, 1330.98193359375, 87.37796020507812, 568.391357421875, -5.664031982421875, 73.73303985595703, 293.4326171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000633.npy"} +{"epoch": 0.9295154185022027, "step": 634, "batch_size": 64, "mean": 243.2664337158203, "std": 595.4829711914062, "min": -964.5828247070312, "p10": -458.78165283203117, "median": 183.95001220703125, "p90": 1034.9295654296875, "max": 2206.643798828125, "pos_frac": 0.65625, "sample": [194.1242218017578, 820.3815307617188, -260.9128112792969, 1562.34375, -575.2748413085938, 50.966552734375, 29.2073917388916, 181.59884643554688, 291.8164978027344, 1342.4517822265625, 34.90374755859375, 357.03173828125, -96.2139663696289, 288.64410400390625, 657.1220092773438, 186.30117797851562, 294.47540283203125, -169.05853271484375, -715.330322265625, 1010.761962890625, 1142.0767822265625, -221.4523162841797, -190.33070373535156, 102.84768676757812, -45.472808837890625, 60.27727508544922, 73.50735473632812, 694.8583984375, 471.4665222167969, 29.166492462158203, 2206.643798828125, 312.64361572265625, 616.9056396484375, -964.5828247070312, 1045.287109375, 1198.1846923828125, 640.5433349609375, -381.1195068359375, -492.0654296875, 200.73941040039062, 768.620361328125, -128.73023986816406, -103.70075225830078, -903.64794921875, 218.42747497558594, -587.1417846679688, 48.34391403198242, -507.1888427734375, -178.0030517578125, 913.5374755859375, -357.47113037109375, 767.09619140625, 448.048095703125, 1318.26123046875, 407.2522888183594, 91.57481384277344, 264.5868835449219, 681.6396484375, -89.65030670166016, 248.56466674804688, -89.86492919921875, -20.767745971679688, -284.6392517089844, 658.4407958984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000634.npy"} +{"epoch": 0.9309838472834068, "step": 635, "batch_size": 64, "mean": 357.1339111328125, "std": 645.9878540039062, "min": -1089.2183837890625, "p10": -296.12164764404287, "median": 327.056640625, "p90": 997.9147949218752, "max": 2458.15478515625, "pos_frac": 0.71875, "sample": [828.524169921875, 121.41004943847656, 2458.15478515625, -102.733642578125, 426.0731506347656, -74.94026947021484, 133.29058837890625, 22.199230194091797, -11.381210327148438, 497.85308837890625, 168.88519287109375, 1430.2628173828125, -49.11534118652344, 838.5813598632812, -531.330322265625, 377.959228515625, 458.7346496582031, -20.893035888671875, 831.46533203125, 27.966079711914062, 379.1455078125, 161.54954528808594, 704.7122802734375, 942.8863525390625, 332.815185546875, 51.406166076660156, 11.782608032226562, 384.6563720703125, -198.48353576660156, 1968.912841796875, -96.3713150024414, 376.761474609375, 338.2779541015625, 1320.3216552734375, 703.979736328125, -93.32176971435547, 91.99279022216797, 0.4330253601074219, 506.1995544433594, 1021.4984130859375, -182.9170684814453, 897.7877197265625, 882.7171630859375, 377.7978820800781, -714.217041015625, -182.82333374023438, 639.0450439453125, -48.858253479003906, -337.966552734375, 670.1234130859375, -350.78558349609375, -398.10479736328125, -1089.2183837890625, 757.6887817382812, 1218.53662109375, 321.298095703125, 369.7298889160156, 675.698974609375, -901.7706298828125, 2294.2431640625, 155.302734375, 470.0411071777344, 293.62518310546875, 299.4766845703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000635.npy"} +{"epoch": 0.9324522760646109, "step": 636, "batch_size": 64, "mean": 424.78887939453125, "std": 662.819091796875, "min": -1038.6435546875, "p10": -231.4474411010742, "median": 330.1589050292969, "p90": 988.5485290527345, "max": 3623.93701171875, "pos_frac": 0.734375, "sample": [44.768890380859375, -369.0714111328125, 90.27035522460938, -166.59811401367188, 314.2300720214844, -434.299072265625, 394.2744140625, 325.96807861328125, 544.0062255859375, 112.95577239990234, 802.123291015625, -349.38800048828125, 381.6951904296875, 1122.74072265625, -23.939491271972656, 1012.7492065429688, -44.9119873046875, 2296.793212890625, 1189.981201171875, 530.6761474609375, 217.15773010253906, 992.6016235351562, -164.42996215820312, -280.279296875, 696.9256591796875, 273.7805480957031, 979.09130859375, 382.71356201171875, 179.32322692871094, 3623.93701171875, 59.86192321777344, -123.49537658691406, 948.8117065429688, 1278.1778564453125, -366.2297058105469, 939.6123046875, 906.654296875, 396.8788757324219, 628.517333984375, -216.116943359375, 694.5601806640625, 221.9744110107422, 768.6856689453125, 851.9625244140625, 653.5189819335938, 303.31732177734375, 749.762939453125, -101.41378784179688, 276.56353759765625, -200.14532470703125, 181.2048797607422, -3.170440673828125, -1038.6435546875, 334.3497314453125, 311.5173034667969, -238.0176544189453, 930.3839721679688, 758.1934814453125, 316.3987121582031, 456.9288330078125, -6.5925140380859375, 378.67034912109375, 492.6803894042969, 965.2800903320312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000636.npy"} +{"epoch": 0.933920704845815, "step": 637, "batch_size": 64, "mean": 469.54571533203125, "std": 647.8881225585938, "min": -1010.3731079101562, "p10": -86.6541633605957, "median": 321.471923828125, "p90": 1427.0541992187502, "max": 2398.585693359375, "pos_frac": 0.8125, "sample": [124.36852264404297, 110.16683197021484, 548.17529296875, 9.922660827636719, 56.48988723754883, 1010.640625, 306.37359619140625, 63.7442626953125, 619.8492431640625, 1275.21142578125, 753.1951293945312, -1010.3731079101562, 75.1226806640625, 247.29159545898438, -238.3695526123047, 633.9061889648438, 886.4509887695312, 1456.1717529296875, 1035.8104248046875, 640.14208984375, 1464.3609619140625, 268.28680419921875, 160.12188720703125, 2398.585693359375, 99.57669067382812, 533.4848022460938, -254.18344116210938, 3.8405418395996094, 287.1208190917969, -53.352203369140625, 353.1391296386719, -176.58389282226562, -154.435302734375, 612.3173217773438, 403.0963439941406, -87.09999084472656, 336.57025146484375, 725.2593383789062, 841.0733642578125, 259.625244140625, 618.946044921875, 359.40521240234375, 250.75765991210938, 996.564453125, 804.864013671875, 575.2191772460938, 1443.951904296875, -1.4409103393554688, 1746.1158447265625, -955.2022705078125, 1173.6678466796875, -33.82252502441406, 360.188232421875, 1632.08349609375, 406.13519287109375, 146.89552307128906, 51.55775451660156, 1387.626220703125, -48.53257751464844, 2348.480712890625, 14.10965347290039, 108.58262634277344, 125.291015625, -85.61389923095703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000637.npy"} +{"epoch": 0.9353891336270191, "step": 638, "batch_size": 64, "mean": 366.377685546875, "std": 718.7194213867188, "min": -3386.078125, "p10": -92.91921844482422, "median": 267.38604736328125, "p90": 1148.499267578125, "max": 1764.4619140625, "pos_frac": 0.734375, "sample": [1292.222900390625, -834.0191040039062, -3386.078125, 935.1614379882812, 276.71759033203125, 138.53598022460938, 35.4598388671875, -25.5557861328125, 1133.932373046875, 328.2236633300781, 748.46337890625, 1346.3824462890625, 57.68748474121094, 720.5791625976562, 253.71775817871094, 483.575439453125, 679.1924438476562, -502.2288513183594, 529.10546875, -21.097454071044922, 442.9212951660156, 42.59923553466797, -239.82781982421875, 122.22199249267578, 308.70782470703125, 947.4032592773438, 1523.96435546875, -18.813678741455078, 663.47265625, 236.47274780273438, 741.6259765625, 258.05450439453125, 712.8362426757812, -77.7247543334961, -92.94639587402344, 934.2969970703125, 970.3873291015625, -55.67277145385742, 347.4693908691406, -341.5472717285156, -24.380231857299805, 419.2364196777344, 237.29202270507812, 1764.4619140625, -92.63549041748047, -25.728044509887695, 946.6337890625, 120.95332336425781, 613.1932983398438, 1154.5538330078125, -45.816497802734375, -92.85580444335938, 1018.3814697265625, 497.44000244140625, 176.98361206054688, 87.94857788085938, 41.290016174316406, 51.324928283691406, 852.3038940429688, 1134.3719482421875, -137.96408081054688, 104.31053924560547, 1655.59814453125, 1375.3963623046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000638.npy"} +{"epoch": 0.9368575624082232, "step": 639, "batch_size": 64, "mean": 166.44647216796875, "std": 696.6668701171875, "min": -1281.532958984375, "p10": -663.3500122070312, "median": 166.78636169433594, "p90": 1105.3677612304691, "max": 1683.3221435546875, "pos_frac": 0.625, "sample": [1542.2178955078125, -858.4353637695312, -515.956787109375, 1179.397216796875, 1003.910400390625, -420.91741943359375, 309.1643371582031, 382.9885559082031, -572.0902709960938, 838.7384033203125, 1634.1431884765625, -56.455936431884766, 388.2599792480469, 603.3709716796875, -1007.1493530273438, 299.8406677246094, -418.0322265625, -445.35260009765625, 51.857879638671875, 427.8101501464844, 708.159423828125, -1208.788330078125, 861.6383666992188, 606.2598876953125, -618.6256713867188, -1281.532958984375, -1063.1572265625, 278.6259460449219, 82.0921401977539, 32.65266418457031, 587.1513061523438, 529.3971557617188, 1.6189498901367188, -666.445556640625, 1148.8494873046875, 1236.593017578125, -374.1380615234375, 832.8739624023438, -331.0767822265625, -9.776710510253906, 61.841087341308594, -63.578887939453125, 660.7100830078125, 432.8426818847656, 1683.3221435546875, -818.8806762695312, -227.02383422851562, 98.36702728271484, -315.6968994140625, -439.07305908203125, 163.0733184814453, 730.99755859375, 284.39837646484375, 939.8582153320312, -622.8057861328125, -333.0620422363281, -656.1270751953125, 170.49940490722656, 118.65443420410156, 670.8967895507812, 336.7237548828125, 1288.5947265625, 573.6690673828125, 194.69345092773438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000639.npy"} +{"epoch": 0.9383259911894273, "step": 640, "batch_size": 64, "mean": 554.8192138671875, "std": 775.8626098632812, "min": -935.2141723632812, "p10": -212.07056732177733, "median": 411.3685302734375, "p90": 1418.1853149414064, "max": 2963.619140625, "pos_frac": 0.765625, "sample": [757.0156860351562, 533.6309814453125, -506.5576171875, -171.81053161621094, 137.86212158203125, 607.24072265625, 284.12664794921875, 1369.7447509765625, 1323.7325439453125, 214.16421508789062, 1438.945556640625, 1644.0430908203125, 949.3600463867188, 46.882720947265625, -290.9989929199219, 487.6976013183594, 879.7160034179688, -169.244384765625, -62.62648010253906, 1323.857666015625, -867.697998046875, 953.987548828125, 399.80224609375, 767.6246337890625, -215.62130737304688, -935.2141723632812, 121.59616088867188, 362.12420654296875, 2963.619140625, 900.7900390625, 599.2833251953125, 1007.624267578125, -654.6163940429688, 610.68359375, 2561.228271484375, 325.6861267089844, -171.98678588867188, 249.70484924316406, 82.0634765625, 344.8682861328125, -84.86674499511719, 188.8777618408203, 10.334457397460938, 1226.0816650390625, 1002.5758056640625, 166.97549438476562, 888.4686279296875, 671.6976318359375, -487.5157775878906, 1328.0262451171875, 2194.59814453125, 348.4496154785156, -21.464447021484375, 1072.37841796875, -30.372238159179688, 684.0817260742188, 422.934814453125, 860.2750854492188, 92.66340637207031, 743.6536254882812, 1479.3560791015625, 2465.0654296875, -203.78550720214844, 287.6089782714844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000640.npy"} +{"epoch": 0.9397944199706314, "step": 641, "batch_size": 64, "mean": 367.82965087890625, "std": 749.135986328125, "min": -1224.53173828125, "p10": -550.0671722412109, "median": 374.3223114013672, "p90": 1120.0082763671876, "max": 3079.039794921875, "pos_frac": 0.75, "sample": [165.18048095703125, 371.960693359375, 535.634033203125, 1126.9906005859375, -576.4561767578125, -135.81527709960938, 347.9476318359375, 1521.8037109375, 495.76318359375, 210.6702423095703, -1080.460205078125, 599.931884765625, 35.30958557128906, -425.7651062011719, 3079.039794921875, -573.6497192382812, -495.0412292480469, -462.426025390625, 8.967514038085938, 983.45361328125, -105.25027465820312, 66.1180191040039, 111.52227783203125, 590.4420776367188, 240.24539184570312, 672.447998046875, -1210.8265380859375, 877.1605224609375, 561.7630615234375, 159.9490203857422, 971.74609375, 481.19903564453125, 620.5006103515625, 72.6341552734375, 171.2369384765625, -37.38895034790039, 1297.9334716796875, 307.9871826171875, 801.8715209960938, 658.9553833007812, 128.9901123046875, 16.190309524536133, -73.7380142211914, -681.9588623046875, 280.49957275390625, 813.7985229492188, -1224.53173828125, 1103.7161865234375, 992.2409057617188, 974.1093139648438, 2363.845703125, 813.1953735351562, -1110.907470703125, 736.0506591796875, -21.197242736816406, 376.6839294433594, 624.9957885742188, -357.50189208984375, 589.33154296875, 376.9907531738281, 773.6009521484375, 1235.7564697265625, 591.3789672851562, 1176.2711181640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000641.npy"} +{"epoch": 0.9412628487518355, "step": 642, "batch_size": 64, "mean": 445.2503662109375, "std": 907.1710205078125, "min": -1610.4476318359375, "p10": -373.2165893554687, "median": 308.92420959472656, "p90": 1354.170703125001, "max": 4856.80517578125, "pos_frac": 0.734375, "sample": [99.11622619628906, 643.605224609375, 816.7568359375, 622.1434936523438, 267.19122314453125, -1610.4476318359375, 773.597412109375, 290.9181213378906, -68.93099212646484, -589.9583740234375, -105.45990753173828, 100.18148803710938, 1479.45849609375, -389.6158447265625, 841.4033203125, 1020.8838500976562, 451.48419189453125, 729.6207885742188, -841.2777709960938, 45.47780990600586, 227.92526245117188, -135.615478515625, 428.14617919921875, 44.68846130371094, 287.5089111328125, 654.024658203125, -63.807899475097656, 225.52581787109375, 97.43035888671875, 413.6755676269531, 924.961669921875, 607.5187377929688, 790.6767578125, 2424.818115234375, -231.74612426757812, 72.22402954101562, 326.9302978515625, 741.7490234375, -519.64697265625, 219.19894409179688, 843.3709106445312, 163.7967529296875, 446.1053771972656, 1457.8125, -334.95166015625, 1112.33984375, 778.5526123046875, 1801.0777587890625, -192.00868225097656, -78.2752685546875, 4856.80517578125, 955.1101684570312, -675.6651000976562, 447.404052734375, -183.5696258544922, -194.3476104736328, 347.9881591796875, 2476.012451171875, 42.577362060546875, -1018.7236328125, 444.1627197265625, 1680.6024169921875, 230.13311767578125, 977.3790893554688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000642.npy"} +{"epoch": 0.9427312775330396, "step": 643, "batch_size": 64, "mean": 342.09228515625, "std": 557.9434814453125, "min": -748.882568359375, "p10": -416.50483093261715, "median": 323.69895935058594, "p90": 1032.1501831054688, "max": 1930.4443359375, "pos_frac": 0.765625, "sample": [358.9494323730469, 283.3996276855469, 165.5831756591797, 848.6129150390625, 811.1127319335938, 666.060791015625, 492.00390625, -70.51171112060547, 323.57867431640625, 912.2095947265625, 15.747045516967773, -681.4571533203125, 294.8492736816406, 257.5123596191406, -201.96888732910156, 975.61181640625, 819.3618774414062, 1066.938720703125, 1048.3897705078125, 503.13848876953125, 167.56373596191406, 40.00288391113281, -317.5326843261719, 234.43106079101562, 61.41286087036133, 149.806884765625, -748.882568359375, 593.406494140625, 501.3657531738281, 993.470947265625, -490.22479248046875, 323.8192443847656, 359.8194580078125, 272.1544189453125, 994.2578125, 1197.99609375, 398.0217590332031, -449.2362060546875, 688.8675537109375, 341.8617858886719, -520.8171997070312, 1849.9503173828125, 1074.882568359375, 330.49224853515625, 661.4161987304688, 111.19074249267578, 2.964170455932617, 388.67108154296875, 1930.4443359375, 53.62907791137695, 918.073974609375, 624.011474609375, 715.142333984375, -280.10333251953125, 1054.18603515625, 41.50941467285156, -391.0775146484375, -427.4022521972656, 379.21240234375, -97.58395385742188, -163.529296875, -151.16201782226562, -569.3695068359375, 157.6702880859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000643.npy"} +{"epoch": 0.9441997063142438, "step": 644, "batch_size": 64, "mean": 350.287109375, "std": 726.9475708007812, "min": -1838.5657958984375, "p10": -491.5324554443358, "median": 204.83700561523438, "p90": 1209.5008666992192, "max": 2432.180908203125, "pos_frac": 0.734375, "sample": [2432.180908203125, 33.405784606933594, 841.8756713867188, -1838.5657958984375, 540.79443359375, 360.57940673828125, -713.209716796875, 86.58790588378906, 921.1129760742188, 291.6221923828125, 43.65211486816406, -51.86070251464844, 107.49127960205078, 460.27850341796875, 140.74220275878906, 623.56689453125, 180.33514404296875, -7.7218017578125, 228.10208129882812, -533.6375122070312, 160.72183227539062, 2093.677001953125, 50.95130920410156, -641.76171875, 739.4037475585938, 831.143798828125, -1.9887161254882812, 472.13958740234375, -242.50872802734375, 1553.6884765625, -63.44054412841797, -296.8196105957031, 752.3021240234375, -81.17021179199219, 62.06590270996094, -24.2793025970459, 1328.205810546875, 756.630615234375, 667.298828125, 262.2093505859375, 119.89974212646484, -393.2873229980469, 530.7584228515625, 57.33832550048828, 1098.0107421875, 555.666748046875, -589.8530883789062, -239.13494873046875, 132.0852813720703, 1070.11474609375, 1565.27587890625, 834.5870361328125, 95.63417053222656, -1138.970458984375, 1035.407958984375, 1095.781005859375, 349.6898193359375, 1257.2823486328125, 29.507232666015625, 364.9104309082031, 511.2181701660156, 1946.0255126953125, 181.57192993164062, -576.9471435546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000644.npy"} +{"epoch": 0.9456681350954479, "step": 645, "batch_size": 64, "mean": 174.8188934326172, "std": 707.9917602539062, "min": -1354.2305908203125, "p10": -705.8393005371094, "median": 81.15388870239258, "p90": 1141.5938720703127, "max": 2205.183349609375, "pos_frac": 0.609375, "sample": [430.6788635253906, 1076.14404296875, -690.815673828125, 960.1738891601562, -1354.2305908203125, -1130.966552734375, -707.2890014648438, 20.45111083984375, 421.9070129394531, -167.07899475097656, -88.08851623535156, -42.4742431640625, 52.732826232910156, -7.075992584228516, -89.60134887695312, -451.89630126953125, 412.21343994140625, 297.3450622558594, 920.8081665039062, 1169.643798828125, 666.3878173828125, -431.3079833984375, 48.0008544921875, -2.1213531494140625, 109.574951171875, 23.007854461669922, 1170.683349609375, -26.735939025878906, -778.4502563476562, -1199.544677734375, 190.17054748535156, 432.29791259765625, -623.060791015625, 1448.64599609375, 892.7515258789062, 9.098602294921875, 1877.7052001953125, 841.6080322265625, -1148.1712646484375, -702.4566650390625, 1302.11376953125, -73.62136840820312, 113.43153381347656, 171.17701721191406, 449.8038330078125, 548.5457763671875, 430.69012451171875, 309.104736328125, 229.92950439453125, 569.1828002929688, 40.75628662109375, -286.78594970703125, 1283.0096435546875, 21.492576599121094, 471.0670166015625, 2205.183349609375, -121.4277572631836, 308.01605224609375, -851.0762329101562, -126.9855728149414, 651.68408203125, 153.0039825439453, -67.96226501464844, -372.588623046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000645.npy"} +{"epoch": 0.947136563876652, "step": 646, "batch_size": 64, "mean": 308.0731201171875, "std": 571.8493041992188, "min": -1009.4287719726562, "p10": -388.1632110595703, "median": 219.75778198242188, "p90": 1080.864147949219, "max": 1987.7760009765625, "pos_frac": 0.734375, "sample": [896.3077392578125, 1112.2615966796875, -35.95099639892578, 1388.2607421875, 594.90478515625, 232.7240447998047, 638.3053588867188, 687.7940063476562, 968.7298583984375, 26.354995727539062, 331.5789489746094, 497.71539306640625, -904.99267578125, 0.575164794921875, 1003.4322509765625, 170.4179229736328, -359.8125915527344, 273.6966857910156, 1053.133056640625, -114.68285369873047, 400.78521728515625, -140.37628173828125, -158.4466552734375, 635.0187377929688, 80.45114135742188, 324.9672546386719, 446.82513427734375, 234.99497985839844, 46.5230712890625, 846.1159057617188, 101.10406494140625, 462.6252136230469, 78.72488403320312, 189.38047790527344, -187.00794982910156, 206.79151916503906, 62.08868408203125, 1538.325927734375, -216.14051818847656, -1009.4287719726562, 391.6805419921875, 617.3441162109375, 80.33027648925781, -414.534423828125, -78.78074645996094, -521.1257934570312, -68.23857116699219, 51.510887145996094, -428.7747802734375, 1987.7760009765625, 1092.7489013671875, 240.66217041015625, 604.7332153320312, -91.37055969238281, 1348.5140380859375, -400.3134765625, -493.1168212890625, 65.2467269897461, 192.4127655029297, 172.40597534179688, 1213.622314453125, 964.2013549804688, 515.403076171875, 270.2664489746094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000646.npy"} +{"epoch": 0.9486049926578561, "step": 647, "batch_size": 64, "mean": 379.8256530761719, "std": 530.0704956054688, "min": -1085.0350341796875, "p10": -174.6228256225586, "median": 393.8932189941406, "p90": 1078.556689453125, "max": 1888.49072265625, "pos_frac": 0.78125, "sample": [724.6217041015625, 616.2154541015625, 299.9800109863281, 248.53001403808594, 363.9700622558594, 1089.511962890625, 264.8812561035156, 398.8282165527344, 564.8453369140625, 521.8748168945312, 1571.552978515625, 155.75881958007812, 44.41998291015625, 1702.4501953125, 706.6473999023438, 493.5770263671875, -41.80805969238281, 1052.994384765625, 625.68017578125, -155.119140625, 217.03494262695312, 177.74371337890625, -25.70765495300293, -160.3477325439453, 594.65625, 404.1297607421875, -74.56773376464844, 1888.49072265625, -867.6040649414062, 797.5889892578125, -190.656982421875, -204.1304931640625, 83.88137817382812, 735.3792114257812, 997.8887329101562, 754.767333984375, 433.84130859375, 405.22283935546875, -523.5504760742188, 178.71263122558594, 556.088134765625, 51.96160125732422, 1156.7119140625, -180.74072265625, -84.51296997070312, 430.354248046875, 389.9855041503906, 397.8009338378906, -7.912010192871094, 1130.0340576171875, 198.1930694580078, 102.11698150634766, 1090.7926025390625, 200.364990234375, 20.874191284179688, 559.79443359375, -1085.0350341796875, 827.98486328125, -303.0338439941406, 363.65411376953125, 472.0837707519531, 597.5425415039062, 430.7711486816406, 120.78060913085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000647.npy"} +{"epoch": 0.9500734214390602, "step": 648, "batch_size": 64, "mean": 558.360107421875, "std": 771.2487182617188, "min": -1023.7716674804688, "p10": -207.25274963378905, "median": 492.66680908203125, "p90": 1549.1890747070318, "max": 3488.336181640625, "pos_frac": 0.75, "sample": [492.8323059082031, 277.00921630859375, 199.82327270507812, 508.1465759277344, 252.76284790039062, 625.7941284179688, 1927.235595703125, 1602.9366455078125, 706.1400146484375, 1401.3831787109375, -38.403106689453125, -630.4890747070312, -82.23348999023438, 932.7424926757812, 264.4625244140625, -101.13368225097656, 1259.3485107421875, 681.0039672851562, 49.81956481933594, -209.463134765625, -25.380598068237305, 492.5013122558594, -75.00348663330078, 869.2307739257812, -327.30206298828125, 1821.0113525390625, 569.5702514648438, 128.17652893066406, -32.36277770996094, -129.8731689453125, 835.6236572265625, 748.402099609375, -203.41879272460938, 3488.336181640625, 646.1932373046875, 547.693115234375, -1023.7716674804688, 510.5963439941406, 68.35919189453125, -681.69921875, -208.8958740234375, 315.28680419921875, 2237.996826171875, 708.4653930664062, 709.60400390625, 409.31768798828125, 1423.778076171875, -66.71917724609375, 586.0291748046875, 704.136962890625, 1038.7711181640625, 371.1119079589844, 326.3377380371094, 475.46246337890625, 1309.6053466796875, 117.74532318115234, 525.11083984375, 776.904296875, 434.9170227050781, 2274.98046875, 1291.27490234375, 252.0301513671875, -453.8416442871094, 1829.037109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000648.npy"} +{"epoch": 0.9515418502202643, "step": 649, "batch_size": 64, "mean": 272.9219665527344, "std": 667.59814453125, "min": -839.1541748046875, "p10": -473.2643798828125, "median": 214.38704681396484, "p90": 1036.956286621094, "max": 3316.512451171875, "pos_frac": 0.5625, "sample": [455.69683837890625, -16.22490692138672, 394.58544921875, -111.59439086914062, 485.1772766113281, -303.3934020996094, -38.88003158569336, 183.78033447265625, 711.7843017578125, 350.26361083984375, 536.04248046875, -143.17633056640625, -534.2919311523438, 84.54978942871094, -531.3367919921875, -117.15554809570312, -66.92698669433594, 411.6458435058594, -320.9356384277344, 248.717529296875, 685.6400146484375, 160.8287811279297, -470.3209533691406, -186.04090881347656, 925.9537963867188, -752.5446166992188, 664.5419311523438, 907.4436645507812, -81.63786315917969, 607.461181640625, 408.4737548828125, -367.19268798828125, 36.72979736328125, -70.54280853271484, 991.7821044921875, 550.7215576171875, 873.3306884765625, 383.73394775390625, 827.6416625976562, -399.1123046875, 1201.484130859375, 430.42510986328125, -2.8698463439941406, 1076.235595703125, 903.832763671875, -213.76785278320312, 1353.7601318359375, 3316.512451171875, 385.19842529296875, 374.78582763671875, -101.48810577392578, 244.99375915527344, -37.62303161621094, 884.3026733398438, -383.61883544921875, 1206.3184814453125, 1145.326416015625, -474.5258483886719, -16.605609893798828, -103.46265411376953, -835.7725830078125, -839.1541748046875, 1056.316650390625, -478.81610107421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000649.npy"} +{"epoch": 0.9530102790014684, "step": 650, "batch_size": 64, "mean": 423.06866455078125, "std": 719.4315795898438, "min": -1161.5919189453125, "p10": -469.7320648193359, "median": 331.9364013671875, "p90": 1305.7423950195314, "max": 2435.885986328125, "pos_frac": 0.765625, "sample": [171.1776123046875, -599.98828125, 759.9981079101562, 768.9037475585938, 214.93838500976562, 963.6566162109375, 299.7662353515625, -957.9171142578125, 2176.63818359375, 629.9637451171875, 1138.052978515625, -755.6017456054688, 1388.8045654296875, 994.1852416992188, 885.8367919921875, 1252.6142578125, 328.609619140625, 1604.9208984375, 1138.350830078125, 2435.885986328125, 794.5347290039062, -47.03868865966797, 1631.7227783203125, 609.4502563476562, 241.3850860595703, 48.1412467956543, -91.73370361328125, 498.4768981933594, 183.62844848632812, 448.8073425292969, 614.2865600585938, -119.77241516113281, -321.7705078125, -973.61376953125, -451.72174072265625, 258.0656433105469, -27.888408660888672, 1944.89892578125, -112.38151550292969, 17.33477783203125, 510.6651611328125, 244.42727661132812, -477.4507751464844, 597.6663208007812, 502.9544372558594, 607.8717041015625, 80.59254455566406, 910.80810546875, 140.25897216796875, 1328.5115966796875, 302.7646484375, 335.26318359375, 639.230712890625, 942.326416015625, 837.1060180664062, 99.45104217529297, 484.74114990234375, -633.6854858398438, -1161.5919189453125, 241.6143798828125, 758.89306640625, 64.09339141845703, -361.9400329589844, 98.2136001586914], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000650.npy"} +{"epoch": 0.9544787077826725, "step": 651, "batch_size": 64, "mean": 542.5181274414062, "std": 605.9367065429688, "min": -606.4207153320312, "p10": -64.90942382812499, "median": 436.37391662597656, "p90": 1286.8205078125, "max": 2219.025390625, "pos_frac": 0.859375, "sample": [870.2892456054688, 610.0585327148438, 2219.025390625, 100.5228271484375, -310.03729248046875, -14.96063232421875, 405.9976501464844, 610.7030639648438, 20.628829956054688, 152.1917266845703, 1278.4945068359375, 2211.279296875, 323.19122314453125, -159.58331298828125, 64.7142333984375, -77.0405502319336, 47.822269439697266, 527.7447509765625, -300.0570983886719, 819.5970458984375, 1706.59765625, 391.5494384765625, 824.10009765625, 305.35589599609375, 711.0202026367188, 56.27196502685547, -54.14666748046875, -69.52203369140625, 170.512451171875, 736.5078125, 181.8306121826172, 26.233680725097656, 948.2896118164062, 56.0709228515625, 1200.299560546875, 1029.1201171875, 573.66650390625, 73.83438110351562, 702.6505126953125, 4.219890594482422, 417.7236633300781, 1139.5721435546875, -292.072021484375, 549.740234375, 728.8452758789062, 659.4087524414062, 38.273590087890625, 939.3748168945312, 79.92820739746094, 660.4146728515625, 213.39566040039062, 355.7431640625, 1290.3887939453125, 1843.6749267578125, 395.02825927734375, -606.4207153320312, 1129.0487060546875, 455.024169921875, 1754.372802734375, 485.0326232910156, 1069.9437255859375, 1438.186279296875, 883.81982421875, 117.6673583984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000651.npy"} +{"epoch": 0.9559471365638766, "step": 652, "batch_size": 64, "mean": 412.978759765625, "std": 680.460205078125, "min": -1406.5001220703125, "p10": -304.3774322509764, "median": 305.8762664794922, "p90": 1306.0082031250004, "max": 2448.010986328125, "pos_frac": 0.796875, "sample": [356.4085998535156, 129.41744995117188, 793.8242797851562, -1134.901123046875, 562.477294921875, 441.5021057128906, 186.5148162841797, -464.0238342285156, 772.8509521484375, 162.35235595703125, 1333.9093017578125, 1366.1417236328125, 1216.7166748046875, -1406.5001220703125, 73.46929168701172, 555.83447265625, 47.147369384765625, 854.22265625, 1345.4752197265625, -357.1651916503906, 1139.65673828125, 328.4469909667969, 98.89908599853516, -181.20599365234375, 41.78131866455078, 7.9041748046875, 877.3831787109375, -70.13182067871094, 404.9179992675781, 97.60315704345703, 120.04545593261719, 2448.010986328125, -375.3944396972656, 2299.60498046875, 331.9269714355469, 1590.585693359375, 36.53569030761719, 101.211669921875, 7.492006301879883, 31.084075927734375, 459.51409912109375, -16.32038116455078, 206.89346313476562, -100.6105728149414, 982.2008056640625, 902.484130859375, 897.400634765625, -113.92276763916016, 506.76507568359375, 738.0467529296875, 1704.032470703125, 86.22682189941406, 792.3049926757812, 235.34695434570312, -491.8009948730469, 428.2961120605469, 1240.9056396484375, -377.56024169921875, 384.3723449707031, 771.4781494140625, 231.47230529785156, 531.4949340820312, 283.3055419921875, -23.7169189453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000652.npy"} +{"epoch": 0.9574155653450808, "step": 653, "batch_size": 64, "mean": 382.07867431640625, "std": 660.6478271484375, "min": -1091.6036376953125, "p10": -303.43859558105464, "median": 274.14305114746094, "p90": 1258.2618774414066, "max": 2227.124267578125, "pos_frac": 0.703125, "sample": [849.417236328125, -0.534454345703125, -144.9710235595703, 1852.6851806640625, 232.1973419189453, -317.4214172363281, 98.34913635253906, -222.1103515625, 74.1632080078125, 908.2011108398438, 583.0440673828125, -364.8043212890625, -64.12291717529297, 1311.7249755859375, 910.23486328125, 1285.447509765625, 847.851806640625, 1194.8287353515625, 1189.475830078125, -146.10113525390625, 183.63735961914062, 291.2102355957031, 1774.8072509765625, 472.38104248046875, 495.86480712890625, 780.05078125, 1049.5372314453125, 293.7947998046875, -14.326927185058594, -458.0728454589844, 1384.423095703125, 257.07586669921875, 476.47076416015625, -1091.6036376953125, -243.23605346679688, -171.98512268066406, 371.95379638671875, 540.71875, -670.4282836914062, 2127.662353515625, 2227.124267578125, 644.5110473632812, 6.047468185424805, -37.522544860839844, 624.1444091796875, 502.32305908203125, 18.12646484375, -270.81201171875, 240.02651977539062, 247.26947021484375, 522.0567626953125, 129.78932189941406, 729.8440551757812, 202.63406372070312, 476.8368225097656, -795.2595825195312, -347.8915100097656, 370.6474609375, 700.8844604492188, 119.51057434082031, 511.3088073730469, -261.7515869140625, 233.97140502929688, -268.2757568359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000653.npy"} +{"epoch": 0.9588839941262849, "step": 654, "batch_size": 64, "mean": 369.70977783203125, "std": 625.1162719726562, "min": -543.942138671875, "p10": -333.85187072753905, "median": 311.2791290283203, "p90": 1318.5677368164063, "max": 2109.93017578125, "pos_frac": 0.65625, "sample": [51.03374481201172, 341.2115478515625, -212.96844482421875, 156.9312744140625, 433.68707275390625, -129.1029510498047, -329.61907958984375, 324.434326171875, -6.193279266357422, 785.14208984375, 726.3768920898438, -295.1293640136719, 112.17235565185547, 425.4180908203125, -464.12481689453125, -219.800537109375, -34.63213348388672, 2109.93017578125, 949.7902221679688, 561.3599853515625, 573.5173950195312, 1677.7745361328125, 452.92889404296875, 292.06854248046875, 1460.655517578125, 684.3557739257812, -169.3903045654297, 683.3805541992188, 148.46432495117188, -107.34979248046875, -149.5130157470703, 1308.2559814453125, 379.2309265136719, 26.413482666015625, 31.334320068359375, -28.03795623779297, 877.4180297851562, 150.83493041992188, 310.4598388671875, 387.9476013183594, 458.84161376953125, 710.0130004882812, 724.0264892578125, -265.9393615722656, -543.942138671875, 848.0328369140625, 331.2061767578125, 1811.289794921875, 1322.987060546875, -335.6659240722656, -26.28045654296875, 1438.060546875, 606.50244140625, 312.0984191894531, -64.07256317138672, -396.0399169921875, 583.1683959960938, -349.9765930175781, 1876.385986328125, 1307.4195556640625, 11.071868896484375, -366.41497802734375, -405.8221130371094, -202.19046020507812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000654.npy"} +{"epoch": 0.960352422907489, "step": 655, "batch_size": 64, "mean": 389.4818115234375, "std": 620.5560913085938, "min": -761.013671875, "p10": -320.4558807373047, "median": 278.47413635253906, "p90": 1182.1852416992188, "max": 2355.885986328125, "pos_frac": 0.703125, "sample": [-348.81500244140625, -761.013671875, 1320.6580810546875, 181.26126098632812, 68.8997573852539, 1190.484130859375, 795.926025390625, 908.6652221679688, 294.9350280761719, -232.17581176757812, 712.2400512695312, 49.413185119628906, -66.17803955078125, -571.601318359375, -10.345098495483398, 1210.12255859375, -148.23873901367188, 867.4305419921875, -315.80853271484375, 1053.78369140625, -332.5748291015625, 429.67303466796875, 932.0966796875, -590.716552734375, -322.4476013183594, 582.173828125, 328.4253845214844, 111.04605102539062, -106.77708435058594, 291.3853759765625, -78.99471282958984, 1602.4349365234375, 162.8456573486328, 255.56016540527344, 639.1842651367188, 887.9697265625, -669.5636596679688, 750.4840698242188, 582.6358642578125, 1002.4669189453125, 411.9326171875, 63.946266174316406, 634.8972778320312, 2355.885986328125, 1604.4859619140625, -78.66206359863281, 339.9150695800781, -292.0514221191406, 1077.4429931640625, 561.3605346679688, -53.79240417480469, 98.7442626953125, 15.75848388671875, 265.5628967285156, 1162.8211669921875, 251.0865936279297, 1607.6900634765625, -203.8376007080078, -55.31257629394531, 543.8107299804688, 671.01611328125, 209.9511260986328, 157.4383087158203, 919.7951049804688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000655.npy"} +{"epoch": 0.9618208516886931, "step": 656, "batch_size": 64, "mean": 647.8305053710938, "std": 707.4137573242188, "min": -507.18017578125, "p10": -106.18420333862304, "median": 435.26983642578125, "p90": 1723.362646484375, "max": 2541.660400390625, "pos_frac": 0.8125, "sample": [1516.2406005859375, 827.1543579101562, -507.18017578125, -43.18181228637695, 107.34906768798828, 281.08770751953125, 315.75653076171875, 589.5590209960938, 252.36936950683594, 813.6773681640625, 830.5458984375, 673.048583984375, 303.7315673828125, 38.505889892578125, 420.3958435058594, 1711.7275390625, -130.74444580078125, 135.69009399414062, 1728.34912109375, 178.74386596679688, 828.1083984375, -11.1671142578125, 2021.0146484375, -158.99209594726562, -113.55361938476562, 257.43292236328125, 958.2779541015625, 1177.98486328125, 485.70343017578125, 1114.936279296875, 378.0454406738281, 2541.660400390625, 697.1574096679688, -108.98728942871094, 2292.1865234375, 882.4197387695312, 450.1438293457031, 962.0711669921875, 2391.875732421875, 74.72421264648438, 2163.158203125, -162.11973571777344, 1407.53857421875, 37.2271728515625, 1086.5301513671875, 970.1409301757812, 375.01373291015625, 2101.378173828125, 314.6510925292969, 233.13613891601562, 755.5441284179688, 148.0972442626953, -192.17404174804688, 370.27679443359375, 948.8444213867188, 250.48391723632812, -75.92884826660156, 266.296630859375, -99.64366912841797, 1068.0491943359375, 937.5478515625, -2.664175033569336, 645.594482421875, 750.3065185546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000656.npy"} +{"epoch": 0.9632892804698973, "step": 657, "batch_size": 64, "mean": 516.2921142578125, "std": 687.0513305664062, "min": -1582.7921142578125, "p10": -163.15619506835938, "median": 460.8197937011719, "p90": 1351.7451171875002, "max": 2418.077880859375, "pos_frac": 0.859375, "sample": [1070.251220703125, 30.11829376220703, 800.5647583007812, 465.46197509765625, 1474.6912841796875, 1555.0888671875, 739.1528930664062, 131.7371063232422, 1367.9854736328125, -156.5264892578125, -305.38201904296875, 506.8690185546875, 1313.8509521484375, -193.39378356933594, 221.1538543701172, 456.1776123046875, 61.580841064453125, 384.1393737792969, 99.98259735107422, 1534.6566162109375, 2309.38232421875, 498.39892578125, 75.55968475341797, 2418.077880859375, 1209.698486328125, 1242.5338134765625, 204.0351104736328, 57.85107421875, 684.6860961914062, 638.9312744140625, 818.9259033203125, 450.7420654296875, 33.413177490234375, -1582.7921142578125, 568.3857421875, 470.80621337890625, 261.958251953125, -165.99749755859375, 11.7420654296875, 316.258056640625, 654.6834716796875, -1007.351318359375, -255.30270385742188, 559.8665771484375, 951.4241943359375, 89.29766082763672, 250.79917907714844, 554.6788940429688, 481.8314208984375, 131.00506591796875, 750.56591796875, 720.6746826171875, 207.9438934326172, 802.8489990234375, 389.98834228515625, 359.0825500488281, 1289.6365966796875, 2393.537353515625, 426.8575134277344, -227.57290649414062, 280.5496520996094, -37.16450500488281, 685.5631103515625, 508.49566650390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000657.npy"} +{"epoch": 0.9647577092511013, "step": 658, "batch_size": 64, "mean": 407.352783203125, "std": 607.00048828125, "min": -1120.95361328125, "p10": -375.47870178222655, "median": 394.4300994873047, "p90": 1232.2049926757816, "max": 2101.5654296875, "pos_frac": 0.75, "sample": [743.3314819335938, 731.4295654296875, 745.2391357421875, -340.88507080078125, 392.0473327636719, -197.58709716796875, 1127.5267333984375, 289.3591613769531, -438.2611083984375, 403.4881591796875, 361.9501953125, 1347.114990234375, 92.19529724121094, 213.60507202148438, 315.66912841796875, -570.5867919921875, 338.9518737792969, 388.791015625, 123.75162506103516, -64.67958068847656, 582.8521728515625, 113.45018768310547, 396.8128662109375, 150.36024475097656, -1120.95361328125, 917.2311401367188, -382.1831359863281, 634.4010009765625, 649.2379150390625, -154.20050048828125, -91.52787780761719, 261.90948486328125, -74.18954467773438, 421.26025390625, 280.24615478515625, 1500.5640869140625, -14.811532974243164, 1151.0037841796875, 751.9520263671875, 1127.024658203125, 92.41596984863281, 414.9573669433594, 538.6143188476562, 1375.21142578125, 1262.7015380859375, 577.0928344726562, -503.1775207519531, 575.1574096679688, 840.3411865234375, 813.228271484375, -486.86407470703125, 1387.5457763671875, -359.83502197265625, 2101.5654296875, 735.6296997070312, 1161.04638671875, 596.93798828125, 1447.59814453125, -318.2146301269531, 670.7998657226562, 81.49153137207031, 409.2400207519531, 188.20831298828125, -634.0044555664062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000658.npy"} +{"epoch": 0.9662261380323054, "step": 659, "batch_size": 64, "mean": 408.43243408203125, "std": 594.0093383789062, "min": -878.2866821289062, "p10": -221.46066894531245, "median": 357.66741943359375, "p90": 1129.8806884765627, "max": 2592.271728515625, "pos_frac": 0.765625, "sample": [620.7976684570312, -25.946929931640625, 210.68490600585938, 910.015869140625, -150.4269256591797, -244.5683135986328, 1234.19384765625, -386.5322265625, -450.2359924316406, -146.8247833251953, 1034.58203125, 336.18634033203125, 556.8096313476562, -167.54283142089844, 76.08595275878906, 158.41775512695312, 642.5914916992188, 125.44912719726562, 223.4132080078125, 168.80130004882812, 346.7936096191406, 542.9948120117188, -434.4823913574219, 2592.271728515625, -878.2866821289062, 1402.515869140625, 368.5412292480469, 548.21875, 1164.2945556640625, 332.005615234375, 374.1376647949219, 453.4195556640625, 0.4413337707519531, -24.560623168945312, 689.4603271484375, 403.8741455078125, 52.72746276855469, 9.39621353149414, 61.8331298828125, 1160.8880615234375, -483.800048828125, -29.504173278808594, 699.6996459960938, -136.1195831298828, 769.6442260742188, 10.094173431396484, -72.4640121459961, 651.2373657226562, 1700.0499267578125, 1873.9217529296875, 812.4797973632812, 201.49122619628906, 827.5350341796875, 475.4282531738281, 479.5926513671875, 289.19677734375, 959.8314819335938, 1057.5301513671875, 601.62158203125, -301.7391052246094, 774.95703125, 139.70521545410156, 523.2531127929688, 423.599853515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000659.npy"} +{"epoch": 0.9676945668135095, "step": 660, "batch_size": 64, "mean": 520.118896484375, "std": 654.8275146484375, "min": -1094.9327392578125, "p10": -240.6016372680664, "median": 475.0273132324219, "p90": 1433.5294311523444, "max": 2357.407470703125, "pos_frac": 0.78125, "sample": [-132.5821075439453, 248.780517578125, 1074.8568115234375, 489.1907043457031, 477.6175537109375, 495.32952880859375, 582.7609252929688, 1502.8660888671875, 1067.638427734375, 880.0176391601562, 351.0035705566406, -373.986083984375, 1213.59326171875, 574.4644165039062, 921.716064453125, 1685.7926025390625, -190.5841522216797, -62.41780090332031, -234.9504852294922, -217.51071166992188, 1063.88671875, 1489.4825439453125, 69.94532012939453, 460.0841979980469, 862.8431396484375, 1963.481201171875, 391.6819763183594, -245.94590759277344, 742.720947265625, 385.25299072265625, 116.0388412475586, 1302.97216796875, -525.3539428710938, 2357.407470703125, 267.4012756347656, 593.9880981445312, 765.7706909179688, 80.46943664550781, 437.0836181640625, 799.6572875976562, 214.51011657714844, 199.4851531982422, 711.9906005859375, 806.7564697265625, -158.27398681640625, 6.093353271484375, 130.5855712890625, 1676.190185546875, 924.1273803710938, 1126.564697265625, -276.7315673828125, -21.136390686035156, -519.0026245117188, 704.0185546875, 1016.0679931640625, 527.3828125, 142.82467651367188, 361.9529113769531, 545.0864868164062, 1848.5162353515625, -243.0235595703125, -1094.9327392578125, 472.43707275390625, 453.65625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000660.npy"} +{"epoch": 0.9691629955947136, "step": 661, "batch_size": 64, "mean": 464.16583251953125, "std": 681.4691772460938, "min": -859.752685546875, "p10": -99.28571548461913, "median": 376.8994903564453, "p90": 1117.6450195312507, "max": 3411.3154296875, "pos_frac": 0.828125, "sample": [119.85639953613281, -100.29508209228516, 864.620849609375, -23.165483474731445, 397.13629150390625, 209.1282196044922, 543.6484985351562, -343.8459167480469, 708.120849609375, 322.75177001953125, 1422.069091796875, 344.5847473144531, 705.2792358398438, 614.65087890625, 3411.3154296875, 905.804931640625, 179.41030883789062, 46.6597900390625, -859.752685546875, 153.80010986328125, -10.726604461669922, 810.0725708007812, 146.16917419433594, 435.84942626953125, 218.33383178710938, 231.3723907470703, 346.32855224609375, 895.009765625, 1195.2440185546875, 56.840728759765625, 318.333251953125, 596.9386596679688, 587.7843017578125, 1278.2598876953125, 517.2638549804688, 266.8426513671875, 69.32586669921875, 356.6626892089844, 218.8627471923828, 520.3369750976562, -565.5216064453125, 397.67578125, -96.93052673339844, 802.0720825195312, 936.5806884765625, -783.478271484375, 796.05859375, 689.4566650390625, 39.48503875732422, 687.3460693359375, 2890.213623046875, -672.7070922851562, 256.94598388671875, 59.78227233886719, 763.3317260742188, 1434.7958984375, 122.30895233154297, 612.1287841796875, -190.69403076171875, 1332.5213623046875, 539.5419921875, 531.1691284179688, 448.77435302734375, -1.1257553100585938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000661.npy"} +{"epoch": 0.9706314243759178, "step": 662, "batch_size": 64, "mean": 363.70428466796875, "std": 682.1414184570312, "min": -1302.299072265625, "p10": -437.1643920898437, "median": 385.4128875732422, "p90": 1130.444909667969, "max": 2386.787353515625, "pos_frac": 0.703125, "sample": [-62.707115173339844, 857.476318359375, 1203.516845703125, -1302.299072265625, 616.4332885742188, -483.75616455078125, 454.56744384765625, 856.1552734375, 1839.2828369140625, 17.748573303222656, 855.3123779296875, -403.2332763671875, -1074.3857421875, -181.65675354003906, 1414.9005126953125, 107.96880340576172, -15.524057388305664, 346.8359680175781, 2386.787353515625, 842.0814208984375, 1151.1409912109375, -172.03372192382812, 411.28131103515625, -181.30902099609375, 183.33013916015625, 795.5330810546875, 103.91304016113281, 969.879638671875, 411.4435729980469, 284.19537353515625, 338.98492431640625, -217.41845703125, 371.4609375, -341.2322692871094, 549.7030639648438, 1067.9964599609375, 571.5224609375, 399.3648376464844, 755.2271118164062, 618.2128295898438, 36.310150146484375, 191.1226348876953, 958.013427734375, 1874.94580078125, 784.8905029296875, 1257.2564697265625, -862.8580322265625, 447.2796630859375, 1082.154052734375, 347.17877197265625, -451.706298828125, 50.51992416381836, -92.38139343261719, 570.5944213867188, -570.8621826171875, -236.29153442382812, 595.1181640625, 544.39794921875, 358.4103088378906, 590.215087890625, 636.7416381835938, -783.13525390625, -321.076171875, -76.46498107910156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000662.npy"} +{"epoch": 0.9720998531571219, "step": 663, "batch_size": 64, "mean": 439.39404296875, "std": 732.7092895507812, "min": -1619.02587890625, "p10": -191.28532333374017, "median": 286.7314910888672, "p90": 1173.6542846679688, "max": 3198.836181640625, "pos_frac": 0.78125, "sample": [836.607421875, 340.053955078125, 810.2310180664062, 45.95318603515625, 298.5816650390625, 1185.7501220703125, 199.55003356933594, 285.0259094238281, -55.82763671875, 355.5603332519531, 100.17186737060547, 158.0437469482422, 1571.4971923828125, 865.9147338867188, 64.56248474121094, 904.3997192382812, 821.6839599609375, 110.72968292236328, -1619.02587890625, 219.55572509765625, 288.43707275390625, 573.3624877929688, -310.11468505859375, 661.0220947265625, 649.0405883789062, 382.07470703125, 635.7124633789062, 122.37652587890625, 435.944091796875, 3198.836181640625, -296.14105224609375, 1112.8541259765625, 1649.2952880859375, 18.875381469726562, 661.3682250976562, 2212.135986328125, 1614.2991943359375, 81.55516052246094, 23.933992385864258, 1896.3221435546875, 1089.9554443359375, -222.50344848632812, 91.18798828125, 344.30572509765625, -105.9189453125, 589.4887084960938, 1035.8853759765625, 155.27223205566406, 136.0641326904297, 1019.4959716796875, 190.72850036621094, -33.408302307128906, 1145.4306640625, -1092.170654296875, 898.755615234375, -16.686939239501953, -527.2196044921875, -118.44303131103516, 84.83267211914062, 666.7972412109375, -315.13104248046875, -99.6902847290039, -86.00802612304688, 179.989501953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000663.npy"} +{"epoch": 0.973568281938326, "step": 664, "batch_size": 64, "mean": 480.83197021484375, "std": 649.633544921875, "min": -811.4672241210938, "p10": -172.38188934326172, "median": 403.1311950683594, "p90": 1277.259753417969, "max": 2146.79931640625, "pos_frac": 0.765625, "sample": [1023.9490356445312, 1.5998821258544922, 548.104736328125, 688.6640625, 883.324951171875, -19.379249572753906, 327.5232849121094, 1902.0374755859375, -111.5423812866211, 1353.8409423828125, -458.79669189453125, 777.5130004882812, 387.74261474609375, 233.16807556152344, 635.2329711914062, 225.04876708984375, -723.004150390625, 454.95477294921875, -32.652122497558594, 546.13916015625, 1237.4654541015625, 1378.083251953125, 54.997802734375, 1192.3955078125, -173.77017211914062, 131.70889282226562, 1980.8438720703125, -213.5660400390625, 519.7266235351562, -551.9820556640625, 1155.61376953125, 210.47164916992188, 42.57293701171875, 1089.311279296875, 528.9273071289062, 401.4531555175781, -811.4672241210938, 404.8092346191406, 703.873291015625, 1136.7586669921875, 128.23487854003906, 485.1398620605469, 843.0798950195312, -117.0889892578125, 976.837646484375, 2146.79931640625, 937.0335693359375, 215.15969848632812, 2078.2080078125, -112.4578857421875, -154.67733764648438, 1294.314453125, 1086.537353515625, 172.1222381591797, 105.52633666992188, 485.7141418457031, 190.70852661132812, 313.395263671875, -169.14256286621094, -60.324981689453125, 424.7306213378906, 19.668548583984375, 791.9082641601562, -369.8778076171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000664.npy"} +{"epoch": 0.9750367107195301, "step": 665, "batch_size": 64, "mean": 412.32568359375, "std": 530.8889770507812, "min": -717.0221557617188, "p10": -131.58336181640624, "median": 369.7187042236328, "p90": 1089.9761413574226, "max": 1775.973876953125, "pos_frac": 0.796875, "sample": [-129.72061157226562, 14.22610092163086, 458.6234436035156, 281.5840148925781, -132.38168334960938, 518.02197265625, 828.122802734375, -79.279541015625, 1534.1900634765625, 508.199951171875, 294.20074462890625, 524.926025390625, 137.86077880859375, -573.3123168945312, 606.6573486328125, 8.520448684692383, -47.47821044921875, 241.36672973632812, 16.535171508789062, 572.4468383789062, 703.8380737304688, 125.29408264160156, 434.315185546875, 455.1828308105469, -65.69828796386719, 309.4781799316406, -717.0221557617188, 1511.6375732421875, 727.0199584960938, -27.74417495727539, 777.1588134765625, 372.065673828125, 701.551025390625, 367.3717346191406, 1161.0595703125, 247.78485107421875, 1211.6153564453125, 534.342529296875, 417.7817077636719, 247.8815155029297, 221.0011749267578, 1775.973876953125, 579.057373046875, -335.87420654296875, 452.80072021484375, 901.767822265625, 345.40673828125, -511.26641845703125, 552.8804931640625, 924.1148071289062, 97.7355728149414, 205.35899353027344, 918.8804321289062, 210.51290893554688, 1686.255859375, -276.59320068359375, 681.1339111328125, -210.17294311523438, 163.06805419921875, 835.7131958007812, 1714.07666015625, 40.33552551269531, 444.6375732421875, -106.18557739257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000665.npy"} +{"epoch": 0.9765051395007343, "step": 666, "batch_size": 64, "mean": 424.4996643066406, "std": 613.1046752929688, "min": -1076.9796142578125, "p10": -307.04052734375, "median": 323.110595703125, "p90": 1077.39521484375, "max": 2647.434814453125, "pos_frac": 0.78125, "sample": [799.8553466796875, 365.67547607421875, 719.0750122070312, 423.73126220703125, -421.6443786621094, 697.9525756835938, 1061.666748046875, 1008.3748779296875, 154.36819458007812, 955.9268798828125, -34.702659606933594, 245.72463989257812, 363.9931945800781, -481.340576171875, -290.85687255859375, 254.28114318847656, 282.9346008300781, -129.95025634765625, -78.023681640625, -104.0843505859375, 1289.4468994140625, -0.32274627685546875, 815.5665283203125, 562.830078125, 2066.849365234375, 10.876205444335938, 806.8848876953125, 265.1175537109375, -137.41964721679688, -653.6278686523438, 242.54946899414062, 653.219482421875, 294.51495361328125, 1084.135986328125, 904.257568359375, 441.5040283203125, 941.2737426757812, -346.8892517089844, 369.38623046875, 539.7926025390625, 42.5869255065918, 866.695068359375, -313.97637939453125, 1316.0369873046875, 328.8970947265625, 317.3240966796875, -1076.9796142578125, 1014.3768310546875, 363.3397521972656, 109.11184692382812, 389.40582275390625, 687.2120361328125, 1084.9583740234375, 27.179019927978516, 282.314453125, 148.82894897460938, 233.316650390625, 308.84326171875, 2647.434814453125, 222.3636016845703, -425.0522155761719, 926.7694702148438, 294.787353515625, 1429.3013916015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000666.npy"} +{"epoch": 0.9779735682819384, "step": 667, "batch_size": 64, "mean": 493.6817321777344, "std": 643.7807006835938, "min": -719.4481201171875, "p10": -303.0472290039062, "median": 444.4435272216797, "p90": 1231.3046630859378, "max": 2321.8681640625, "pos_frac": 0.78125, "sample": [447.1406555175781, -499.00726318359375, 693.2042236328125, -719.4481201171875, -52.638214111328125, 1016.0032958984375, 969.7071533203125, 540.2616577148438, 494.9554443359375, 1268.33056640625, 223.89149475097656, 98.25717163085938, 105.33185577392578, 400.4385986328125, 335.85205078125, -692.9469604492188, 802.946044921875, 560.2156372070312, 1688.12158203125, 886.7997436523438, 796.212890625, 230.45797729492188, 1144.910888671875, 122.02201080322266, 599.36572265625, 327.32159423828125, 1028.5364990234375, -135.2469482421875, 1015.450927734375, 761.1941528320312, 786.720458984375, 566.7183227539062, 962.9400024414062, 623.0394287109375, 422.1952209472656, -321.6210632324219, 391.02154541015625, 441.74639892578125, 367.5676574707031, -40.15528106689453, -150.93125915527344, 1541.9560546875, 1837.9481201171875, 1.2905502319335938, 729.7284545898438, 1472.02490234375, 690.5293579101562, 243.01461791992188, 158.13525390625, 725.6353759765625, -20.534828186035156, 819.9310302734375, -398.28216552734375, 259.590576171875, 2321.8681640625, 2099.258056640625, 81.3933334350586, 1051.56640625, -598.3892211914062, 319.2522277832031, -689.0977172851562, -259.7082824707031, -52.692169189453125, 754.3280639648438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000667.npy"} +{"epoch": 0.9794419970631424, "step": 668, "batch_size": 64, "mean": 347.7958679199219, "std": 771.0363159179688, "min": -1302.610107421875, "p10": -528.6578308105469, "median": 285.7936096191406, "p90": 1265.0007934570312, "max": 2774.34326171875, "pos_frac": 0.6875, "sample": [481.8768615722656, 274.86724853515625, 1333.7769775390625, -1219.252197265625, 214.74342346191406, 884.5366821289062, 1355.720703125, 178.4342498779297, 1074.82275390625, 463.06890869140625, -605.4319458007812, 670.8151245117188, 120.47100067138672, 77.52035522460938, 391.56610107421875, 1100.6666259765625, 164.2113800048828, -21.151229858398438, -442.3121337890625, -529.6677856445312, -986.568603515625, 989.5248413085938, -475.0069580078125, 827.1597290039062, -160.52578735351562, 655.8869018554688, 385.4344787597656, 468.2151794433594, 36.31379699707031, 758.7865600585938, 326.45843505859375, 640.320556640625, 1241.6923828125, -526.30126953125, 1037.5755615234375, 436.12799072265625, -931.8313598632812, 1074.6790771484375, 85.87085723876953, -191.81210327148438, 250.71417236328125, -55.81806182861328, 1338.36767578125, -109.87615203857422, -600.1593627929688, 1168.3585205078125, -130.60833740234375, -402.16241455078125, 308.4740295410156, -41.93824005126953, 365.6939697265625, 202.60052490234375, 296.719970703125, 195.21810913085938, -165.45077514648438, 1274.9901123046875, 406.7944641113281, 393.08087158203125, 2386.4716796875, -1302.610107421875, -79.45531463623047, 2065.6904296875, 58.2138671875, 2774.34326171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000668.npy"} +{"epoch": 0.9809104258443465, "step": 669, "batch_size": 64, "mean": 498.524169921875, "std": 657.339111328125, "min": -1383.6656494140625, "p10": -212.85424194335934, "median": 415.4113311767578, "p90": 1390.7096801757814, "max": 2472.658935546875, "pos_frac": 0.8125, "sample": [226.11082458496094, 52.55998229980469, -68.3540267944336, 1022.134765625, -859.8917236328125, 383.22979736328125, 664.6256103515625, 755.73486328125, 405.0622863769531, -81.62053680419922, 2472.658935546875, -231.21902465820312, 28.138839721679688, 280.6777648925781, -295.1313171386719, 408.6443786621094, 1575.9197998046875, -87.26073455810547, 104.43134307861328, 190.33689880371094, 549.1357421875, 736.1002197265625, 1575.1302490234375, 1293.7161865234375, 89.29478454589844, 1774.97509765625, 72.0023193359375, 1405.765380859375, 1017.2611083984375, 79.79345703125, 604.588623046875, 1295.0224609375, -170.00308227539062, 623.0535888671875, 1448.3192138671875, -587.1884155273438, -232.62991333007812, 851.490478515625, 745.3282470703125, 832.21630859375, 1355.5797119140625, 320.4652404785156, 148.8193359375, 323.1876525878906, -1383.6656494140625, 792.3921508789062, 924.736328125, 657.640869140625, 423.3935546875, 271.0084228515625, -495.2043762207031, 1011.521728515625, 422.17828369140625, 431.0364685058594, 733.5415649414062, 742.0709838867188, 53.29338836669922, 265.0502624511719, 488.7674865722656, 1230.5694580078125, -15.16872787475586, 1503.00732421875, 380.2542419433594, 370.9103088378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000669.npy"} +{"epoch": 0.9823788546255506, "step": 670, "batch_size": 64, "mean": 497.284912109375, "std": 660.2279663085938, "min": -1287.3046875, "p10": -248.16761932373046, "median": 374.3037872314453, "p90": 1391.0453491210938, "max": 1766.820068359375, "pos_frac": 0.765625, "sample": [227.13406372070312, 486.79241943359375, 246.53421020507812, 1371.2335205078125, 472.2629089355469, -148.33526611328125, -175.50729370117188, -54.81964111328125, 1070.1124267578125, 597.37744140625, 305.66265869140625, -74.15283203125, -1287.3046875, 1766.820068359375, 1176.8060302734375, 1761.549072265625, 714.0218505859375, 348.5641784667969, 1166.440673828125, 664.8121337890625, 123.20668029785156, -513.6094970703125, 1349.8687744140625, -258.259033203125, -568.8501586914062, 376.6502380371094, 1487.954833984375, 34.440921783447266, 1083.5897216796875, -520.138916015625, 1491.6864013671875, 783.5413208007812, -235.3527374267578, 215.28933715820312, 237.34738159179688, 1399.5361328125, 1256.162841796875, -232.5728302001953, 214.93698120117188, 1305.48828125, 384.10931396484375, 1218.97412109375, 866.99853515625, -240.30059814453125, 1704.7908935546875, 899.753173828125, 371.95733642578125, 334.9976806640625, -251.53919982910156, -275.26171875, 46.586639404296875, 1434.908203125, 105.06854248046875, 275.61309814453125, 1360.270263671875, 140.5283966064453, 299.2280578613281, 1020.8779907226562, -3.091796875, 887.1300659179688, 438.1571044921875, 519.5617065429688, 42.2496337890625, 577.7469482421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000670.npy"} +{"epoch": 0.9838472834067548, "step": 671, "batch_size": 64, "mean": 399.7263488769531, "std": 658.7308349609375, "min": -1557.872314453125, "p10": -220.95528717041014, "median": 362.93568420410156, "p90": 1241.62431640625, "max": 1863.3753662109375, "pos_frac": 0.75, "sample": [48.74790954589844, -1557.872314453125, 486.7471618652344, 100.21682739257812, -133.78079223632812, 751.5239868164062, 275.1359558105469, 332.01971435546875, 660.8050537109375, 408.90997314453125, 433.11376953125, -186.19046020507812, 137.5526885986328, 1125.7734375, 989.91943359375, 48.70982360839844, 235.76893615722656, 224.32757568359375, 997.895263671875, 771.4048461914062, 131.45211791992188, 1471.009765625, -1487.426025390625, 1181.09619140625, 814.769287109375, -56.81996154785156, 1817.761474609375, 417.0871276855469, 265.62310791015625, 613.6925659179688, -125.97476196289062, -606.7095947265625, -438.7189025878906, 520.6226196289062, 1318.8095703125, -154.65365600585938, 367.177978515625, 278.8195495605469, 658.8809204101562, 1247.68994140625, 1380.9886474609375, 370.5566711425781, -194.98683166503906, 60.541893005371094, 811.7918701171875, -29.389419555664062, 258.1588134765625, 1015.0296630859375, -322.6619567871094, -90.19913482666016, 713.9024047851562, 443.3728942871094, -82.37091827392578, 255.79339599609375, 1227.47119140625, 933.0338745117188, -232.08462524414062, -650.8709106445312, 214.70143127441406, 1401.327880859375, 388.90869140625, 1863.3753662109375, 1102.48486328125, 358.6933898925781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000671.npy"} +{"epoch": 0.9853157121879589, "step": 672, "batch_size": 64, "mean": 404.5885925292969, "std": 744.872802734375, "min": -1355.709716796875, "p10": -547.9264678955077, "median": 355.21607971191406, "p90": 1294.62890625, "max": 2706.104248046875, "pos_frac": 0.6875, "sample": [422.49285888671875, -46.952239990234375, -351.19439697265625, 186.72601318359375, -187.87294006347656, 152.73744201660156, 735.4442138671875, 402.33160400390625, 1687.180419921875, 258.9657287597656, 92.86125946044922, -116.59229278564453, -402.0025939941406, -274.60760498046875, 539.3702392578125, 247.27432250976562, -151.76919555664062, -664.4442749023438, 460.77313232421875, 1689.8206787109375, 86.3131103515625, 858.2139892578125, -2.8209152221679688, -135.48532104492188, 1279.2711181640625, -1355.709716796875, 1143.58154296875, -937.4879150390625, 487.0029296875, 2706.104248046875, -134.31344604492188, 948.2716674804688, 941.9765625, 1118.92431640625, 1154.5506591796875, 775.200439453125, 578.2020263671875, -610.4652709960938, 214.42044067382812, 2013.605712890625, -876.19580078125, -81.83077239990234, 322.0357666015625, 1265.375732421875, 857.91552734375, 643.5909423828125, 914.4364013671875, 205.5941162109375, 762.6331787109375, 600.8480834960938, 1472.968505859375, 347.0832824707031, -135.23583984375, 713.4417724609375, 75.595703125, 778.000732421875, 1301.2108154296875, 62.06389617919922, -76.48918151855469, -745.7604370117188, 595.4073486328125, 363.348876953125, 1399.1458740234375, -681.4124755859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000672.npy"} +{"epoch": 0.986784140969163, "step": 673, "batch_size": 64, "mean": 387.2952880859375, "std": 636.2482299804688, "min": -1311.568359375, "p10": -314.7584014892578, "median": 389.45518493652344, "p90": 1035.1853454589848, "max": 3110.2158203125, "pos_frac": 0.78125, "sample": [-215.17466735839844, -655.8583984375, 478.5237731933594, 922.2774047851562, 342.96673583984375, 550.1705322265625, 638.2029418945312, 349.16595458984375, 1250.63720703125, 445.9229431152344, -172.4651336669922, 1450.0377197265625, 532.2978515625, 635.099609375, 118.43663024902344, 168.271728515625, -481.448974609375, 836.6311645507812, 1685.75830078125, 333.05804443359375, -325.05816650390625, 352.58935546875, 382.5273742675781, -79.45152282714844, 195.3700408935547, -1311.568359375, 180.88381958007812, 668.1663818359375, -424.5685729980469, 195.66383361816406, 733.4388427734375, 1398.71435546875, 146.5169219970703, -44.05586624145508, 765.3292846679688, 678.1139526367188, 396.38299560546875, -549.8684692382812, 507.98980712890625, 803.458251953125, 756.0598754882812, 44.73700714111328, 177.54580688476562, 706.75146484375, 453.252685546875, 88.87445831298828, 521.6317749023438, -200.42062377929688, 455.1713562011719, 3110.2158203125, 113.83113861083984, 77.67768859863281, 567.4407958984375, 627.7698974609375, -748.8797607421875, 554.552001953125, 527.3956909179688, 1083.574462890625, 119.99237060546875, -290.7256164550781, -59.641014099121094, 610.6984252929688, 1248.26318359375, 358.0431213378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000673.npy"} +{"epoch": 0.9882525697503671, "step": 674, "batch_size": 64, "mean": 493.2326965332031, "std": 663.4459228515625, "min": -587.7816162109375, "p10": -249.51350250244136, "median": 438.3824157714844, "p90": 1107.173815917969, "max": 3108.033447265625, "pos_frac": 0.765625, "sample": [-79.67828369140625, 36.46439743041992, 854.2694702148438, 675.4672241210938, 688.2077026367188, 747.400390625, 32.82945251464844, -105.61116027832031, -496.0224914550781, 1124.6676025390625, 1371.6563720703125, 900.5538330078125, 59.20900344848633, 495.342041015625, -194.2872772216797, 504.134765625, -68.45947265625, 2033.9404296875, 673.725830078125, 3108.033447265625, 556.7169189453125, 487.9278259277344, 42.47617721557617, 1013.0081176757812, 435.69769287109375, -273.181884765625, 1032.3424072265625, 441.067138671875, 547.5869140625, -132.90625, 1066.35498046875, 1994.7037353515625, -125.34927368164062, 198.415283203125, 377.59967041015625, 821.6136474609375, -319.18511962890625, -394.72125244140625, -332.0863952636719, 885.3176879882812, 662.8920288085938, 296.7344055175781, 352.09423828125, 1689.666259765625, 317.2503662109375, 954.838623046875, 841.976806640625, 980.9944458007812, 522.5158081054688, 146.90309143066406, -587.7816162109375, 209.8511962890625, 1842.9703369140625, -308.2030334472656, -21.436182022094727, 659.239990234375, 172.04638671875, 742.3086547851562, 321.1130065917969, 566.332275390625, 163.26199340820312, -84.83808898925781, 159.42337036132812, 281.49554443359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000674.npy"} +{"epoch": 0.9897209985315712, "step": 675, "batch_size": 64, "mean": 506.56787109375, "std": 669.2509155273438, "min": -597.33447265625, "p10": -86.0140399932861, "median": 363.5639953613281, "p90": 1395.0937133789064, "max": 2719.42236328125, "pos_frac": 0.78125, "sample": [246.4782257080078, 1405.534423828125, 543.1369018554688, 122.50227355957031, 47.94329071044922, 140.3657684326172, 816.3010864257812, -34.61128234863281, 583.7874145507812, 293.7241516113281, 894.11865234375, -218.15625, 605.30859375, 1111.342529296875, -98.95012664794922, -35.133026123046875, 160.96450805664062, 1321.6351318359375, 483.8099670410156, 489.6487121582031, 774.42236328125, 5.612144470214844, 2265.00830078125, 789.9226684570312, 24.54167938232422, 370.3861389160156, -55.829837799072266, 706.1675415039062, 598.9083251953125, 434.73663330078125, 382.3838806152344, 1491.7998046875, 329.5283203125, 1791.41650390625, -46.728912353515625, 1269.56591796875, 2719.42236328125, -267.44183349609375, 976.5541381835938, 2036.444091796875, 280.1583557128906, 80.22640991210938, -43.249900817871094, 75.10601806640625, 312.84490966796875, -54.507965087890625, -570.7361450195312, 238.59262084960938, 648.4486083984375, 260.8489074707031, 129.68455505371094, 1468.76123046875, 732.2401123046875, -597.33447265625, -543.6295776367188, 400.5364990234375, 108.791015625, 693.157958984375, -25.081146240234375, 1370.7320556640625, 356.7418518066406, 1100.5499267578125, 993.20361328125, -472.3127746582031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000675.npy"} +{"epoch": 0.9911894273127754, "step": 676, "batch_size": 64, "mean": 481.5036315917969, "std": 605.1775512695312, "min": -898.1253662109375, "p10": -150.88591003417966, "median": 410.8320617675781, "p90": 1380.472900390625, "max": 2394.810302734375, "pos_frac": 0.765625, "sample": [2.5683822631835938, 1580.1798095703125, 825.7559204101562, 956.0459594726562, -65.80162048339844, -99.2825698852539, 447.6086730957031, 704.909423828125, 58.75949478149414, -898.1253662109375, 564.2432861328125, 43.2159423828125, -105.54286193847656, 1167.567138671875, 1668.923583984375, 2394.810302734375, -131.2286376953125, -173.7090301513672, -20.635482788085938, -164.68978881835938, 4.019811630249023, 500.4134521484375, 597.5869140625, -65.12149047851562, -159.31045532226562, 791.0096435546875, 99.77851867675781, 1363.089111328125, 349.8870544433594, 1736.08154296875, -48.988372802734375, 684.49560546875, 271.91888427734375, 723.40234375, 104.41455841064453, 794.2451782226562, 1549.5908203125, 429.039794921875, 524.8046875, 1387.923095703125, 757.1760864257812, -45.107017517089844, 147.53294372558594, -365.2386474609375, 1343.8656005859375, 970.7860107421875, 64.03607177734375, 990.4701538085938, 506.911865234375, 392.62432861328125, 803.5693359375, 636.455078125, 251.4493865966797, 371.2049560546875, 68.56900787353516, 242.1409912109375, 869.4136962890625, -216.4552001953125, 1414.349609375, 151.20562744140625, 495.7027587890625, 257.93878173828125, -183.08302307128906, 496.86041259765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000676.npy"} +{"epoch": 0.9926578560939795, "step": 677, "batch_size": 64, "mean": 465.65484619140625, "std": 754.8594360351562, "min": -912.7601318359375, "p10": -182.20745544433592, "median": 344.06785583496094, "p90": 1213.0600463867188, "max": 3902.89013671875, "pos_frac": 0.765625, "sample": [90.94629669189453, 3006.06298828125, 611.3859252929688, 5.885887145996094, 484.2826843261719, -172.51202392578125, -155.8359375, -333.5796813964844, 457.68353271484375, -43.0081787109375, 550.5761108398438, 524.2423095703125, 89.78355407714844, 1205.59033203125, 304.933837890625, 887.7507934570312, 901.6141967773438, 31.832839965820312, 299.02215576171875, 174.1401824951172, 1227.5908203125, 312.6849060058594, 1093.77587890625, -175.8273468017578, 354.21820068359375, 1309.501708984375, 108.88160705566406, 496.0964050292969, 368.668701171875, 1144.1217041015625, 392.0023193359375, -82.21151733398438, -184.94178771972656, 246.06695556640625, 208.54229736328125, -662.6420288085938, 902.21142578125, 302.2952880859375, 3902.89013671875, 171.06365966796875, 204.96511840820312, 512.139404296875, 975.8942260742188, 787.8831176757812, -144.34829711914062, 1216.2613525390625, 405.88665771484375, 303.6084899902344, 756.93701171875, 189.35292053222656, 1351.8641357421875, -10.130359649658203, 794.94921875, 333.9175109863281, 538.0828857421875, -431.2708435058594, -912.7601318359375, 1748.885498046875, 547.6646118164062, 687.174560546875, 727.7330322265625, -288.91668701171875, -780.9130249023438, -68.73739624023438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000677.npy"} +{"epoch": 0.9941262848751835, "step": 678, "batch_size": 64, "mean": 544.760009765625, "std": 748.979736328125, "min": -1823.683349609375, "p10": -90.13137969970703, "median": 450.89044189453125, "p90": 1213.4886718750001, "max": 2753.530029296875, "pos_frac": 0.796875, "sample": [271.0369567871094, 941.7806396484375, 558.3042602539062, 631.49951171875, 502.446533203125, -81.44149780273438, 556.0328979492188, -1823.683349609375, 90.58978271484375, -274.5692443847656, -87.22425842285156, 1239.3914794921875, 72.57579040527344, 1176.9449462890625, -72.1822738647461, 555.18408203125, 851.4332885742188, -364.87469482421875, 909.2156372070312, 845.2969360351562, 984.6689453125, 1080.865478515625, 14.767715454101562, 2389.836181640625, 1166.668701171875, 2753.530029296875, 87.37919616699219, 261.536376953125, -13.490795135498047, 1153.819580078125, -488.6672058105469, 193.01559448242188, 275.59344482421875, 768.1983032226562, 274.38055419921875, 1016.0059814453125, 215.01229858398438, 1162.9041748046875, 262.27496337890625, 926.7415161132812, -201.70127868652344, 910.3773803710938, 1229.1502685546875, 1731.79638671875, 354.67388916015625, 267.73931884765625, 2390.031982421875, 2344.1806640625, 80.59135437011719, 574.2049560546875, 279.32952880859375, -595.863037109375, 399.3343505859375, -2.4058074951171875, 43.075340270996094, 68.28700256347656, 602.8865966796875, 1108.7635498046875, 626.227783203125, 520.2271728515625, 390.0557556152344, -91.37728881835938, 938.382080078125, -86.12386322021484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000678.npy"} +{"epoch": 0.9955947136563876, "step": 679, "batch_size": 64, "mean": 384.6784362792969, "std": 626.41162109375, "min": -769.24951171875, "p10": -290.1199600219726, "median": 286.2602844238281, "p90": 1356.7357543945316, "max": 2123.605224609375, "pos_frac": 0.734375, "sample": [585.2535400390625, 61.63013458251953, 1271.2421875, 2050.54345703125, 471.1752624511719, 706.6802368164062, 780.70556640625, 417.8222961425781, 1393.3758544921875, 341.3777770996094, 430.55426025390625, 472.527587890625, 436.9147644042969, 168.52134704589844, 1039.876953125, -411.3751525878906, -152.53890991210938, 85.10653686523438, 212.94732666015625, 939.312744140625, 630.9860229492188, 1567.1015625, -223.10939025878906, 116.69546508789062, 136.13880920410156, -318.8387756347656, 359.05902099609375, 1577.563720703125, -473.8596496582031, 529.0020751953125, 793.861328125, 64.7109375, -69.06645202636719, 36.71674346923828, 987.367431640625, 367.11474609375, 1593.173583984375, 145.7448272705078, -188.87451171875, 72.8975601196289, 298.3038635253906, -55.40821838378906, 405.1810607910156, -32.149261474609375, -363.7513122558594, 1499.6727294921875, 274.2167053222656, -193.63290405273438, -769.24951171875, -160.27212524414062, 957.552734375, -671.395751953125, 538.0047607421875, -192.8158416748047, 530.83935546875, 259.3592224121094, -59.27114486694336, 200.81607055664062, 757.6627197265625, 2123.605224609375, 207.15139770507812, 648.8533935546875, -700.2548217773438, 110.36225128173828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000679.npy"} +{"epoch": 0.9970631424375918, "step": 680, "batch_size": 64, "mean": 513.3322143554688, "std": 767.4342041015625, "min": -2263.132080078125, "p10": -232.54038391113272, "median": 392.4099578857422, "p90": 1549.5302856445314, "max": 2415.0927734375, "pos_frac": 0.8125, "sample": [1187.626220703125, 490.5404968261719, 395.68487548828125, 958.501708984375, 148.9308319091797, 1152.1971435546875, 1824.7354736328125, 589.8768920898438, 199.27359008789062, 389.1350402832031, 544.9705810546875, 188.0784454345703, 807.7135009765625, -397.6634216308594, 1086.1248779296875, 1474.296630859375, 741.6050415039062, -644.0548095703125, -277.28466796875, 880.4332275390625, 1190.158447265625, 225.82196044921875, -118.56452941894531, 343.6864318847656, -279.6149597167969, 980.117919921875, 831.0507202148438, 172.94248962402344, 635.2283935546875, 1564.355712890625, 196.89657592773438, 45.32306671142578, 195.03012084960938, 1762.0357666015625, 1105.709228515625, 157.46568298339844, 176.5796356201172, 531.765625, 2055.609375, -780.8399047851562, 134.5504150390625, 247.57388305664062, 462.23809814453125, 580.0997314453125, 153.30029296875, -2263.132080078125, 1913.6817626953125, -108.38105010986328, -569.6212768554688, 1514.9376220703125, 707.197509765625, 2415.0927734375, 21.011962890625, 5.0981903076171875, 146.06161499023438, 1487.341552734375, 733.0565185546875, 1660.4256591796875, -37.10153579711914, 136.78070068359375, -128.13705444335938, 227.95486450195312, -19.458770751953125, 701.2124633789062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000680.npy"} +{"epoch": 0.9985315712187959, "step": 681, "batch_size": 64, "mean": 398.695068359375, "std": 649.7728271484375, "min": -898.1030883789062, "p10": -392.05385131835936, "median": 396.9673767089844, "p90": 1172.8136108398442, "max": 2215.88330078125, "pos_frac": 0.6875, "sample": [791.093994140625, -383.638427734375, 514.31494140625, 176.34783935546875, 892.2001953125, -898.1030883789062, 186.41146850585938, 412.2407531738281, -452.7953186035156, 266.3453369140625, -254.65185546875, 381.6940002441406, -463.6009521484375, -395.66046142578125, 1216.748779296875, 1640.306884765625, 463.027587890625, -56.26079559326172, -270.19384765625, 1032.575439453125, -225.40625, -10.202140808105469, 854.6502685546875, 479.0786437988281, 1782.391357421875, 714.0509643554688, 208.67581176757812, 992.03564453125, 957.1810302734375, 594.6284790039062, 109.08747100830078, 495.4100341796875, 214.9635009765625, -88.22732543945312, -171.4639892578125, 211.6799774169922, 202.3099822998047, -623.967041015625, 494.3758239746094, -126.43333435058594, -349.66693115234375, 2215.88330078125, -759.2943115234375, 904.7896118164062, -97.79521942138672, 523.6024169921875, -468.5655822753906, 205.7639923095703, 39.94679260253906, -264.67156982421875, 999.0328979492188, -298.0276184082031, 888.3651733398438, 581.992431640625, 1366.73828125, 502.58673095703125, 1051.8670654296875, 1070.2982177734375, 1048.85693359375, 692.8165283203125, 144.38072204589844, 1373.112060546875, 1405.905517578125, 875.3475341796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000681.npy"} diff --git a/margin_logs/step_0000001.npy b/margin_logs/step_0000001.npy new file mode 100644 index 0000000..248c095 --- /dev/null +++ b/margin_logs/step_0000001.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb7ed5e9b5d6de6c4e509dd17cf5d9c91337fabd0c174e116c5e60872823ad93 +size 384 diff --git a/margin_logs/step_0000002.npy b/margin_logs/step_0000002.npy new file mode 100644 index 0000000..984e4c2 --- /dev/null +++ b/margin_logs/step_0000002.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc23171824afa57340cda53f69d83aef67c7c0b95175e9ec4a3a7bc3c221bc4f +size 384 diff --git a/margin_logs/step_0000003.npy b/margin_logs/step_0000003.npy new file mode 100644 index 0000000..018093e --- /dev/null +++ b/margin_logs/step_0000003.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef9fba6337a44bc530377336d3f3a7b1c18ba2344d0b825de24c378235189a9e +size 384 diff --git a/margin_logs/step_0000004.npy b/margin_logs/step_0000004.npy new file mode 100644 index 0000000..5e4aea2 --- /dev/null +++ b/margin_logs/step_0000004.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f58f909c6d7042bfeef50ff30480bfe6440a4d8606bfef76d98727242990fac +size 384 diff --git a/margin_logs/step_0000005.npy b/margin_logs/step_0000005.npy new file mode 100644 index 0000000..1b9b592 --- /dev/null +++ b/margin_logs/step_0000005.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3d132f2cfb375ad7363c4337963aba216f000d5442bcef686d06a58bfe1d2cb +size 384 diff --git a/margin_logs/step_0000006.npy b/margin_logs/step_0000006.npy new file mode 100644 index 0000000..3727ff1 --- /dev/null +++ b/margin_logs/step_0000006.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52f2b7c6e42e54c55fec6fac4d4c5021fd7bf385906f2848ef1b4cb5ea4ca024 +size 384 diff --git a/margin_logs/step_0000007.npy b/margin_logs/step_0000007.npy new file mode 100644 index 0000000..3530b84 --- /dev/null +++ b/margin_logs/step_0000007.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6ce95263774ccca4765825c0c1210107b13dc26b721aeeb6fa8c110128f3270 +size 384 diff --git a/margin_logs/step_0000008.npy b/margin_logs/step_0000008.npy new file mode 100644 index 0000000..6775833 --- /dev/null +++ b/margin_logs/step_0000008.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61d4aef506176d6cd9a50254d05b75b7969a64b60ce80db5efc8a27bb13a8a1b +size 384 diff --git a/margin_logs/step_0000009.npy b/margin_logs/step_0000009.npy new file mode 100644 index 0000000..78e1ce8 --- /dev/null +++ b/margin_logs/step_0000009.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:935d35a8bec624c65cc5ac00b4e80d214d38d879d8c5e30c627b86c314afc35b +size 384 diff --git a/margin_logs/step_0000010.npy b/margin_logs/step_0000010.npy new file mode 100644 index 0000000..42dd9da --- /dev/null +++ b/margin_logs/step_0000010.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e25766c3b5d8aa1f00e29dbdd190430b009186f35c9ca8f0cf730af1e9107ff4 +size 384 diff --git a/margin_logs/step_0000011.npy b/margin_logs/step_0000011.npy new file mode 100644 index 0000000..8ee1cd6 --- /dev/null +++ b/margin_logs/step_0000011.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f2add72b56b4366ff6c34072c06568cca67d04ff435993995515864841d53c1 +size 384 diff --git a/margin_logs/step_0000012.npy b/margin_logs/step_0000012.npy new file mode 100644 index 0000000..86476da --- /dev/null +++ b/margin_logs/step_0000012.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcaae42d038f49b165c1aec3c3b931354a29d359466a30db5324c25079fd9883 +size 384 diff --git a/margin_logs/step_0000013.npy b/margin_logs/step_0000013.npy new file mode 100644 index 0000000..aa4e9b6 --- /dev/null +++ b/margin_logs/step_0000013.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ceec6cd0d93c64610e80eb9b4ef2d728fb90548fd8d20ee18f2acb185c97fcd +size 384 diff --git a/margin_logs/step_0000014.npy b/margin_logs/step_0000014.npy new file mode 100644 index 0000000..89e3faf --- /dev/null +++ b/margin_logs/step_0000014.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcb56f28c4ca7dbfa49f285b467f14bec912f568fd7baa2bf9bb14234fc8166c +size 384 diff --git a/margin_logs/step_0000015.npy b/margin_logs/step_0000015.npy new file mode 100644 index 0000000..dfc5834 --- /dev/null +++ b/margin_logs/step_0000015.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7436e70b4005f569b618569c0809eaeb620415bbcc3768c13e60055e180bb48f +size 384 diff --git a/margin_logs/step_0000016.npy b/margin_logs/step_0000016.npy new file mode 100644 index 0000000..e6b7f13 --- /dev/null +++ b/margin_logs/step_0000016.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:581b82c313787c63242098cb9dda955def33c19b46f22a302dc794b50fdce027 +size 384 diff --git a/margin_logs/step_0000017.npy b/margin_logs/step_0000017.npy new file mode 100644 index 0000000..6947700 --- /dev/null +++ b/margin_logs/step_0000017.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b40d6c93d746b5512f4cc522be884eb36bbb403e450b3a003dcac2f47c1a767 +size 384 diff --git a/margin_logs/step_0000018.npy b/margin_logs/step_0000018.npy new file mode 100644 index 0000000..6c9efa8 --- /dev/null +++ b/margin_logs/step_0000018.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43579a251c90dba6d5d8e319af66c8a9c6e92224f55ab7e5e663c350f91d20ab +size 384 diff --git a/margin_logs/step_0000019.npy b/margin_logs/step_0000019.npy new file mode 100644 index 0000000..a54c3de --- /dev/null +++ b/margin_logs/step_0000019.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6704adac9fc9006944d8a23ed22a7575ab09ec0da315a95e3bd2f67da4e5667c +size 384 diff --git a/margin_logs/step_0000020.npy b/margin_logs/step_0000020.npy new file mode 100644 index 0000000..f688f6d --- /dev/null +++ b/margin_logs/step_0000020.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25663e631359598337bc3bdec98a38f08419265c49665e63b6439873241f7bd0 +size 384 diff --git a/margin_logs/step_0000021.npy b/margin_logs/step_0000021.npy new file mode 100644 index 0000000..0098228 --- /dev/null +++ b/margin_logs/step_0000021.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0e6c0c8a924f95cb96290eb94f7f06246761f443f1531d1cd9e3da8038f853d +size 384 diff --git a/margin_logs/step_0000022.npy b/margin_logs/step_0000022.npy new file mode 100644 index 0000000..eb2cafc --- /dev/null +++ b/margin_logs/step_0000022.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cd7002b7a43d3daa5b3d5ddbba9a5f6b85a130cea88a10bb7b71f264b8daef1 +size 384 diff --git a/margin_logs/step_0000023.npy b/margin_logs/step_0000023.npy new file mode 100644 index 0000000..4c11afa --- /dev/null +++ b/margin_logs/step_0000023.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a961eae4dc8209af06886717537c0d951d700d2af35bf9b2ab9207878fb7b159 +size 384 diff --git a/margin_logs/step_0000024.npy b/margin_logs/step_0000024.npy new file mode 100644 index 0000000..902641c --- /dev/null +++ b/margin_logs/step_0000024.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc6f8091fb2f0bf20bfeac737fc7f10ca45531123fe5cc776c7e6be1df191836 +size 384 diff --git a/margin_logs/step_0000025.npy b/margin_logs/step_0000025.npy new file mode 100644 index 0000000..3bc2cd8 --- /dev/null +++ b/margin_logs/step_0000025.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd4d75b21a0697e91897a8d36b3c2a5888fd4edd2ef64a1ecf478a603855f4a9 +size 384 diff --git a/margin_logs/step_0000026.npy b/margin_logs/step_0000026.npy new file mode 100644 index 0000000..453c19c --- /dev/null +++ b/margin_logs/step_0000026.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a09b979ab01a090cce1281a4336aabf000f7877d1190c82f71b9e296507e54e +size 384 diff --git a/margin_logs/step_0000027.npy b/margin_logs/step_0000027.npy new file mode 100644 index 0000000..6247531 --- /dev/null +++ b/margin_logs/step_0000027.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41b2ce42cdf4bf89214c9152f9c9a5ee87f8486c25a427906adf722212e48e68 +size 384 diff --git a/margin_logs/step_0000028.npy b/margin_logs/step_0000028.npy new file mode 100644 index 0000000..8d9ae2a --- /dev/null +++ b/margin_logs/step_0000028.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d141d867e5375398959cfa5296c0b98a2c4c46c5c011ba7733a2de2f988df410 +size 384 diff --git a/margin_logs/step_0000029.npy b/margin_logs/step_0000029.npy new file mode 100644 index 0000000..cbef07a --- /dev/null +++ b/margin_logs/step_0000029.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:559825bfc21493e8460e285029820e2842222d175c59e2773cedb62a8b7eca24 +size 384 diff --git a/margin_logs/step_0000030.npy b/margin_logs/step_0000030.npy new file mode 100644 index 0000000..6035288 --- /dev/null +++ b/margin_logs/step_0000030.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:724e5ecd7767d272c3ab60eb60359950dd7cd6c7d552547c45e93aad660645f2 +size 384 diff --git a/margin_logs/step_0000031.npy b/margin_logs/step_0000031.npy new file mode 100644 index 0000000..a4a4a07 --- /dev/null +++ b/margin_logs/step_0000031.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50c7654fda1a38acbfbcf7dfe9f0c3406ee8bdcd25cb1e76ff030653fadf6a77 +size 384 diff --git a/margin_logs/step_0000032.npy b/margin_logs/step_0000032.npy new file mode 100644 index 0000000..a252c17 --- /dev/null +++ b/margin_logs/step_0000032.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df4f7736613b1836e8f41e45eadee86ce73975b22c9b6659f1db0c142fba1a7c +size 384 diff --git a/margin_logs/step_0000033.npy b/margin_logs/step_0000033.npy new file mode 100644 index 0000000..46c1cb4 --- /dev/null +++ b/margin_logs/step_0000033.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25c6950a6bb9d8289bbd115d602cb5bebec59e8c6ddd4d814aedd9eb80872bb2 +size 384 diff --git a/margin_logs/step_0000034.npy b/margin_logs/step_0000034.npy new file mode 100644 index 0000000..cd1f08c --- /dev/null +++ b/margin_logs/step_0000034.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6e8de61efbd6d6d6a289eb34b96de8598a2fb86347747e3d111f31759dcf01d +size 384 diff --git a/margin_logs/step_0000035.npy b/margin_logs/step_0000035.npy new file mode 100644 index 0000000..b7163f7 --- /dev/null +++ b/margin_logs/step_0000035.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c087630a8db942a7972bcaaf1660979dc4a9a0787e44d9a56ccc1dc7f6e11150 +size 384 diff --git a/margin_logs/step_0000036.npy b/margin_logs/step_0000036.npy new file mode 100644 index 0000000..ec9ec83 --- /dev/null +++ b/margin_logs/step_0000036.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dba428671372838a925fc0606d920e891c97ca9761fd757f907064c4b310ec1 +size 384 diff --git a/margin_logs/step_0000037.npy b/margin_logs/step_0000037.npy new file mode 100644 index 0000000..dce2477 --- /dev/null +++ b/margin_logs/step_0000037.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b8923775dbc15704c8715b7b5499ef7775a0ea46e64c96b973aa5d66d2abc11 +size 384 diff --git a/margin_logs/step_0000038.npy b/margin_logs/step_0000038.npy new file mode 100644 index 0000000..806b044 --- /dev/null +++ b/margin_logs/step_0000038.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1f2d9b5b07220dfb92cb0da3576062fbd0aeb72c4c6cf27c890cd601f05ebf5 +size 384 diff --git a/margin_logs/step_0000039.npy b/margin_logs/step_0000039.npy new file mode 100644 index 0000000..c376b3b --- /dev/null +++ b/margin_logs/step_0000039.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0095086339959a25ed89ae837bc3349f9edf071a08c0857ee45808bdd0172e2 +size 384 diff --git a/margin_logs/step_0000040.npy b/margin_logs/step_0000040.npy new file mode 100644 index 0000000..fda3f8c --- /dev/null +++ b/margin_logs/step_0000040.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ce08e49255e2f141fb1625e702d3213cb18410a8f29b500c4df9177013f0153 +size 384 diff --git a/margin_logs/step_0000041.npy b/margin_logs/step_0000041.npy new file mode 100644 index 0000000..5a8fad4 --- /dev/null +++ b/margin_logs/step_0000041.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7939b3e3472549f4ccdb0cebe73b7f0593d76c0c4a5b3f3bbe3f827719a0d35 +size 384 diff --git a/margin_logs/step_0000042.npy b/margin_logs/step_0000042.npy new file mode 100644 index 0000000..964de34 --- /dev/null +++ b/margin_logs/step_0000042.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c27cdf15a7bbafe5fd805f083e53c03fe133daee4b1e9edb2b0c90720d664622 +size 384 diff --git a/margin_logs/step_0000043.npy b/margin_logs/step_0000043.npy new file mode 100644 index 0000000..c8b0ed7 --- /dev/null +++ b/margin_logs/step_0000043.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82c940ba18682dd58345d1b9bb80c22e63b0b5cdc2a06450047f8c14c055aa60 +size 384 diff --git a/margin_logs/step_0000044.npy b/margin_logs/step_0000044.npy new file mode 100644 index 0000000..5e38acf --- /dev/null +++ b/margin_logs/step_0000044.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1560bc1b328b20b59ec46c1b6518ff557af034c73e22be5317d7b1f4152f9533 +size 384 diff --git a/margin_logs/step_0000045.npy b/margin_logs/step_0000045.npy new file mode 100644 index 0000000..b98c76f --- /dev/null +++ b/margin_logs/step_0000045.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff46c441e4ecb022d6c54acae1868ab8ef1e1be74fe8f01b388d5c97dcd7f089 +size 384 diff --git a/margin_logs/step_0000046.npy b/margin_logs/step_0000046.npy new file mode 100644 index 0000000..4c77921 --- /dev/null +++ b/margin_logs/step_0000046.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23586d2326a79917c2d5c0fa01e8e861ed3b23640177817657f99db327cbf85b +size 384 diff --git a/margin_logs/step_0000047.npy b/margin_logs/step_0000047.npy new file mode 100644 index 0000000..a5213d2 --- /dev/null +++ b/margin_logs/step_0000047.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d18cbfa8e56622dcef7c44b37ef476ba4283510a3d08df14a8013ff16e09767c +size 384 diff --git a/margin_logs/step_0000048.npy b/margin_logs/step_0000048.npy new file mode 100644 index 0000000..9ff2419 --- /dev/null +++ b/margin_logs/step_0000048.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5fdd5336f2718e7e7f4290772808e1bbd065a8f5cd4a55dd698b682acb7547d +size 384 diff --git a/margin_logs/step_0000049.npy b/margin_logs/step_0000049.npy new file mode 100644 index 0000000..ec04954 --- /dev/null +++ b/margin_logs/step_0000049.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d389a7dd706fa628f585d5c6727f8e72fc1cb46815b2768e7e8628fcd36a80d +size 384 diff --git a/margin_logs/step_0000050.npy b/margin_logs/step_0000050.npy new file mode 100644 index 0000000..5aaee6b --- /dev/null +++ b/margin_logs/step_0000050.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38985c5f22c211939acc59c838ea4d00fb1c6de3b3206a5d14f0025345d7b20e +size 384 diff --git a/margin_logs/step_0000051.npy b/margin_logs/step_0000051.npy new file mode 100644 index 0000000..650ba71 --- /dev/null +++ b/margin_logs/step_0000051.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09b43e818d28fdf018cbab25d2081263ae3961a4706ac3fc5dd0d3c63e805425 +size 384 diff --git a/margin_logs/step_0000052.npy b/margin_logs/step_0000052.npy new file mode 100644 index 0000000..4e2cb7f --- /dev/null +++ b/margin_logs/step_0000052.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbe7385b508cb66b1a1b195568321921b3a903aa7a487cf75106ee358fc9a3dd +size 384 diff --git a/margin_logs/step_0000053.npy b/margin_logs/step_0000053.npy new file mode 100644 index 0000000..9d2315c --- /dev/null +++ b/margin_logs/step_0000053.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52ee7bed64438e80df63e70be09c9fd75ab6f05834129e4af304c3cca0f74d3f +size 384 diff --git a/margin_logs/step_0000054.npy b/margin_logs/step_0000054.npy new file mode 100644 index 0000000..6b80f71 --- /dev/null +++ b/margin_logs/step_0000054.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d192a8f37fcf80933876fce9daa05611c27241ccfb90d0c51918d2800bdab683 +size 384 diff --git a/margin_logs/step_0000055.npy b/margin_logs/step_0000055.npy new file mode 100644 index 0000000..2ef9093 --- /dev/null +++ b/margin_logs/step_0000055.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ad925a821805ef7a095d04f3f08709e40f97b0e71ff09d707401f397a137a3d +size 384 diff --git a/margin_logs/step_0000056.npy b/margin_logs/step_0000056.npy new file mode 100644 index 0000000..fd9d347 --- /dev/null +++ b/margin_logs/step_0000056.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cbae878fdab71af87cbb60bf09987183f95250a4efe717bd5e35eac9a80e7d8 +size 384 diff --git a/margin_logs/step_0000057.npy b/margin_logs/step_0000057.npy new file mode 100644 index 0000000..a07b571 --- /dev/null +++ b/margin_logs/step_0000057.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e83510ed5fed4951db0d8408aea88630aabeb685280badb8bdaee5d0beebbaf +size 384 diff --git a/margin_logs/step_0000058.npy b/margin_logs/step_0000058.npy new file mode 100644 index 0000000..0630b47 --- /dev/null +++ b/margin_logs/step_0000058.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1823ac5aae9ce45440d9ea4b6888e4e0aa9ece5328ea0be5b9d571741fb18f7 +size 384 diff --git a/margin_logs/step_0000059.npy b/margin_logs/step_0000059.npy new file mode 100644 index 0000000..efb862a --- /dev/null +++ b/margin_logs/step_0000059.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e83f6037338a135fdfd762b9591509e8089a3919cc7f203d736ab889293707b5 +size 384 diff --git a/margin_logs/step_0000060.npy b/margin_logs/step_0000060.npy new file mode 100644 index 0000000..d9b3230 --- /dev/null +++ b/margin_logs/step_0000060.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb5e5bf05d4cdda386af2dde17bb0f1a3ed9ecc856d0be454d3ac6dc4bdbb92b +size 384 diff --git a/margin_logs/step_0000061.npy b/margin_logs/step_0000061.npy new file mode 100644 index 0000000..4a05606 --- /dev/null +++ b/margin_logs/step_0000061.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4753a0fc117809720b940883da5570354851e1470b8946f7999c646e80c3f876 +size 384 diff --git a/margin_logs/step_0000062.npy b/margin_logs/step_0000062.npy new file mode 100644 index 0000000..f972cdf --- /dev/null +++ b/margin_logs/step_0000062.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f513d6abc36b523e7676dc491fe749742efc6f573f58e255b812030e17230a73 +size 384 diff --git a/margin_logs/step_0000063.npy b/margin_logs/step_0000063.npy new file mode 100644 index 0000000..68aa3ea --- /dev/null +++ b/margin_logs/step_0000063.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acd5015e2cf72a396759bc912b8036d5601eb1e2ffc4cf540ad81368addc423e +size 384 diff --git a/margin_logs/step_0000064.npy b/margin_logs/step_0000064.npy new file mode 100644 index 0000000..097c8f4 --- /dev/null +++ b/margin_logs/step_0000064.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6d890429955117a9e8d2ee7d3c2d7513623807f81e1aa7a6a1685b7c318d9f0 +size 384 diff --git a/margin_logs/step_0000065.npy b/margin_logs/step_0000065.npy new file mode 100644 index 0000000..1648bd7 --- /dev/null +++ b/margin_logs/step_0000065.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76043ab36ade8d78e90be78370993ce6e42d1e491674ee81dcab9aba21867e30 +size 384 diff --git a/margin_logs/step_0000066.npy b/margin_logs/step_0000066.npy new file mode 100644 index 0000000..2222855 --- /dev/null +++ b/margin_logs/step_0000066.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f80bd0623f8f5b4c4e71162c50ce501c3921260f999453597663facd255fd4a3 +size 384 diff --git a/margin_logs/step_0000067.npy b/margin_logs/step_0000067.npy new file mode 100644 index 0000000..1f84780 --- /dev/null +++ b/margin_logs/step_0000067.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d477cc99352ef488628b6fd779aba8a4d8df18ca966346813bddd62876e5e3 +size 384 diff --git a/margin_logs/step_0000068.npy b/margin_logs/step_0000068.npy new file mode 100644 index 0000000..c8adecb --- /dev/null +++ b/margin_logs/step_0000068.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6c5c5c9e3327d809c4225b27870bc4ab9613451e5fa3661f77ccf994a91bd89 +size 384 diff --git a/margin_logs/step_0000069.npy b/margin_logs/step_0000069.npy new file mode 100644 index 0000000..3721e8e --- /dev/null +++ b/margin_logs/step_0000069.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66e1c310e31bbf66888936920ec51ab8264e58880dbfc72773a763fe11b26b93 +size 384 diff --git a/margin_logs/step_0000070.npy b/margin_logs/step_0000070.npy new file mode 100644 index 0000000..d643148 --- /dev/null +++ b/margin_logs/step_0000070.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a95cf2e0421595187d95b0feacbf9309a41ff00da27f8f6bf62b2182c4b4c74 +size 384 diff --git a/margin_logs/step_0000071.npy b/margin_logs/step_0000071.npy new file mode 100644 index 0000000..0397d82 --- /dev/null +++ b/margin_logs/step_0000071.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc830c5e1faf1e983f2e59babee707b80b5c83d2aa77eb600635dbf504faa347 +size 384 diff --git a/margin_logs/step_0000072.npy b/margin_logs/step_0000072.npy new file mode 100644 index 0000000..8a5ef05 --- /dev/null +++ b/margin_logs/step_0000072.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ab9f5163325cc9905185d071d014d5947d4380fbc23fe3136a63f97e5436805 +size 384 diff --git a/margin_logs/step_0000073.npy b/margin_logs/step_0000073.npy new file mode 100644 index 0000000..23266cc --- /dev/null +++ b/margin_logs/step_0000073.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8212237f51ff71adc13c8f0bf849579ca514d3dcb446279aa3e960ce3909002 +size 384 diff --git a/margin_logs/step_0000074.npy b/margin_logs/step_0000074.npy new file mode 100644 index 0000000..ccff7eb --- /dev/null +++ b/margin_logs/step_0000074.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f02fb8bafaa5393036a493922d5004a99dcb0cd4b69eaa037de958ae623dae16 +size 384 diff --git a/margin_logs/step_0000075.npy b/margin_logs/step_0000075.npy new file mode 100644 index 0000000..99be4f3 --- /dev/null +++ b/margin_logs/step_0000075.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7beebb815a846e596888c6cae702da91441220f4ec344facc317c8c4ea49f68e +size 384 diff --git a/margin_logs/step_0000076.npy b/margin_logs/step_0000076.npy new file mode 100644 index 0000000..2013f50 --- /dev/null +++ b/margin_logs/step_0000076.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4207f070e13ac4686642d524c881245c8036e963c41e2259db6c843e9848b4f7 +size 384 diff --git a/margin_logs/step_0000077.npy b/margin_logs/step_0000077.npy new file mode 100644 index 0000000..2d2d1fd --- /dev/null +++ b/margin_logs/step_0000077.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35b187a12a73b3e891c84d80432f2c4baf148ee860506a3912e7301adbd37055 +size 384 diff --git a/margin_logs/step_0000078.npy b/margin_logs/step_0000078.npy new file mode 100644 index 0000000..b5db98d --- /dev/null +++ b/margin_logs/step_0000078.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d37a41a3f660e4b9eba8bd6f79cd97ce680f15920af61916027345e395c40b +size 384 diff --git a/margin_logs/step_0000079.npy b/margin_logs/step_0000079.npy new file mode 100644 index 0000000..579f651 --- /dev/null +++ b/margin_logs/step_0000079.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88b9ffa84c372d38f558ddbce94ff1169d153f7666afb68f7eedb06b22f8460f +size 384 diff --git a/margin_logs/step_0000080.npy b/margin_logs/step_0000080.npy new file mode 100644 index 0000000..e879a36 --- /dev/null +++ b/margin_logs/step_0000080.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f580702e4101fbf5f50472b8bfd10d95cddf0334e0fc87663e21e1ec79c492cc +size 384 diff --git a/margin_logs/step_0000081.npy b/margin_logs/step_0000081.npy new file mode 100644 index 0000000..131774e --- /dev/null +++ b/margin_logs/step_0000081.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24aa29085a8dbd2539d737bff03e2ff24e6662176a87d870eb49a46b4169f994 +size 384 diff --git a/margin_logs/step_0000082.npy b/margin_logs/step_0000082.npy new file mode 100644 index 0000000..034e83b --- /dev/null +++ b/margin_logs/step_0000082.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dd11d98c602fd9d53f7032ee69949d244b97c8c2c842f26a48b1abdc77a48e8 +size 384 diff --git a/margin_logs/step_0000083.npy b/margin_logs/step_0000083.npy new file mode 100644 index 0000000..42c0fa0 --- /dev/null +++ b/margin_logs/step_0000083.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:177628b3346f01db0088677f0257d7bd5f68e7286f7ffd0a44d7b8f35ee57379 +size 384 diff --git a/margin_logs/step_0000084.npy b/margin_logs/step_0000084.npy new file mode 100644 index 0000000..3fc51d3 --- /dev/null +++ b/margin_logs/step_0000084.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f30f62646c25553aa73cd248422dd6297a92ab983f34085871365d6a2e22b8a6 +size 384 diff --git a/margin_logs/step_0000085.npy b/margin_logs/step_0000085.npy new file mode 100644 index 0000000..8a1732a --- /dev/null +++ b/margin_logs/step_0000085.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80ea580768779452f6da5ba76691bab00cd40d68df22bdc8de51cac0ae0d18ab +size 384 diff --git a/margin_logs/step_0000086.npy b/margin_logs/step_0000086.npy new file mode 100644 index 0000000..bf21104 --- /dev/null +++ b/margin_logs/step_0000086.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7f5dc8701f1ccd2c3bf2c76c1272028c8b51e6a528b4e293b299041e43316c8 +size 384 diff --git a/margin_logs/step_0000087.npy b/margin_logs/step_0000087.npy new file mode 100644 index 0000000..540aebf --- /dev/null +++ b/margin_logs/step_0000087.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42517d2620ae2a9b2f520ec8c32f7696f962018ce1254643d615e1e37b91446f +size 384 diff --git a/margin_logs/step_0000088.npy b/margin_logs/step_0000088.npy new file mode 100644 index 0000000..db3b12d --- /dev/null +++ b/margin_logs/step_0000088.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:649abc07f6ccdbf629551da061eb84c8b77760b2e2618b713dfea65090de08b6 +size 384 diff --git a/margin_logs/step_0000089.npy b/margin_logs/step_0000089.npy new file mode 100644 index 0000000..4eec770 --- /dev/null +++ b/margin_logs/step_0000089.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9139018e8b42b94012506a6c6bc8aef380aaccba1a595446c0f424652d1a5f7 +size 384 diff --git a/margin_logs/step_0000090.npy b/margin_logs/step_0000090.npy new file mode 100644 index 0000000..3e0d696 --- /dev/null +++ b/margin_logs/step_0000090.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ceb5255b2c0e358a8a57b69c7d1e8d00163f1d777f265de7c63307ad4578b69 +size 384 diff --git a/margin_logs/step_0000091.npy b/margin_logs/step_0000091.npy new file mode 100644 index 0000000..e8085ab --- /dev/null +++ b/margin_logs/step_0000091.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:179b5f88d2f3b56e297018db163b76ddae364c995af8e7dc5d35ac5c1b7d8260 +size 384 diff --git a/margin_logs/step_0000092.npy b/margin_logs/step_0000092.npy new file mode 100644 index 0000000..bde8e5c --- /dev/null +++ b/margin_logs/step_0000092.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27cc72c623eef3032b5981f4b2a2d606c1328f18713ef0065f4f82f188d4c9dd +size 384 diff --git a/margin_logs/step_0000093.npy b/margin_logs/step_0000093.npy new file mode 100644 index 0000000..a42f7c6 --- /dev/null +++ b/margin_logs/step_0000093.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bc263fbbdc03397992d97e684c42941c5ddde3b276692a7e80c2662df2ad143 +size 384 diff --git a/margin_logs/step_0000094.npy b/margin_logs/step_0000094.npy new file mode 100644 index 0000000..083f316 --- /dev/null +++ b/margin_logs/step_0000094.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b0e44435a5a6591f5a2e674569e2851c4e017cac38eb92281db73284cd266c4 +size 384 diff --git a/margin_logs/step_0000095.npy b/margin_logs/step_0000095.npy new file mode 100644 index 0000000..3dc0853 --- /dev/null +++ b/margin_logs/step_0000095.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9e9ab261d902396981a6d3be49b20d8658eb2f5da9782cd4944f9ad9a6ced3e +size 384 diff --git a/margin_logs/step_0000096.npy b/margin_logs/step_0000096.npy new file mode 100644 index 0000000..deeec35 --- /dev/null +++ b/margin_logs/step_0000096.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bed18c11d07869a525b7126412d4357fec73498c3331966cf6923c024f6eb24 +size 384 diff --git a/margin_logs/step_0000097.npy b/margin_logs/step_0000097.npy new file mode 100644 index 0000000..4109954 --- /dev/null +++ b/margin_logs/step_0000097.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a566cf286d5b4717a3589080cee9b7c7b832c1aab3d8fcae007de91c1847bff6 +size 384 diff --git a/margin_logs/step_0000098.npy b/margin_logs/step_0000098.npy new file mode 100644 index 0000000..212baf6 --- /dev/null +++ b/margin_logs/step_0000098.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36a7a5c131ea5b6da246bf361cb7f141c81f3c0a423fce37054ddb145772b8ce +size 384 diff --git a/margin_logs/step_0000099.npy b/margin_logs/step_0000099.npy new file mode 100644 index 0000000..6ee451b --- /dev/null +++ b/margin_logs/step_0000099.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03ea0aaf0e8ebc0f5156330ce8b6fee2017a50d35821eed43227ab056ffa5798 +size 384 diff --git a/margin_logs/step_0000100.npy b/margin_logs/step_0000100.npy new file mode 100644 index 0000000..9fbf70e --- /dev/null +++ b/margin_logs/step_0000100.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a3a51fbfe5600bdd3de4e55c8214d8a24e2a81fe90a57814fdca92579970582 +size 384 diff --git a/margin_logs/step_0000101.npy b/margin_logs/step_0000101.npy new file mode 100644 index 0000000..3b2de33 --- /dev/null +++ b/margin_logs/step_0000101.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fb6b246b35a5bf6a7d4130ff7260dcbaf1fe3382e752bbf83b2dbe9d392152a +size 384 diff --git a/margin_logs/step_0000102.npy b/margin_logs/step_0000102.npy new file mode 100644 index 0000000..13cbb12 --- /dev/null +++ b/margin_logs/step_0000102.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b3eff469757e1b1e603e3a57ec70ccdb90bbd20917c2fa1dbedba3bd787fd8d +size 384 diff --git a/margin_logs/step_0000103.npy b/margin_logs/step_0000103.npy new file mode 100644 index 0000000..f12c821 --- /dev/null +++ b/margin_logs/step_0000103.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f6e914ad02d5e34fe57d9eb627c3465892bb8910d0bb75e16a0261498e1f247 +size 384 diff --git a/margin_logs/step_0000104.npy b/margin_logs/step_0000104.npy new file mode 100644 index 0000000..8b3effe --- /dev/null +++ b/margin_logs/step_0000104.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8785b82b2f7a4efb6843392d84add73690a99b5015ca016dd57a7d50964eb960 +size 384 diff --git a/margin_logs/step_0000105.npy b/margin_logs/step_0000105.npy new file mode 100644 index 0000000..16f3867 --- /dev/null +++ b/margin_logs/step_0000105.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0255eb50a9c627616bd63aef61b1545d252a67d32daec986afcfbd8dccb596a +size 384 diff --git a/margin_logs/step_0000106.npy b/margin_logs/step_0000106.npy new file mode 100644 index 0000000..699069b --- /dev/null +++ b/margin_logs/step_0000106.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:449142b6c888906156d3c2f96ead791f686dfb690b223a889d521c431add4a6f +size 384 diff --git a/margin_logs/step_0000107.npy b/margin_logs/step_0000107.npy new file mode 100644 index 0000000..da5572e --- /dev/null +++ b/margin_logs/step_0000107.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c742b75e58d32422c1586333198b59624d8f8c0c8ee9dfae6d6fc3df4f0d919d +size 384 diff --git a/margin_logs/step_0000108.npy b/margin_logs/step_0000108.npy new file mode 100644 index 0000000..10b91a2 --- /dev/null +++ b/margin_logs/step_0000108.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e01de836fdec5617e603a7fc6296f6fbbd4b5f72985e6402b3a441cb1fa658d +size 384 diff --git a/margin_logs/step_0000109.npy b/margin_logs/step_0000109.npy new file mode 100644 index 0000000..581efde --- /dev/null +++ b/margin_logs/step_0000109.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d2620f0eb49329e1986db317e9a139e3953726733f510ae04e1081cfb25a68b +size 384 diff --git a/margin_logs/step_0000110.npy b/margin_logs/step_0000110.npy new file mode 100644 index 0000000..c3c6c9c --- /dev/null +++ b/margin_logs/step_0000110.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82686d179d48a7925804293431edd1c4c9474073f54ac29e14c5771b1b9374b6 +size 384 diff --git a/margin_logs/step_0000111.npy b/margin_logs/step_0000111.npy new file mode 100644 index 0000000..7efb372 --- /dev/null +++ b/margin_logs/step_0000111.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a142046db40d0af2fa3e08ce498c1d6bc59103c41ba39f949abe3e1c1734eb0a +size 384 diff --git a/margin_logs/step_0000112.npy b/margin_logs/step_0000112.npy new file mode 100644 index 0000000..db71834 --- /dev/null +++ b/margin_logs/step_0000112.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ec32ea9353b8d241e60bb4d5cb6d190d05dccd7a929c996e559bc1bf6a3ef5a +size 384 diff --git a/margin_logs/step_0000113.npy b/margin_logs/step_0000113.npy new file mode 100644 index 0000000..041cda3 --- /dev/null +++ b/margin_logs/step_0000113.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c41bd331abe7052373adefd6625d134ee20bbce57417547ca30dd7c45cc5116b +size 384 diff --git a/margin_logs/step_0000114.npy b/margin_logs/step_0000114.npy new file mode 100644 index 0000000..ce4b897 --- /dev/null +++ b/margin_logs/step_0000114.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62075bff6f7c2a892042c57a1b39d950bcfafbe00cd24a8905b5585962ce2217 +size 384 diff --git a/margin_logs/step_0000115.npy b/margin_logs/step_0000115.npy new file mode 100644 index 0000000..7888dd7 --- /dev/null +++ b/margin_logs/step_0000115.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36ca85eadaf2b66d2a8b3df08d86dcf59351b3f1e32259e9e16bdc507279c9b3 +size 384 diff --git a/margin_logs/step_0000116.npy b/margin_logs/step_0000116.npy new file mode 100644 index 0000000..39bd61a --- /dev/null +++ b/margin_logs/step_0000116.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34552fd60f3f96d81cc264b21a58245e155698e76dd4850b2b190d075471e89a +size 384 diff --git a/margin_logs/step_0000117.npy b/margin_logs/step_0000117.npy new file mode 100644 index 0000000..ded2707 --- /dev/null +++ b/margin_logs/step_0000117.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ead37375a4c165d8a0b73cb8e19f4732a175d0cf8b2889f2f2b6ff5ea267fe67 +size 384 diff --git a/margin_logs/step_0000118.npy b/margin_logs/step_0000118.npy new file mode 100644 index 0000000..a3a9793 --- /dev/null +++ b/margin_logs/step_0000118.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc9909bc0bcce4a6d9efb12501e75af0639bbcba9978492b82614e6c8aef5cef +size 384 diff --git a/margin_logs/step_0000119.npy b/margin_logs/step_0000119.npy new file mode 100644 index 0000000..c015e68 --- /dev/null +++ b/margin_logs/step_0000119.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e4186b65c325c555483d81f38a6242571dc7ca52b7d4faed97bc1e8b203a31c +size 384 diff --git a/margin_logs/step_0000120.npy b/margin_logs/step_0000120.npy new file mode 100644 index 0000000..2ef2101 --- /dev/null +++ b/margin_logs/step_0000120.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:704c73d424b834bb2c1c2e4ba1f1ce27a0fb888191ffdba0b2b5ca170c38959e +size 384 diff --git a/margin_logs/step_0000121.npy b/margin_logs/step_0000121.npy new file mode 100644 index 0000000..28d6576 --- /dev/null +++ b/margin_logs/step_0000121.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f66885078ea95d11d204d00ebce4cbc23274db1ffd2403f17cea2f224804d24 +size 384 diff --git a/margin_logs/step_0000122.npy b/margin_logs/step_0000122.npy new file mode 100644 index 0000000..a437ad2 --- /dev/null +++ b/margin_logs/step_0000122.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1253849089f5a732a96f4b7718eba9608c16eafeec456373a3e8deed4daaf9f0 +size 384 diff --git a/margin_logs/step_0000123.npy b/margin_logs/step_0000123.npy new file mode 100644 index 0000000..f2a7322 --- /dev/null +++ b/margin_logs/step_0000123.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:345d0d4b0a49100b4b9b4687f47305fd0ab2e79a2502d314279c8085af3062a7 +size 384 diff --git a/margin_logs/step_0000124.npy b/margin_logs/step_0000124.npy new file mode 100644 index 0000000..c190e45 --- /dev/null +++ b/margin_logs/step_0000124.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b16c1286ea1f720d19c2b2909765e45f5850ed233620060c459bf4f17b6b4b7 +size 384 diff --git a/margin_logs/step_0000125.npy b/margin_logs/step_0000125.npy new file mode 100644 index 0000000..47f4e8e --- /dev/null +++ b/margin_logs/step_0000125.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6483756c189d2cea58f4564dcdde22d926b87e9aac0f92bb844ed7e8db1f95fb +size 384 diff --git a/margin_logs/step_0000126.npy b/margin_logs/step_0000126.npy new file mode 100644 index 0000000..0d788b4 --- /dev/null +++ b/margin_logs/step_0000126.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b77177996511d76508513726ce97f737f46b019680208a061974ae6b4c3ff1c +size 384 diff --git a/margin_logs/step_0000127.npy b/margin_logs/step_0000127.npy new file mode 100644 index 0000000..190d82a --- /dev/null +++ b/margin_logs/step_0000127.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e5af812f8f1cbb6a0d4c6d1ac62c213609ea827c925d8b20bd116dc0b8b4506 +size 384 diff --git a/margin_logs/step_0000128.npy b/margin_logs/step_0000128.npy new file mode 100644 index 0000000..7e964d2 --- /dev/null +++ b/margin_logs/step_0000128.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03031506c599f42fad17f294aa560f8d7ad35924518ccfd13b96a2985ace2ddc +size 384 diff --git a/margin_logs/step_0000129.npy b/margin_logs/step_0000129.npy new file mode 100644 index 0000000..db9090b --- /dev/null +++ b/margin_logs/step_0000129.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a912068ac3b5f2b4af426703681a4f214042370962c91458a42ea6da2c676d78 +size 384 diff --git a/margin_logs/step_0000130.npy b/margin_logs/step_0000130.npy new file mode 100644 index 0000000..9720076 --- /dev/null +++ b/margin_logs/step_0000130.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66668a313fa063615886519ba8de5a3c0b2f151c2d18501490af30f8d18c3820 +size 384 diff --git a/margin_logs/step_0000131.npy b/margin_logs/step_0000131.npy new file mode 100644 index 0000000..2d70a29 --- /dev/null +++ b/margin_logs/step_0000131.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:763a44ec23712e813a2baf96db52907586bb8813fc7d340e86673c071a2590dc +size 384 diff --git a/margin_logs/step_0000132.npy b/margin_logs/step_0000132.npy new file mode 100644 index 0000000..44bc501 --- /dev/null +++ b/margin_logs/step_0000132.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fe16b4cbcca3bc5d03ce89bbef75c23191be1be8fc873cc57c83c3669fe061c +size 384 diff --git a/margin_logs/step_0000133.npy b/margin_logs/step_0000133.npy new file mode 100644 index 0000000..7c906a3 --- /dev/null +++ b/margin_logs/step_0000133.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:348e8dfe4baa631344f958e94f54db69366c437133a2369be218d02d5bcd252a +size 384 diff --git a/margin_logs/step_0000134.npy b/margin_logs/step_0000134.npy new file mode 100644 index 0000000..165be77 --- /dev/null +++ b/margin_logs/step_0000134.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9175a528e9ee8b8f4f23c09b7ca30c9d869d56070165854120c9afab4944fc44 +size 384 diff --git a/margin_logs/step_0000135.npy b/margin_logs/step_0000135.npy new file mode 100644 index 0000000..777bab4 --- /dev/null +++ b/margin_logs/step_0000135.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab54202a61673011cbbd80abd3cd12f65d26c321ba58ff6f114e120657ca8f9f +size 384 diff --git a/margin_logs/step_0000136.npy b/margin_logs/step_0000136.npy new file mode 100644 index 0000000..70404ef --- /dev/null +++ b/margin_logs/step_0000136.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba78bed0ec92f4147d49f68ee214e3d92242c896dd5ae8ab5ebd83d2e4ba81b3 +size 384 diff --git a/margin_logs/step_0000137.npy b/margin_logs/step_0000137.npy new file mode 100644 index 0000000..fa516f9 --- /dev/null +++ b/margin_logs/step_0000137.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bfb5587282926c1f7d2bb7713bc771c47c84adfe760fb024dae464df67b057d +size 384 diff --git a/margin_logs/step_0000138.npy b/margin_logs/step_0000138.npy new file mode 100644 index 0000000..4967963 --- /dev/null +++ b/margin_logs/step_0000138.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86fbe5ad470195be0a54288cb54f694325bea934e9611291a688012ce0c9c528 +size 384 diff --git a/margin_logs/step_0000139.npy b/margin_logs/step_0000139.npy new file mode 100644 index 0000000..17432b4 --- /dev/null +++ b/margin_logs/step_0000139.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ddf9f1182d9d170615371886b9c58693eafbfbaa2af0457b1063a1cec00cc9b +size 384 diff --git a/margin_logs/step_0000140.npy b/margin_logs/step_0000140.npy new file mode 100644 index 0000000..0608352 --- /dev/null +++ b/margin_logs/step_0000140.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc059d8158f8c78e15a8072b9fce97ad3ea0bbe39e01bae305726080e15bea06 +size 384 diff --git a/margin_logs/step_0000141.npy b/margin_logs/step_0000141.npy new file mode 100644 index 0000000..55f8c4d --- /dev/null +++ b/margin_logs/step_0000141.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:502d4e28a786c4fb8286edd7fb0c33a6956c24571042bc0a1f85ec9ec0255a89 +size 384 diff --git a/margin_logs/step_0000142.npy b/margin_logs/step_0000142.npy new file mode 100644 index 0000000..80b2e22 --- /dev/null +++ b/margin_logs/step_0000142.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3a317585301e7ba1c1b5b653e12a7f907eef21b93d92c9270a7ddf126770c76 +size 384 diff --git a/margin_logs/step_0000143.npy b/margin_logs/step_0000143.npy new file mode 100644 index 0000000..0a01b72 --- /dev/null +++ b/margin_logs/step_0000143.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6123506915a7f96db0a155c47780cd4cc594d13a725c278967c27bab7fccc2b1 +size 384 diff --git a/margin_logs/step_0000144.npy b/margin_logs/step_0000144.npy new file mode 100644 index 0000000..69162da --- /dev/null +++ b/margin_logs/step_0000144.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ac86a7d5d518c2542e868367625753ef3c67a3045b22e916abe833fc0347178 +size 384 diff --git a/margin_logs/step_0000145.npy b/margin_logs/step_0000145.npy new file mode 100644 index 0000000..c71a964 --- /dev/null +++ b/margin_logs/step_0000145.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32995aaf3bed01a74ee4d086374c0083d775ed246599dbf59a3e8b6672ffef1a +size 384 diff --git a/margin_logs/step_0000146.npy b/margin_logs/step_0000146.npy new file mode 100644 index 0000000..f6f6a4b --- /dev/null +++ b/margin_logs/step_0000146.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2545d4b0cdc2d6fb3e984cab3e5248e3c64d1ecc60491d5f5e13ba6bda8ff02 +size 384 diff --git a/margin_logs/step_0000147.npy b/margin_logs/step_0000147.npy new file mode 100644 index 0000000..40eaf2e --- /dev/null +++ b/margin_logs/step_0000147.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fa8b0d8d4c5c820f809932978964fb7d95dffaa7392f5d0f466032577385aa2 +size 384 diff --git a/margin_logs/step_0000148.npy b/margin_logs/step_0000148.npy new file mode 100644 index 0000000..a14f0b1 --- /dev/null +++ b/margin_logs/step_0000148.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b643911b8bb29eba7f5c02c7ce3d632edca1401644a978f501dd702032936617 +size 384 diff --git a/margin_logs/step_0000149.npy b/margin_logs/step_0000149.npy new file mode 100644 index 0000000..86dc6e8 --- /dev/null +++ b/margin_logs/step_0000149.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5350185253bff2631f06285cc94e8adde898c0f969a71084d4e4f1501184e7fc +size 384 diff --git a/margin_logs/step_0000150.npy b/margin_logs/step_0000150.npy new file mode 100644 index 0000000..d410431 --- /dev/null +++ b/margin_logs/step_0000150.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0457556f756faea99891b8b2f08a4b5aafa3c239de9eb5f3412285f9e3a7322e +size 384 diff --git a/margin_logs/step_0000151.npy b/margin_logs/step_0000151.npy new file mode 100644 index 0000000..c2d279a --- /dev/null +++ b/margin_logs/step_0000151.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8564f083038477f925dab28b645c5e1c7ef57e57febfa6d9ab144892a6ecb7f5 +size 384 diff --git a/margin_logs/step_0000152.npy b/margin_logs/step_0000152.npy new file mode 100644 index 0000000..a5f7053 --- /dev/null +++ b/margin_logs/step_0000152.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:217bd5994b4886fcf1c679617d0450000a98f339081c8defc7115ea8e1bb91a4 +size 384 diff --git a/margin_logs/step_0000153.npy b/margin_logs/step_0000153.npy new file mode 100644 index 0000000..f73371d --- /dev/null +++ b/margin_logs/step_0000153.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbdafa9b6a457ac578bfad19dd2ac2d7ecc5caf2dd005e610037988e27db6a1c +size 384 diff --git a/margin_logs/step_0000154.npy b/margin_logs/step_0000154.npy new file mode 100644 index 0000000..3734d9d --- /dev/null +++ b/margin_logs/step_0000154.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1193ede7580d435b64d318974ab4d01e9e660113682d87b540f24681bafa8bb7 +size 384 diff --git a/margin_logs/step_0000155.npy b/margin_logs/step_0000155.npy new file mode 100644 index 0000000..641c426 --- /dev/null +++ b/margin_logs/step_0000155.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bf071e1e0498ba63421506f1cb7a291974eca28d082c02efc84f576c588755c +size 384 diff --git a/margin_logs/step_0000156.npy b/margin_logs/step_0000156.npy new file mode 100644 index 0000000..cdcbcdd --- /dev/null +++ b/margin_logs/step_0000156.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42a5591ffaaacfba5aefbb426fd39cfec9e3098628315831d06f38fa1fdeb2a9 +size 384 diff --git a/margin_logs/step_0000157.npy b/margin_logs/step_0000157.npy new file mode 100644 index 0000000..a5a2129 --- /dev/null +++ b/margin_logs/step_0000157.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfac7e4c107affdbf68544f2065c99862eddffc10de50826ab7e0a4fc82e66d9 +size 384 diff --git a/margin_logs/step_0000158.npy b/margin_logs/step_0000158.npy new file mode 100644 index 0000000..1dc80df --- /dev/null +++ b/margin_logs/step_0000158.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fda266abb9eeed867ec822f51766ec4bcf1e9a3b375ea07579ee6a8c2dc2f08 +size 384 diff --git a/margin_logs/step_0000159.npy b/margin_logs/step_0000159.npy new file mode 100644 index 0000000..665c1c2 --- /dev/null +++ b/margin_logs/step_0000159.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fdb7679804eac7f9a1175b0e35bb343b23a9e7e446877d484510f533e120140 +size 384 diff --git a/margin_logs/step_0000160.npy b/margin_logs/step_0000160.npy new file mode 100644 index 0000000..1f4eff7 --- /dev/null +++ b/margin_logs/step_0000160.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88f7712a569f50537968f6beefc9c8945073438a7350070615e120a8d17d4178 +size 384 diff --git a/margin_logs/step_0000161.npy b/margin_logs/step_0000161.npy new file mode 100644 index 0000000..245412d --- /dev/null +++ b/margin_logs/step_0000161.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e090309fecb84259b63f9d8bf5968d7e24e4873f50d3f4ef87575f993a173dd1 +size 384 diff --git a/margin_logs/step_0000162.npy b/margin_logs/step_0000162.npy new file mode 100644 index 0000000..d1e3f78 --- /dev/null +++ b/margin_logs/step_0000162.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e4197ecb17f02bb47369d7ddc2ee3e8b629b605388c0b680925f8c2ad589e01 +size 384 diff --git a/margin_logs/step_0000163.npy b/margin_logs/step_0000163.npy new file mode 100644 index 0000000..70960f3 --- /dev/null +++ b/margin_logs/step_0000163.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d60130993d77a0360ac83b058e3f4f1d0509bbcb1ecc91a1016e9947eff2f4b +size 384 diff --git a/margin_logs/step_0000164.npy b/margin_logs/step_0000164.npy new file mode 100644 index 0000000..321005e --- /dev/null +++ b/margin_logs/step_0000164.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e00516f50cdc8f9ab27462626d9af3810fc084f079480fa1e73b4e803fef0c33 +size 384 diff --git a/margin_logs/step_0000165.npy b/margin_logs/step_0000165.npy new file mode 100644 index 0000000..17ab648 --- /dev/null +++ b/margin_logs/step_0000165.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d43a713bb997ad79d4bffa182dc225763e519d72e887b3e14671ce022f0c070 +size 384 diff --git a/margin_logs/step_0000166.npy b/margin_logs/step_0000166.npy new file mode 100644 index 0000000..1a7003b --- /dev/null +++ b/margin_logs/step_0000166.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd81a1c740dd443c2167e78126676b9bd584e983dbd096e7eeeefae2fd9ff5f2 +size 384 diff --git a/margin_logs/step_0000167.npy b/margin_logs/step_0000167.npy new file mode 100644 index 0000000..c1cc1f3 --- /dev/null +++ b/margin_logs/step_0000167.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52e8d1a7feb68d45e827af789e7826c6420a00d48d87f0f049b436201d69e142 +size 384 diff --git a/margin_logs/step_0000168.npy b/margin_logs/step_0000168.npy new file mode 100644 index 0000000..2032015 --- /dev/null +++ b/margin_logs/step_0000168.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c56fc83b3108bc447359a9b19a980a5fa95eaa5c8fdef20679255b727381185 +size 384 diff --git a/margin_logs/step_0000169.npy b/margin_logs/step_0000169.npy new file mode 100644 index 0000000..ab3f7a7 --- /dev/null +++ b/margin_logs/step_0000169.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bfad44990a5d9b89a14efd46147a5900e4d13f272667f5f6463ac1b1f84dfbd +size 384 diff --git a/margin_logs/step_0000170.npy b/margin_logs/step_0000170.npy new file mode 100644 index 0000000..365a087 --- /dev/null +++ b/margin_logs/step_0000170.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b11a6ed70fa15031ab2fb2c1e49a44c862410dd583171a2e880862dc4e8b1af7 +size 384 diff --git a/margin_logs/step_0000171.npy b/margin_logs/step_0000171.npy new file mode 100644 index 0000000..22b160f --- /dev/null +++ b/margin_logs/step_0000171.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:174c6fc62edce18758d09d562b12c3e62889dd690991464c383c17cff40afaad +size 384 diff --git a/margin_logs/step_0000172.npy b/margin_logs/step_0000172.npy new file mode 100644 index 0000000..19973de --- /dev/null +++ b/margin_logs/step_0000172.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19951fb6bf92402d8a28a2e2e359295b0cb5e61cd8b3051941c4148963371edc +size 384 diff --git a/margin_logs/step_0000173.npy b/margin_logs/step_0000173.npy new file mode 100644 index 0000000..3215a82 --- /dev/null +++ b/margin_logs/step_0000173.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ddbbbeb981fb21023fe91ec5f746cc845e7f11e5a43bf785ff2c781d0a9b204 +size 384 diff --git a/margin_logs/step_0000174.npy b/margin_logs/step_0000174.npy new file mode 100644 index 0000000..e778ee3 --- /dev/null +++ b/margin_logs/step_0000174.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db6870667a3e000caf6be222e16b43c137654988dfc7727b5e693398eb56ddaf +size 384 diff --git a/margin_logs/step_0000175.npy b/margin_logs/step_0000175.npy new file mode 100644 index 0000000..458dd76 --- /dev/null +++ b/margin_logs/step_0000175.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:125eba785b4159fbfd3c0e1ed1663896fde267ab44c781cde2577235fd00fdc2 +size 384 diff --git a/margin_logs/step_0000176.npy b/margin_logs/step_0000176.npy new file mode 100644 index 0000000..3d422fd --- /dev/null +++ b/margin_logs/step_0000176.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94375a2c4153cce55bba39532d6c13addd592453be6bd776457afaf6f6e67390 +size 384 diff --git a/margin_logs/step_0000177.npy b/margin_logs/step_0000177.npy new file mode 100644 index 0000000..0e721fe --- /dev/null +++ b/margin_logs/step_0000177.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93a59658ea84cc4a2f0b8bb96fab368dc84e62774ad03a9b97751ead39b1c0d8 +size 384 diff --git a/margin_logs/step_0000178.npy b/margin_logs/step_0000178.npy new file mode 100644 index 0000000..a5479e3 --- /dev/null +++ b/margin_logs/step_0000178.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb3b019c4903bcd5319e407ea5ba58dba6375a2fd79c5fd4a36c14a526dabb65 +size 384 diff --git a/margin_logs/step_0000179.npy b/margin_logs/step_0000179.npy new file mode 100644 index 0000000..02ee441 --- /dev/null +++ b/margin_logs/step_0000179.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5285258d003fbaef6a9b6753d62eb80c04401a5831acb3dc190be5c84f2ffd8 +size 384 diff --git a/margin_logs/step_0000180.npy b/margin_logs/step_0000180.npy new file mode 100644 index 0000000..53d3a13 --- /dev/null +++ b/margin_logs/step_0000180.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64169bfc00d64f0b37399f7e78ab8dd24bc8462557c661c0268fcd61faab21ab +size 384 diff --git a/margin_logs/step_0000181.npy b/margin_logs/step_0000181.npy new file mode 100644 index 0000000..9cd5cdf --- /dev/null +++ b/margin_logs/step_0000181.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e820fdc57b7006b616876a40833f6c044c59bf3ad0d17270051fc45d1f7fa6e4 +size 384 diff --git a/margin_logs/step_0000182.npy b/margin_logs/step_0000182.npy new file mode 100644 index 0000000..02dc389 --- /dev/null +++ b/margin_logs/step_0000182.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c5210bb5d9693603a162cd163302791858b11a28d60d8f36f1b5725d58980fa +size 384 diff --git a/margin_logs/step_0000183.npy b/margin_logs/step_0000183.npy new file mode 100644 index 0000000..1649583 --- /dev/null +++ b/margin_logs/step_0000183.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:643b8e34896b71c17bf86b11201b0829e7bf083f5b2861f79fd4c416449db20e +size 384 diff --git a/margin_logs/step_0000184.npy b/margin_logs/step_0000184.npy new file mode 100644 index 0000000..ff84f0b --- /dev/null +++ b/margin_logs/step_0000184.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:451b7f12ac2220b7eb33c458556547ab50f23c52e604e31fd6a8b290e0c91618 +size 384 diff --git a/margin_logs/step_0000185.npy b/margin_logs/step_0000185.npy new file mode 100644 index 0000000..8327207 --- /dev/null +++ b/margin_logs/step_0000185.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:380379403c19ed5ad62010e58a27da03e7cc62a36f2a692f2dc3fd8fbfb1c7b7 +size 384 diff --git a/margin_logs/step_0000186.npy b/margin_logs/step_0000186.npy new file mode 100644 index 0000000..6648e9b --- /dev/null +++ b/margin_logs/step_0000186.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2468c468d705d0e8abb0263ab9d0186731ccbc3bb42d0aab7ad1a8e86729055 +size 384 diff --git a/margin_logs/step_0000187.npy b/margin_logs/step_0000187.npy new file mode 100644 index 0000000..e6828c7 --- /dev/null +++ b/margin_logs/step_0000187.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1888f1348f3d5110beb8d67199a30f12cd7950cfa1ffdda676bf91fd64133cbb +size 384 diff --git a/margin_logs/step_0000188.npy b/margin_logs/step_0000188.npy new file mode 100644 index 0000000..2493f2d --- /dev/null +++ b/margin_logs/step_0000188.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f05b490f29519697859efa6fafa06dccb8dbea6e6086d7eb92736a5e589e7fa7 +size 384 diff --git a/margin_logs/step_0000189.npy b/margin_logs/step_0000189.npy new file mode 100644 index 0000000..35c65bf --- /dev/null +++ b/margin_logs/step_0000189.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3577e608c93433d70c2099a9790f6b06e36625f355d84b3ad1646eccf2969cf2 +size 384 diff --git a/margin_logs/step_0000190.npy b/margin_logs/step_0000190.npy new file mode 100644 index 0000000..4bb81af --- /dev/null +++ b/margin_logs/step_0000190.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a91c6b22f5d3f63d1477509532e6ec57b7715dbd6e2df6394338384aa3d64ae +size 384 diff --git a/margin_logs/step_0000191.npy b/margin_logs/step_0000191.npy new file mode 100644 index 0000000..1652293 --- /dev/null +++ b/margin_logs/step_0000191.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b60b3937b061d6dfc2115a6f67841b9879f03f1a6840b67e73807fa851c64546 +size 384 diff --git a/margin_logs/step_0000192.npy b/margin_logs/step_0000192.npy new file mode 100644 index 0000000..46afd2b --- /dev/null +++ b/margin_logs/step_0000192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:462d9a11571eefc42b0bfdbb44d17ea3fb5ad05566c7f1335cec6e33eef87f31 +size 384 diff --git a/margin_logs/step_0000193.npy b/margin_logs/step_0000193.npy new file mode 100644 index 0000000..a88c591 --- /dev/null +++ b/margin_logs/step_0000193.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba177223978594b8e7f0b0750410177fc18257edd606273ecc1578e7bc432780 +size 384 diff --git a/margin_logs/step_0000194.npy b/margin_logs/step_0000194.npy new file mode 100644 index 0000000..e7197d2 --- /dev/null +++ b/margin_logs/step_0000194.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22cbcf10fb2eae9519519f62f68b783a8437bda0fc2311f482372dda7f067b65 +size 384 diff --git a/margin_logs/step_0000195.npy b/margin_logs/step_0000195.npy new file mode 100644 index 0000000..1f499f6 --- /dev/null +++ b/margin_logs/step_0000195.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9676a83021d79b77f2e1b7e4d474fcbb43e6e6300f1653b32f43b013401c706 +size 384 diff --git a/margin_logs/step_0000196.npy b/margin_logs/step_0000196.npy new file mode 100644 index 0000000..8b80626 --- /dev/null +++ b/margin_logs/step_0000196.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48f3004cff9c35174b02c9a34d00981ed8e471851d662c51e1cc0037a105ad7e +size 384 diff --git a/margin_logs/step_0000197.npy b/margin_logs/step_0000197.npy new file mode 100644 index 0000000..c454453 --- /dev/null +++ b/margin_logs/step_0000197.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c194b2e9437ea7bdabe50e277d7710dc48c28d28fefcfd6b2256d91bb3571c7 +size 384 diff --git a/margin_logs/step_0000198.npy b/margin_logs/step_0000198.npy new file mode 100644 index 0000000..0ddea13 --- /dev/null +++ b/margin_logs/step_0000198.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deca021174df898a0165c34ad1eca1073b812b0cf3ffdc3242fbf37f894dc4c2 +size 384 diff --git a/margin_logs/step_0000199.npy b/margin_logs/step_0000199.npy new file mode 100644 index 0000000..8cc6b9f --- /dev/null +++ b/margin_logs/step_0000199.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0e30dcf77a8ce6609abde17f75c4820cd8d51675beeef414bc389cac39300e8 +size 384 diff --git a/margin_logs/step_0000200.npy b/margin_logs/step_0000200.npy new file mode 100644 index 0000000..7427899 --- /dev/null +++ b/margin_logs/step_0000200.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d9eed2aa32bf7e2a9ea69aff257dce74c9dc3afc0c0e7d8dd5e0858e9c9a3e7 +size 384 diff --git a/margin_logs/step_0000201.npy b/margin_logs/step_0000201.npy new file mode 100644 index 0000000..04de548 --- /dev/null +++ b/margin_logs/step_0000201.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6deb3bc8a04b90efadc45dcc88b9f8e164132f954bfc5699352f3a98096f8a +size 384 diff --git a/margin_logs/step_0000202.npy b/margin_logs/step_0000202.npy new file mode 100644 index 0000000..701e68e --- /dev/null +++ b/margin_logs/step_0000202.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3d3aa5e691774efc3eb2b0d9ff4c00a2d09c82ec49878d3f81501951d781a90 +size 384 diff --git a/margin_logs/step_0000203.npy b/margin_logs/step_0000203.npy new file mode 100644 index 0000000..c64b12d --- /dev/null +++ b/margin_logs/step_0000203.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a28fd61be71cd188848403c04c5a33c0c2be9532bc0bdb7c9eb0c48cc3d1f088 +size 384 diff --git a/margin_logs/step_0000204.npy b/margin_logs/step_0000204.npy new file mode 100644 index 0000000..c5144b8 --- /dev/null +++ b/margin_logs/step_0000204.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1da0328c4daaa3ed9a38283bc5fbefaa63f2fb43d20a5069321ee8572c6d20d +size 384 diff --git a/margin_logs/step_0000205.npy b/margin_logs/step_0000205.npy new file mode 100644 index 0000000..57443f2 --- /dev/null +++ b/margin_logs/step_0000205.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d07ccb0dfd080ba9a6b642a1e2ece31fb0a6a03f36e04f62dee144ba9250bfd +size 384 diff --git a/margin_logs/step_0000206.npy b/margin_logs/step_0000206.npy new file mode 100644 index 0000000..13070fa --- /dev/null +++ b/margin_logs/step_0000206.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a5e3004363ddce93d6d175469ecd53ff987e53daf6955796bc0a3cc8875a61a +size 384 diff --git a/margin_logs/step_0000207.npy b/margin_logs/step_0000207.npy new file mode 100644 index 0000000..0201966 --- /dev/null +++ b/margin_logs/step_0000207.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:093275929d22c07fc3ec28db50eb828892dc141e141e0ef2a3dc34aebea6ff19 +size 384 diff --git a/margin_logs/step_0000208.npy b/margin_logs/step_0000208.npy new file mode 100644 index 0000000..f0b1f7c --- /dev/null +++ b/margin_logs/step_0000208.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:965dbce8613c12df46b0c8aa884f27d53f8f22395752dc6ff7004b8d16389bf0 +size 384 diff --git a/margin_logs/step_0000209.npy b/margin_logs/step_0000209.npy new file mode 100644 index 0000000..4a522a8 --- /dev/null +++ b/margin_logs/step_0000209.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41d990f2d699049cc1858ee446d8b56fd1e52f2e3a42c2baec5a55bada0a6f54 +size 384 diff --git a/margin_logs/step_0000210.npy b/margin_logs/step_0000210.npy new file mode 100644 index 0000000..8deeda9 --- /dev/null +++ b/margin_logs/step_0000210.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:475fdcd7511a2c2d6f2ad3950811bdeb8ee980328c64a081da5cd1ad36e6edc5 +size 384 diff --git a/margin_logs/step_0000211.npy b/margin_logs/step_0000211.npy new file mode 100644 index 0000000..78296b2 --- /dev/null +++ b/margin_logs/step_0000211.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfaffc790d23134190b809fe8de27c28b50443c966599983e20dfb6e19463b68 +size 384 diff --git a/margin_logs/step_0000212.npy b/margin_logs/step_0000212.npy new file mode 100644 index 0000000..04296cf --- /dev/null +++ b/margin_logs/step_0000212.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b66d79f1bf5f99b386572135cb153f3a73d4e428562e93f458e028482e4a0f7d +size 384 diff --git a/margin_logs/step_0000213.npy b/margin_logs/step_0000213.npy new file mode 100644 index 0000000..463b96b --- /dev/null +++ b/margin_logs/step_0000213.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e5247a05dda3356de3bab2a8d43d31897b808bf63f52fcb2288a8f05691836a +size 384 diff --git a/margin_logs/step_0000214.npy b/margin_logs/step_0000214.npy new file mode 100644 index 0000000..7aa9b5c --- /dev/null +++ b/margin_logs/step_0000214.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aecccc2b9979f95d006f87955bd9a2d5bd71714685a0ca1792d771441b3751c4 +size 384 diff --git a/margin_logs/step_0000215.npy b/margin_logs/step_0000215.npy new file mode 100644 index 0000000..230646a --- /dev/null +++ b/margin_logs/step_0000215.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd764774e91607a3e7ae860fdd0c21695009dc96158baf085a0537741e73a5be +size 384 diff --git a/margin_logs/step_0000216.npy b/margin_logs/step_0000216.npy new file mode 100644 index 0000000..ba106db --- /dev/null +++ b/margin_logs/step_0000216.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e370ecdb941e8fe4db3b51597f47895a8492df9878f57c6edb58e5f01282789 +size 384 diff --git a/margin_logs/step_0000217.npy b/margin_logs/step_0000217.npy new file mode 100644 index 0000000..e5c427e --- /dev/null +++ b/margin_logs/step_0000217.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c05bc8439931b4fd7d8c9cff6ebdfed40fed64014d8588257a1d2c3f74791cf +size 384 diff --git a/margin_logs/step_0000218.npy b/margin_logs/step_0000218.npy new file mode 100644 index 0000000..d8068b1 --- /dev/null +++ b/margin_logs/step_0000218.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a66f34066be92608eb02e71dd0a08e718aa32bf82d939e1433b0b9bce4b61cd +size 384 diff --git a/margin_logs/step_0000219.npy b/margin_logs/step_0000219.npy new file mode 100644 index 0000000..86767fc --- /dev/null +++ b/margin_logs/step_0000219.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa412a5e9d13a276ccdc76c54525d94dc0b685d340c86adb4ce0e9608f9af699 +size 384 diff --git a/margin_logs/step_0000220.npy b/margin_logs/step_0000220.npy new file mode 100644 index 0000000..c9f303b --- /dev/null +++ b/margin_logs/step_0000220.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bdbd755cda32cfa5b41f0f4aa955ffde0ddc62f8934ed760f47210305c46799 +size 384 diff --git a/margin_logs/step_0000221.npy b/margin_logs/step_0000221.npy new file mode 100644 index 0000000..bf8a18e --- /dev/null +++ b/margin_logs/step_0000221.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32923ea35c0efc567c582b334a36263519e419ba37df86d54bee4d4e1a3fa0d5 +size 384 diff --git a/margin_logs/step_0000222.npy b/margin_logs/step_0000222.npy new file mode 100644 index 0000000..b988ebb --- /dev/null +++ b/margin_logs/step_0000222.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:196f6176e8b76ed5b2e6cb8eeb4e879002484d9e7a8189a2c7f2b9b274536c49 +size 384 diff --git a/margin_logs/step_0000223.npy b/margin_logs/step_0000223.npy new file mode 100644 index 0000000..a8cb752 --- /dev/null +++ b/margin_logs/step_0000223.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03d2960da28c0f7abbd8e155585582f1c45cad4771e86f90f7bc10f9103a7443 +size 384 diff --git a/margin_logs/step_0000224.npy b/margin_logs/step_0000224.npy new file mode 100644 index 0000000..86d2577 --- /dev/null +++ b/margin_logs/step_0000224.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:115c8456e98d4b2df561126ff196925d101463b5452ac392591605c8c8622a6d +size 384 diff --git a/margin_logs/step_0000225.npy b/margin_logs/step_0000225.npy new file mode 100644 index 0000000..5ead0af --- /dev/null +++ b/margin_logs/step_0000225.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:797e72ebc898bfec2ac09e3cdcae54dbe52fde0e8900d0fd8ee6dafd77dfe28c +size 384 diff --git a/margin_logs/step_0000226.npy b/margin_logs/step_0000226.npy new file mode 100644 index 0000000..f034154 --- /dev/null +++ b/margin_logs/step_0000226.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aaeaa19068e587e78b059ff773216ed80feb34fffcf5486f3f4ab1a11ec7b78 +size 384 diff --git a/margin_logs/step_0000227.npy b/margin_logs/step_0000227.npy new file mode 100644 index 0000000..a63c87f --- /dev/null +++ b/margin_logs/step_0000227.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df36d82d9f11b904d1ed869698bdaf183f6012089967a6dc67f7ab0d597f1647 +size 384 diff --git a/margin_logs/step_0000228.npy b/margin_logs/step_0000228.npy new file mode 100644 index 0000000..8a89dc7 --- /dev/null +++ b/margin_logs/step_0000228.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfae7d8970d960a3184d511fc4241667a25ff8b36f861d43ae8dfe9ef71acc18 +size 384 diff --git a/margin_logs/step_0000229.npy b/margin_logs/step_0000229.npy new file mode 100644 index 0000000..e061099 --- /dev/null +++ b/margin_logs/step_0000229.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a1c2c5dcde33ad2fee144c072c0ddd3833a0033fbf278cb72a667443d64b010 +size 384 diff --git a/margin_logs/step_0000230.npy b/margin_logs/step_0000230.npy new file mode 100644 index 0000000..56121e6 --- /dev/null +++ b/margin_logs/step_0000230.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:371224f8241dbfa2b02b42cd4689958c72d69d527c37e344111cbb97646e2070 +size 384 diff --git a/margin_logs/step_0000231.npy b/margin_logs/step_0000231.npy new file mode 100644 index 0000000..f9e3fea --- /dev/null +++ b/margin_logs/step_0000231.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d79d8f57f5cbe80e21b24abfe2756a636282ccafe45f681417ab0ca9a8a89a29 +size 384 diff --git a/margin_logs/step_0000232.npy b/margin_logs/step_0000232.npy new file mode 100644 index 0000000..d71f5f6 --- /dev/null +++ b/margin_logs/step_0000232.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbf5cd966b46ef5a0d54816851fbe91a8353deb1253c99b908a699696e8af243 +size 384 diff --git a/margin_logs/step_0000233.npy b/margin_logs/step_0000233.npy new file mode 100644 index 0000000..94fd2bb --- /dev/null +++ b/margin_logs/step_0000233.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f02323c12e94bfe81ac8f128951a9451098f8edcb5ace91b9f2e67b2ecc3057b +size 384 diff --git a/margin_logs/step_0000234.npy b/margin_logs/step_0000234.npy new file mode 100644 index 0000000..26a98ca --- /dev/null +++ b/margin_logs/step_0000234.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ef20c7e4075f3646d9d35d6a501016f59c25d72c861afa106c0267574d44754 +size 384 diff --git a/margin_logs/step_0000235.npy b/margin_logs/step_0000235.npy new file mode 100644 index 0000000..d43fe8c --- /dev/null +++ b/margin_logs/step_0000235.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c9415ebd43e65820dabf4f75fb05915f0d1d176a29b26bb6a2ceb16236cbada +size 384 diff --git a/margin_logs/step_0000236.npy b/margin_logs/step_0000236.npy new file mode 100644 index 0000000..dc88ea6 --- /dev/null +++ b/margin_logs/step_0000236.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22a50af29649e54c1130121dc3e5cd52b31f4b2d00640252e76454122802914c +size 384 diff --git a/margin_logs/step_0000237.npy b/margin_logs/step_0000237.npy new file mode 100644 index 0000000..c82417a --- /dev/null +++ b/margin_logs/step_0000237.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08c73b148286d4714b68390bbc499e5b2862da30d12acfe5ea9598f8164ee325 +size 384 diff --git a/margin_logs/step_0000238.npy b/margin_logs/step_0000238.npy new file mode 100644 index 0000000..19bf104 --- /dev/null +++ b/margin_logs/step_0000238.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdce4ed1c4cbcc41f18262b0e8ccbcbf788183f20ab9202d77aa91ab64c37a50 +size 384 diff --git a/margin_logs/step_0000239.npy b/margin_logs/step_0000239.npy new file mode 100644 index 0000000..210cb50 --- /dev/null +++ b/margin_logs/step_0000239.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ac33808e4f2ccae36864d2993e074648eb953746929e4950d214b5f26bc7512 +size 384 diff --git a/margin_logs/step_0000240.npy b/margin_logs/step_0000240.npy new file mode 100644 index 0000000..2a64f40 --- /dev/null +++ b/margin_logs/step_0000240.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf79d004e689510ff4f9f0704d1145f40e627fcff77eead855a083db5427a106 +size 384 diff --git a/margin_logs/step_0000241.npy b/margin_logs/step_0000241.npy new file mode 100644 index 0000000..59e9949 --- /dev/null +++ b/margin_logs/step_0000241.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:010b279cb5e3b55884ad1045ef095795cb2606a176773384a9d6811d0a0ee5dc +size 384 diff --git a/margin_logs/step_0000242.npy b/margin_logs/step_0000242.npy new file mode 100644 index 0000000..c0b5359 --- /dev/null +++ b/margin_logs/step_0000242.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9de7690d28df96432e83175be3775fee45ff424f24ed2ccfacf80e727a455093 +size 384 diff --git a/margin_logs/step_0000243.npy b/margin_logs/step_0000243.npy new file mode 100644 index 0000000..2829e30 --- /dev/null +++ b/margin_logs/step_0000243.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42d213ffd5d89bcaa64270136c738401bb54254afee90a9a01b078e8fec7e084 +size 384 diff --git a/margin_logs/step_0000244.npy b/margin_logs/step_0000244.npy new file mode 100644 index 0000000..bfca305 --- /dev/null +++ b/margin_logs/step_0000244.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14cfb88a19436ea1c5f954201c6b8a784cdc5dfaee32689c3219894962b99b49 +size 384 diff --git a/margin_logs/step_0000245.npy b/margin_logs/step_0000245.npy new file mode 100644 index 0000000..c2215da --- /dev/null +++ b/margin_logs/step_0000245.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e7ad157172d7076d7edfd1c4bfefd206878ecd0fa540f965f9e0713f29f0964 +size 384 diff --git a/margin_logs/step_0000246.npy b/margin_logs/step_0000246.npy new file mode 100644 index 0000000..f077fea --- /dev/null +++ b/margin_logs/step_0000246.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dfdaf16875caf2aedcc4147a4378b7acdfd16a6e53d6e25afec1f312105a979 +size 384 diff --git a/margin_logs/step_0000247.npy b/margin_logs/step_0000247.npy new file mode 100644 index 0000000..acfb675 --- /dev/null +++ b/margin_logs/step_0000247.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202462bbec8749609b0b846eb7a6f63540afa316a631e33ba22cec1d6a1f3a5a +size 384 diff --git a/margin_logs/step_0000248.npy b/margin_logs/step_0000248.npy new file mode 100644 index 0000000..144e31a --- /dev/null +++ b/margin_logs/step_0000248.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47d73ad73805c21b02ba618368e86867457a0cb453f7a52ba34fd55e14bae244 +size 384 diff --git a/margin_logs/step_0000249.npy b/margin_logs/step_0000249.npy new file mode 100644 index 0000000..9303a0a --- /dev/null +++ b/margin_logs/step_0000249.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e546b07083aded02c63e0fac1a6d438ba603fb9be18af957d1d53519ca768f98 +size 384 diff --git a/margin_logs/step_0000250.npy b/margin_logs/step_0000250.npy new file mode 100644 index 0000000..a8bb590 --- /dev/null +++ b/margin_logs/step_0000250.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfc7cda534738445b57e484dc057a1d9c8536f19dea6486c9da93ff8a48db97f +size 384 diff --git a/margin_logs/step_0000251.npy b/margin_logs/step_0000251.npy new file mode 100644 index 0000000..7a3c297 --- /dev/null +++ b/margin_logs/step_0000251.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24d19bece32ea41c461f4ab52330fb123d5f59acd8108eac14fc8555a3a95d47 +size 384 diff --git a/margin_logs/step_0000252.npy b/margin_logs/step_0000252.npy new file mode 100644 index 0000000..abba09c --- /dev/null +++ b/margin_logs/step_0000252.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45cd2467a938e86c48df0716b0a3d0218916b7aaed1597b314f4803ade43b828 +size 384 diff --git a/margin_logs/step_0000253.npy b/margin_logs/step_0000253.npy new file mode 100644 index 0000000..20d7eba --- /dev/null +++ b/margin_logs/step_0000253.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b921b9ebf86e3fdd0fb2aa9e76ba5184e15bfcbd05ab996de8fa6d071788103b +size 384 diff --git a/margin_logs/step_0000254.npy b/margin_logs/step_0000254.npy new file mode 100644 index 0000000..71bf05f --- /dev/null +++ b/margin_logs/step_0000254.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:085508740bb030d402d46e2393e34422b2c6503a8a8817b5e18845e2a98b60b2 +size 384 diff --git a/margin_logs/step_0000255.npy b/margin_logs/step_0000255.npy new file mode 100644 index 0000000..649495b --- /dev/null +++ b/margin_logs/step_0000255.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a69825b45b5083345c76c2d1468ecd512906f7a13f776ffb4bacfbcb8456037f +size 384 diff --git a/margin_logs/step_0000256.npy b/margin_logs/step_0000256.npy new file mode 100644 index 0000000..606a5d8 --- /dev/null +++ b/margin_logs/step_0000256.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:881fd73253950f0c474d922c16c7d95f91ff6e783671e4aeedc864d5603e4a0f +size 384 diff --git a/margin_logs/step_0000257.npy b/margin_logs/step_0000257.npy new file mode 100644 index 0000000..6ae6974 --- /dev/null +++ b/margin_logs/step_0000257.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66801166bdd5ddce3d97aec90bf73438e492c989a1bf8cbb64fe35dd0d970af8 +size 384 diff --git a/margin_logs/step_0000258.npy b/margin_logs/step_0000258.npy new file mode 100644 index 0000000..a450e79 --- /dev/null +++ b/margin_logs/step_0000258.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f0c0311c1c5d0d426286288734840cadfb225fdd687751fafb5df866b4db62d +size 384 diff --git a/margin_logs/step_0000259.npy b/margin_logs/step_0000259.npy new file mode 100644 index 0000000..d0db477 --- /dev/null +++ b/margin_logs/step_0000259.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9963ab1ff6b962e9f4fdf10de879550dd5544a513c88c771cf91ebe7461c35f +size 384 diff --git a/margin_logs/step_0000260.npy b/margin_logs/step_0000260.npy new file mode 100644 index 0000000..2537363 --- /dev/null +++ b/margin_logs/step_0000260.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc11728d94fc315496965e8cd62bb9b77b65487dd0ef8b6e4cccbc31b4f1b7b1 +size 384 diff --git a/margin_logs/step_0000261.npy b/margin_logs/step_0000261.npy new file mode 100644 index 0000000..c5829e8 --- /dev/null +++ b/margin_logs/step_0000261.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e95046993463c298c1b6dffc204bfa66d585adb5971e92abd860dd277be8c626 +size 384 diff --git a/margin_logs/step_0000262.npy b/margin_logs/step_0000262.npy new file mode 100644 index 0000000..ca1fcd2 --- /dev/null +++ b/margin_logs/step_0000262.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a56cb340c96ce7167c77f01807f25f907df4eb42a4b90404d2af6cb0c6be4815 +size 384 diff --git a/margin_logs/step_0000263.npy b/margin_logs/step_0000263.npy new file mode 100644 index 0000000..411c9e4 --- /dev/null +++ b/margin_logs/step_0000263.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff9300b42ac068add8aac9565cd701c2540d206a6a3223614edc85b446167e84 +size 384 diff --git a/margin_logs/step_0000264.npy b/margin_logs/step_0000264.npy new file mode 100644 index 0000000..141973a --- /dev/null +++ b/margin_logs/step_0000264.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27ee778cc30de872eb11353b9ff7b8c582715edc88db30598042c70ac4252a84 +size 384 diff --git a/margin_logs/step_0000265.npy b/margin_logs/step_0000265.npy new file mode 100644 index 0000000..77625fb --- /dev/null +++ b/margin_logs/step_0000265.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f6f1016b2c162236ba15a46be576684b162035c658e16c1b754b72766a0a511 +size 384 diff --git a/margin_logs/step_0000266.npy b/margin_logs/step_0000266.npy new file mode 100644 index 0000000..58f1274 --- /dev/null +++ b/margin_logs/step_0000266.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:002fbd440c7c8598b8a22157ea20b35317f8e6b34437dcb9e374a49b690cf515 +size 384 diff --git a/margin_logs/step_0000267.npy b/margin_logs/step_0000267.npy new file mode 100644 index 0000000..4941d7e --- /dev/null +++ b/margin_logs/step_0000267.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6804ba9d2c0e9020ca6beff1242b2855b62f772ed80df013f5e584d3f58fceb +size 384 diff --git a/margin_logs/step_0000268.npy b/margin_logs/step_0000268.npy new file mode 100644 index 0000000..4b0cd2f --- /dev/null +++ b/margin_logs/step_0000268.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fb6118b722dc7e6eee004b567e83022109dd1973b8c5cb97e340565f3b77b39 +size 384 diff --git a/margin_logs/step_0000269.npy b/margin_logs/step_0000269.npy new file mode 100644 index 0000000..d39ea3b --- /dev/null +++ b/margin_logs/step_0000269.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:510e786239eda150c83b358cb468f556beddbe58de27e02efac98b8df930a438 +size 384 diff --git a/margin_logs/step_0000270.npy b/margin_logs/step_0000270.npy new file mode 100644 index 0000000..777f8da --- /dev/null +++ b/margin_logs/step_0000270.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ddb72ceca1f0ec27b87f4bdd22dc24a529a003fe37cd60ec78907f13ef12794 +size 384 diff --git a/margin_logs/step_0000271.npy b/margin_logs/step_0000271.npy new file mode 100644 index 0000000..a05ecd0 --- /dev/null +++ b/margin_logs/step_0000271.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57c75f0144305eb781571a6d02c3cda6dbb844c064090a92d7639bc4bb1c1319 +size 384 diff --git a/margin_logs/step_0000272.npy b/margin_logs/step_0000272.npy new file mode 100644 index 0000000..3dd4255 --- /dev/null +++ b/margin_logs/step_0000272.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f14c098cefcfe73e7db324c49007d47744eb08801a2146307635205a1a7f710c +size 384 diff --git a/margin_logs/step_0000273.npy b/margin_logs/step_0000273.npy new file mode 100644 index 0000000..270ffc1 --- /dev/null +++ b/margin_logs/step_0000273.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23001d8b17246d7bdc85d524161f8b412cb27005ba248441027dbfa05b85f144 +size 384 diff --git a/margin_logs/step_0000274.npy b/margin_logs/step_0000274.npy new file mode 100644 index 0000000..49bbc24 --- /dev/null +++ b/margin_logs/step_0000274.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a8470ba6b2aabc086c61e1fea48bd68327790a1005801010adf96167fa661fa +size 384 diff --git a/margin_logs/step_0000275.npy b/margin_logs/step_0000275.npy new file mode 100644 index 0000000..c48ad1a --- /dev/null +++ b/margin_logs/step_0000275.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b11e1758c6dbf226913a9db4fb875fe526b8d6ca5698694e13f0a14b1dbb426 +size 384 diff --git a/margin_logs/step_0000276.npy b/margin_logs/step_0000276.npy new file mode 100644 index 0000000..74d1d7a --- /dev/null +++ b/margin_logs/step_0000276.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e51a27b84e9a3bea9d6d0d786a1039070c8cf515c8dfb27222c2c685bc266d9 +size 384 diff --git a/margin_logs/step_0000277.npy b/margin_logs/step_0000277.npy new file mode 100644 index 0000000..ec11f54 --- /dev/null +++ b/margin_logs/step_0000277.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f656de2c6a1238a2cea7837a95916876c9d54397890d12606b1c4dae80778b4c +size 384 diff --git a/margin_logs/step_0000278.npy b/margin_logs/step_0000278.npy new file mode 100644 index 0000000..6c6fb63 --- /dev/null +++ b/margin_logs/step_0000278.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b07a05a005bc9e06444e09229f47cf816b65f4a9337ec4137c779cb35bc62862 +size 384 diff --git a/margin_logs/step_0000279.npy b/margin_logs/step_0000279.npy new file mode 100644 index 0000000..85b1bb5 --- /dev/null +++ b/margin_logs/step_0000279.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d64879e5f48165165d5864dbd020005a40957bb82bb203bf005861216c64cc8b +size 384 diff --git a/margin_logs/step_0000280.npy b/margin_logs/step_0000280.npy new file mode 100644 index 0000000..cd0cacd --- /dev/null +++ b/margin_logs/step_0000280.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07e8d1af61279a938b7bfbc20cf32b7ac790efb49da7ae26d6502159da8b7a41 +size 384 diff --git a/margin_logs/step_0000281.npy b/margin_logs/step_0000281.npy new file mode 100644 index 0000000..b076331 --- /dev/null +++ b/margin_logs/step_0000281.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c92df8bd3c355d38dc4b09e6f014a75e1adccd186001bb8bde24416897ed8636 +size 384 diff --git a/margin_logs/step_0000282.npy b/margin_logs/step_0000282.npy new file mode 100644 index 0000000..909f135 --- /dev/null +++ b/margin_logs/step_0000282.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:532223cc111c41952b2441156e24498db88ccf2ba47494671c644f3d9292f56f +size 384 diff --git a/margin_logs/step_0000283.npy b/margin_logs/step_0000283.npy new file mode 100644 index 0000000..3933cf9 --- /dev/null +++ b/margin_logs/step_0000283.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5850c6ed7be595f2a00587d128f05d5dd8526f914619697b307ac7cf81dd0436 +size 384 diff --git a/margin_logs/step_0000284.npy b/margin_logs/step_0000284.npy new file mode 100644 index 0000000..d887720 --- /dev/null +++ b/margin_logs/step_0000284.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c0d1eb6ae17af486e0c0c58ab794b059e44c10ff9e46431843e2c1786eea8bb +size 384 diff --git a/margin_logs/step_0000285.npy b/margin_logs/step_0000285.npy new file mode 100644 index 0000000..5f89a9d --- /dev/null +++ b/margin_logs/step_0000285.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce4ba3fee79ff1e3aab3f6f5bc57c6b536b9dcf7a2f4614f243cd393d74267b1 +size 384 diff --git a/margin_logs/step_0000286.npy b/margin_logs/step_0000286.npy new file mode 100644 index 0000000..3deeba1 --- /dev/null +++ b/margin_logs/step_0000286.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02691e683d9f0a1ed7d765e76ba3f513edab035a1a35786f9c82e48a4db184f7 +size 384 diff --git a/margin_logs/step_0000287.npy b/margin_logs/step_0000287.npy new file mode 100644 index 0000000..5340e3b --- /dev/null +++ b/margin_logs/step_0000287.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3af9534ac2c9194a6ef00995ec413279ce3c72f9103fb13252532fd98b5344e6 +size 384 diff --git a/margin_logs/step_0000288.npy b/margin_logs/step_0000288.npy new file mode 100644 index 0000000..9a8409a --- /dev/null +++ b/margin_logs/step_0000288.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35dd89049f472f37fb1ad8c61b8f276dafb80700eaaacd230157dc09e99df9a0 +size 384 diff --git a/margin_logs/step_0000289.npy b/margin_logs/step_0000289.npy new file mode 100644 index 0000000..f05d5f3 --- /dev/null +++ b/margin_logs/step_0000289.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2988e0b08685774f894c7cdc6b7b2af74c9b14a883b90e49a090f77f93a03a48 +size 384 diff --git a/margin_logs/step_0000290.npy b/margin_logs/step_0000290.npy new file mode 100644 index 0000000..e521c72 --- /dev/null +++ b/margin_logs/step_0000290.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7fbf9438fbc35f26537bea8b758217a2b03faa506a0b68d70eade439e07b72b +size 384 diff --git a/margin_logs/step_0000291.npy b/margin_logs/step_0000291.npy new file mode 100644 index 0000000..9871424 --- /dev/null +++ b/margin_logs/step_0000291.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09928f7d3ca242a3c8360aa9414e560dc646a449093ce33e5835815719967b57 +size 384 diff --git a/margin_logs/step_0000292.npy b/margin_logs/step_0000292.npy new file mode 100644 index 0000000..ab8fecf --- /dev/null +++ b/margin_logs/step_0000292.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f51bed592e329f40167fe0df39b5d89b28db909a8bfaa44d0565736eebcf290 +size 384 diff --git a/margin_logs/step_0000293.npy b/margin_logs/step_0000293.npy new file mode 100644 index 0000000..0ed78c7 --- /dev/null +++ b/margin_logs/step_0000293.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d249af06da52c20660ae5d9d00d5c05cd4c9165e8200819052c3cdfa408e839 +size 384 diff --git a/margin_logs/step_0000294.npy b/margin_logs/step_0000294.npy new file mode 100644 index 0000000..f250c29 --- /dev/null +++ b/margin_logs/step_0000294.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7044abbf54f450fa66f3a1aa424aa8adb0c254bbfe962332b3f47b6455a1f026 +size 384 diff --git a/margin_logs/step_0000295.npy b/margin_logs/step_0000295.npy new file mode 100644 index 0000000..0e7f4c7 --- /dev/null +++ b/margin_logs/step_0000295.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7b6d27908ce1955cbc5bf07fee3f5579cb927ceb7c1ea8428cf2e01444fde0e +size 384 diff --git a/margin_logs/step_0000296.npy b/margin_logs/step_0000296.npy new file mode 100644 index 0000000..6e4151e --- /dev/null +++ b/margin_logs/step_0000296.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c99ea2eb48e8a951cde0123fed3340cb82ce608da5500dc32327387f0093939 +size 384 diff --git a/margin_logs/step_0000297.npy b/margin_logs/step_0000297.npy new file mode 100644 index 0000000..c74be83 --- /dev/null +++ b/margin_logs/step_0000297.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2e47d3886f3fb5e6fff6f1b578ed0fb4fb7c4718ef4d42675ad7eb101009b1a +size 384 diff --git a/margin_logs/step_0000298.npy b/margin_logs/step_0000298.npy new file mode 100644 index 0000000..d2d94a0 --- /dev/null +++ b/margin_logs/step_0000298.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fee2f76af12d35b8486c22880dd87f1c1b64b93bf8de865ac3b2b430dec68e4e +size 384 diff --git a/margin_logs/step_0000299.npy b/margin_logs/step_0000299.npy new file mode 100644 index 0000000..9d314ff --- /dev/null +++ b/margin_logs/step_0000299.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5e1d9c80841dda955aa6c24c845237c7832ebb275790c853236ada7bd86b524 +size 384 diff --git a/margin_logs/step_0000300.npy b/margin_logs/step_0000300.npy new file mode 100644 index 0000000..5c4c6ed --- /dev/null +++ b/margin_logs/step_0000300.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98b97641b086835a2f4d32e72d1611143d7a7a3aea0a01b75e6960a1ac3e9f5e +size 384 diff --git a/margin_logs/step_0000301.npy b/margin_logs/step_0000301.npy new file mode 100644 index 0000000..af3552c --- /dev/null +++ b/margin_logs/step_0000301.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3400406c6a72bf5bf98c38b0e9ba96a951ea8b91c5fb1a55436f0738f4f319f7 +size 384 diff --git a/margin_logs/step_0000302.npy b/margin_logs/step_0000302.npy new file mode 100644 index 0000000..7c6e006 --- /dev/null +++ b/margin_logs/step_0000302.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2ba8f126fca8e89ab12e88fd344cc3c41efeee8b23f9ec07d33e4347fbb2b1f +size 384 diff --git a/margin_logs/step_0000303.npy b/margin_logs/step_0000303.npy new file mode 100644 index 0000000..723dbe8 --- /dev/null +++ b/margin_logs/step_0000303.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52a4339556d69f1ab328612eac1619deb06b37a6dda2667bdef3b8351213e9a7 +size 384 diff --git a/margin_logs/step_0000304.npy b/margin_logs/step_0000304.npy new file mode 100644 index 0000000..07980d8 --- /dev/null +++ b/margin_logs/step_0000304.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c76f120d4b3900c83966d9981307addb6d52507103b6284f39a013fea35b63ed +size 384 diff --git a/margin_logs/step_0000305.npy b/margin_logs/step_0000305.npy new file mode 100644 index 0000000..ba984cd --- /dev/null +++ b/margin_logs/step_0000305.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d827459ff6bfb11bb680d951f7c9fd5c4112f1c50db52e38998af2b3fbafc1bf +size 384 diff --git a/margin_logs/step_0000306.npy b/margin_logs/step_0000306.npy new file mode 100644 index 0000000..1130399 --- /dev/null +++ b/margin_logs/step_0000306.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:461d1ed50663ff988e6a8cf1bb80dc8b45e4a655c7247e76d386804f3247e3c3 +size 384 diff --git a/margin_logs/step_0000307.npy b/margin_logs/step_0000307.npy new file mode 100644 index 0000000..1efd810 --- /dev/null +++ b/margin_logs/step_0000307.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29114d1f785b76fa6fb0e9d3fe43af52b36b03644ff387a961b6c920616a3abb +size 384 diff --git a/margin_logs/step_0000308.npy b/margin_logs/step_0000308.npy new file mode 100644 index 0000000..239a285 --- /dev/null +++ b/margin_logs/step_0000308.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a18bdd90c6cd5eac555d93a3e364a36b8e71b14c7deef4fb13c77c7b71c6aee6 +size 384 diff --git a/margin_logs/step_0000309.npy b/margin_logs/step_0000309.npy new file mode 100644 index 0000000..810c538 --- /dev/null +++ b/margin_logs/step_0000309.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ed5a597d1e28c0aa439e142903f482823d271b41ba193eb84b09246e0e26772 +size 384 diff --git a/margin_logs/step_0000310.npy b/margin_logs/step_0000310.npy new file mode 100644 index 0000000..eab8897 --- /dev/null +++ b/margin_logs/step_0000310.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c5b05ea9824d2d6d171d6ce07171eeb09b396dfa5bfbcd27bb744dd5c622fb3 +size 384 diff --git a/margin_logs/step_0000311.npy b/margin_logs/step_0000311.npy new file mode 100644 index 0000000..12d344a --- /dev/null +++ b/margin_logs/step_0000311.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e0ad813672ea2da8e26fac935eb4553135fa6dbd9931cd8941d70b62b7269fb +size 384 diff --git a/margin_logs/step_0000312.npy b/margin_logs/step_0000312.npy new file mode 100644 index 0000000..61eea6b --- /dev/null +++ b/margin_logs/step_0000312.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:074e40b4ea33af6b9d02c4a6e3c356f8171d5f9a3e994be0a91c96e1ccbd4c9c +size 384 diff --git a/margin_logs/step_0000313.npy b/margin_logs/step_0000313.npy new file mode 100644 index 0000000..a717561 --- /dev/null +++ b/margin_logs/step_0000313.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ae701a6e08eb3d80d856f530096fc35745fc08ead18722e272e31015e9f3a7c +size 384 diff --git a/margin_logs/step_0000314.npy b/margin_logs/step_0000314.npy new file mode 100644 index 0000000..14b84f1 --- /dev/null +++ b/margin_logs/step_0000314.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20202e6bfa9ea524c8ce95a045b04ccc7c3cc185241e4542b729e4411f5f527c +size 384 diff --git a/margin_logs/step_0000315.npy b/margin_logs/step_0000315.npy new file mode 100644 index 0000000..89335dd --- /dev/null +++ b/margin_logs/step_0000315.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59ccb1971b945671bffd0712302b2923a88f3e702b4b27c8a402a76667d25860 +size 384 diff --git a/margin_logs/step_0000316.npy b/margin_logs/step_0000316.npy new file mode 100644 index 0000000..5fff014 --- /dev/null +++ b/margin_logs/step_0000316.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cc336c1366efbc37f17fe737443ae5e907d33b5546667b221c9c59f076146c7 +size 384 diff --git a/margin_logs/step_0000317.npy b/margin_logs/step_0000317.npy new file mode 100644 index 0000000..68852d6 --- /dev/null +++ b/margin_logs/step_0000317.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:242e7da782ca1bb5d557d2642a6c241f8698c2c35bbc2b47f3a802c6bd172125 +size 384 diff --git a/margin_logs/step_0000318.npy b/margin_logs/step_0000318.npy new file mode 100644 index 0000000..833ab76 --- /dev/null +++ b/margin_logs/step_0000318.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a4bd769a2c1c2282fbd0e0e1bf98fefce1667c0452166ee27e74727e3f2a287 +size 384 diff --git a/margin_logs/step_0000319.npy b/margin_logs/step_0000319.npy new file mode 100644 index 0000000..e0e4e61 --- /dev/null +++ b/margin_logs/step_0000319.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd8726b6c8457ff08b34a3427733184bd8644e068eb98d9aaa6d7e263d832789 +size 384 diff --git a/margin_logs/step_0000320.npy b/margin_logs/step_0000320.npy new file mode 100644 index 0000000..bb9591d --- /dev/null +++ b/margin_logs/step_0000320.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f6feedd86dd2fd3ede809c16a07620a188cf2b3feede1e010607f921bc890c0 +size 384 diff --git a/margin_logs/step_0000321.npy b/margin_logs/step_0000321.npy new file mode 100644 index 0000000..af42158 --- /dev/null +++ b/margin_logs/step_0000321.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b825d6369b6e787d17a95f6cbbc88832cd27f54fa37dc742c926c067d6afdcf2 +size 384 diff --git a/margin_logs/step_0000322.npy b/margin_logs/step_0000322.npy new file mode 100644 index 0000000..b3eb538 --- /dev/null +++ b/margin_logs/step_0000322.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ddabde4e8661ab282d61fe4129e4e10c6a7b5398e551eb55aa487b33b62c739 +size 384 diff --git a/margin_logs/step_0000323.npy b/margin_logs/step_0000323.npy new file mode 100644 index 0000000..b1d8538 --- /dev/null +++ b/margin_logs/step_0000323.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:506504484269641a311f63c9e7b80372b46de77cc8a8aeb1dff96d12a10bd9a2 +size 384 diff --git a/margin_logs/step_0000324.npy b/margin_logs/step_0000324.npy new file mode 100644 index 0000000..6018198 --- /dev/null +++ b/margin_logs/step_0000324.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd0e98c13ed4b19dfe209b1536a0dbe8267fa1237c150706ef2777ca4dee5617 +size 384 diff --git a/margin_logs/step_0000325.npy b/margin_logs/step_0000325.npy new file mode 100644 index 0000000..12ae135 --- /dev/null +++ b/margin_logs/step_0000325.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aac6847a6639f92a1ec48ded69fee035d3f9f5ae81cbccdcca02ddb75113c936 +size 384 diff --git a/margin_logs/step_0000326.npy b/margin_logs/step_0000326.npy new file mode 100644 index 0000000..35a8d41 --- /dev/null +++ b/margin_logs/step_0000326.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:398c30c02478c89ebc7e39e1d4bab5a12735578d87dcc93f8e089e4333c489cd +size 384 diff --git a/margin_logs/step_0000327.npy b/margin_logs/step_0000327.npy new file mode 100644 index 0000000..b6f28e0 --- /dev/null +++ b/margin_logs/step_0000327.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01a1d280ead883e8715150d4a0aa2ddbe5ce47b1aa1797efbbbd81e6c2fba374 +size 384 diff --git a/margin_logs/step_0000328.npy b/margin_logs/step_0000328.npy new file mode 100644 index 0000000..8cafe29 --- /dev/null +++ b/margin_logs/step_0000328.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc23f4b4e81e90d98b0981f3ded2fa334180e186e730763f5196cf71ffaaf12e +size 384 diff --git a/margin_logs/step_0000329.npy b/margin_logs/step_0000329.npy new file mode 100644 index 0000000..cc0ba44 --- /dev/null +++ b/margin_logs/step_0000329.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c71cb4cf01c43dae7ae2249b88ff5d51e889b473c3205f29771b4c34400a4e1 +size 384 diff --git a/margin_logs/step_0000330.npy b/margin_logs/step_0000330.npy new file mode 100644 index 0000000..3bac67d --- /dev/null +++ b/margin_logs/step_0000330.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:957db4b7bba6013faf732e4f4c37e69a8b079ea557d26b8036d05dcf1ce24c29 +size 384 diff --git a/margin_logs/step_0000331.npy b/margin_logs/step_0000331.npy new file mode 100644 index 0000000..9f4f4cd --- /dev/null +++ b/margin_logs/step_0000331.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5354bd0a15879824cf2eb110ac5be47a528142f9c262261c7df4277f5e62f84a +size 384 diff --git a/margin_logs/step_0000332.npy b/margin_logs/step_0000332.npy new file mode 100644 index 0000000..ebe4c03 --- /dev/null +++ b/margin_logs/step_0000332.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b9f37d734a19f6a5440a5c460cb39da678ba9933b40d689db2b495f7f07b124 +size 384 diff --git a/margin_logs/step_0000333.npy b/margin_logs/step_0000333.npy new file mode 100644 index 0000000..a3d53e2 --- /dev/null +++ b/margin_logs/step_0000333.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f4b55a43f7e08e5428321bb97e41fbb4237ccb7896ef3ba2bca8a7c33325a11 +size 384 diff --git a/margin_logs/step_0000334.npy b/margin_logs/step_0000334.npy new file mode 100644 index 0000000..aa4da55 --- /dev/null +++ b/margin_logs/step_0000334.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c70d638a4efc2fcd5481f46ca62e5e2accc1f9a18ee8d4a7c92b4b94cfc95043 +size 384 diff --git a/margin_logs/step_0000335.npy b/margin_logs/step_0000335.npy new file mode 100644 index 0000000..4874866 --- /dev/null +++ b/margin_logs/step_0000335.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c257878d866a5640abfa9d4aed7e3c7739a23ff4422ef3b998cf597779438ed +size 384 diff --git a/margin_logs/step_0000336.npy b/margin_logs/step_0000336.npy new file mode 100644 index 0000000..27acd2a --- /dev/null +++ b/margin_logs/step_0000336.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19e0ede1647c8a79cda1ab2e8ea34f8061118f06d0140ff9c16d9825e71cc064 +size 384 diff --git a/margin_logs/step_0000337.npy b/margin_logs/step_0000337.npy new file mode 100644 index 0000000..31a5d09 --- /dev/null +++ b/margin_logs/step_0000337.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ee536762a203e0fe27a33f0ea63f0655389294294bb978134d29f8e82b8228b +size 384 diff --git a/margin_logs/step_0000338.npy b/margin_logs/step_0000338.npy new file mode 100644 index 0000000..d7d6270 --- /dev/null +++ b/margin_logs/step_0000338.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:642525dfc831e34b349abb901c05d368786a8a79c3c6267e26242810d679d7be +size 384 diff --git a/margin_logs/step_0000339.npy b/margin_logs/step_0000339.npy new file mode 100644 index 0000000..ebcb1d0 --- /dev/null +++ b/margin_logs/step_0000339.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72ae5a5adfc08799abe0c3b58aa510ad25ef6164918a5ad0869ee29045a50ede +size 384 diff --git a/margin_logs/step_0000340.npy b/margin_logs/step_0000340.npy new file mode 100644 index 0000000..e6aca65 --- /dev/null +++ b/margin_logs/step_0000340.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48b7447164a97ffa02bce0f50dc2664e380d6dd4a480723a3f682094795df732 +size 384 diff --git a/margin_logs/step_0000341.npy b/margin_logs/step_0000341.npy new file mode 100644 index 0000000..cb70f30 --- /dev/null +++ b/margin_logs/step_0000341.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9780249c88c968f0ccb503f1cd8a08f596af23e6d3ac1583028b0cb946e4abbe +size 384 diff --git a/margin_logs/step_0000342.npy b/margin_logs/step_0000342.npy new file mode 100644 index 0000000..af7e314 --- /dev/null +++ b/margin_logs/step_0000342.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3179b6459704a1e5d9d3271b501b6744060ee04b00e29441d17038f08844505b +size 384 diff --git a/margin_logs/step_0000343.npy b/margin_logs/step_0000343.npy new file mode 100644 index 0000000..3f5483d --- /dev/null +++ b/margin_logs/step_0000343.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a685f43a44c67c7a937f78cc5a21063f2273bbcfacb2b62a0d40cb6ca49b7358 +size 384 diff --git a/margin_logs/step_0000344.npy b/margin_logs/step_0000344.npy new file mode 100644 index 0000000..cdbfbe2 --- /dev/null +++ b/margin_logs/step_0000344.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef89758c376aa8481d5bf5e51872a62265ddb6a3e718d1b2dafdf03cbe8cc01b +size 384 diff --git a/margin_logs/step_0000345.npy b/margin_logs/step_0000345.npy new file mode 100644 index 0000000..f9f92f5 --- /dev/null +++ b/margin_logs/step_0000345.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f9ed22e7ee2fea10f00cc32b8eebe912162fd9cf0a85c65fd3b23897a0b4ec5 +size 384 diff --git a/margin_logs/step_0000346.npy b/margin_logs/step_0000346.npy new file mode 100644 index 0000000..1061f51 --- /dev/null +++ b/margin_logs/step_0000346.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab758e0d5a27c2fc05ac64de0b3ce40a18d1b71d5ba4707888c7602cafad48fe +size 384 diff --git a/margin_logs/step_0000347.npy b/margin_logs/step_0000347.npy new file mode 100644 index 0000000..1cd3f57 --- /dev/null +++ b/margin_logs/step_0000347.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bd7afd29a3d2f9ccfb8622ae02232967942a46f0905d3316fbc34a869c4a5dd +size 384 diff --git a/margin_logs/step_0000348.npy b/margin_logs/step_0000348.npy new file mode 100644 index 0000000..3bca30f --- /dev/null +++ b/margin_logs/step_0000348.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e0e27153e3013cdf53d393f9c5a0e58ce124f99c72fcd9c22d2f4699dd79219 +size 384 diff --git a/margin_logs/step_0000349.npy b/margin_logs/step_0000349.npy new file mode 100644 index 0000000..1e73287 --- /dev/null +++ b/margin_logs/step_0000349.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18a9d6ced9fca61363852d62781f1e9ba86db31a1597ea1b492c764a343fce51 +size 384 diff --git a/margin_logs/step_0000350.npy b/margin_logs/step_0000350.npy new file mode 100644 index 0000000..2b62abf --- /dev/null +++ b/margin_logs/step_0000350.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8455d8a9d2362ebf591f63470c4cd073f58a2719371947342ba9c9757aeb92b5 +size 384 diff --git a/margin_logs/step_0000351.npy b/margin_logs/step_0000351.npy new file mode 100644 index 0000000..6a8d0cb --- /dev/null +++ b/margin_logs/step_0000351.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f6a62cdbed0780b410f4efbdd9b24b5274fe17d4027a44dcce95dff4096048b +size 384 diff --git a/margin_logs/step_0000352.npy b/margin_logs/step_0000352.npy new file mode 100644 index 0000000..ad904e3 --- /dev/null +++ b/margin_logs/step_0000352.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7f68d4b7dc283e1668706d34c1f8059b258124f45192a72545d79cdaab8e848 +size 384 diff --git a/margin_logs/step_0000353.npy b/margin_logs/step_0000353.npy new file mode 100644 index 0000000..94b856e --- /dev/null +++ b/margin_logs/step_0000353.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33b80714a9e3194174348dd5ee4ddd2d44c673c947990160f43bfe85d79cf232 +size 384 diff --git a/margin_logs/step_0000354.npy b/margin_logs/step_0000354.npy new file mode 100644 index 0000000..53d1e42 --- /dev/null +++ b/margin_logs/step_0000354.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe4073f4fd30e1dda75d5850ddb5d2734efa852afc574ff3ffe00f35980d5757 +size 384 diff --git a/margin_logs/step_0000355.npy b/margin_logs/step_0000355.npy new file mode 100644 index 0000000..32c51e2 --- /dev/null +++ b/margin_logs/step_0000355.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ce0986731793655564f9d37432e078e0111e13175580b30d3d1581ba8bf20b5 +size 384 diff --git a/margin_logs/step_0000356.npy b/margin_logs/step_0000356.npy new file mode 100644 index 0000000..d02465c --- /dev/null +++ b/margin_logs/step_0000356.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e2644f5318fa610adc6ec6367657b527efffc1dac158ff8f83ca373d6bcee56 +size 384 diff --git a/margin_logs/step_0000357.npy b/margin_logs/step_0000357.npy new file mode 100644 index 0000000..1e3455d --- /dev/null +++ b/margin_logs/step_0000357.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df7e327e7f5bdc1fcd8e55fcb4d13208575b266cb445cd6cc19565c791099fab +size 384 diff --git a/margin_logs/step_0000358.npy b/margin_logs/step_0000358.npy new file mode 100644 index 0000000..a522d0a --- /dev/null +++ b/margin_logs/step_0000358.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3398ef8439359d4aa7f622dc649c87e9ac7df377f12ff7dd5bb797a5b17f6387 +size 384 diff --git a/margin_logs/step_0000359.npy b/margin_logs/step_0000359.npy new file mode 100644 index 0000000..9647a1e --- /dev/null +++ b/margin_logs/step_0000359.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0c6c96fa268a8ad9b2e580896f3dd9356c7eadde550179ee8d040c9748f3fa1 +size 384 diff --git a/margin_logs/step_0000360.npy b/margin_logs/step_0000360.npy new file mode 100644 index 0000000..fe7ddb1 --- /dev/null +++ b/margin_logs/step_0000360.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c65150a841d223ad0084e6cc62c142a50ff0d07641981d1043f2af1a803c7d9 +size 384 diff --git a/margin_logs/step_0000361.npy b/margin_logs/step_0000361.npy new file mode 100644 index 0000000..f75338f --- /dev/null +++ b/margin_logs/step_0000361.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ba8235c665a1a747f34c1970f3536ce6f12c4f5b4edb57f4248a6af19477ab4 +size 384 diff --git a/margin_logs/step_0000362.npy b/margin_logs/step_0000362.npy new file mode 100644 index 0000000..6915a76 --- /dev/null +++ b/margin_logs/step_0000362.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfd73086d141039b51e45379452c3329bc431be66aa4606a926593da70df51f9 +size 384 diff --git a/margin_logs/step_0000363.npy b/margin_logs/step_0000363.npy new file mode 100644 index 0000000..ab9e472 --- /dev/null +++ b/margin_logs/step_0000363.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85ba6456b446903900168405c733fa5083518a056ee0cbf245c7e4f7a2281413 +size 384 diff --git a/margin_logs/step_0000364.npy b/margin_logs/step_0000364.npy new file mode 100644 index 0000000..3e81866 --- /dev/null +++ b/margin_logs/step_0000364.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ed3ce4b73aa84a7b31fbe8acd03abc7c046abdc7495e16a8c6094cac00734cc +size 384 diff --git a/margin_logs/step_0000365.npy b/margin_logs/step_0000365.npy new file mode 100644 index 0000000..044a79b --- /dev/null +++ b/margin_logs/step_0000365.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83af62fd587c46d8d41b482d3d1991aea6d94109d4fc3a7afd398b2a6dc4467b +size 384 diff --git a/margin_logs/step_0000366.npy b/margin_logs/step_0000366.npy new file mode 100644 index 0000000..004b745 --- /dev/null +++ b/margin_logs/step_0000366.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45105e09c69294243e0c5b15936bb207a5e0afddc6dfc565c5319f2fc1ebb68a +size 384 diff --git a/margin_logs/step_0000367.npy b/margin_logs/step_0000367.npy new file mode 100644 index 0000000..356c662 --- /dev/null +++ b/margin_logs/step_0000367.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4f8ce98bd203b5458f2691dd3c520ff85ca2dec5ae85fd9cf012f50d5bba177 +size 384 diff --git a/margin_logs/step_0000368.npy b/margin_logs/step_0000368.npy new file mode 100644 index 0000000..aebc525 --- /dev/null +++ b/margin_logs/step_0000368.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa432fb02a71ef86b2401f447541c868b1a1b9ed09669bcc2d0aaa1219c82856 +size 384 diff --git a/margin_logs/step_0000369.npy b/margin_logs/step_0000369.npy new file mode 100644 index 0000000..3a96642 --- /dev/null +++ b/margin_logs/step_0000369.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e52364dba0d7d5664a6ffea44fd64136042a3d4131585b987d697127ab95a65 +size 384 diff --git a/margin_logs/step_0000370.npy b/margin_logs/step_0000370.npy new file mode 100644 index 0000000..1edea39 --- /dev/null +++ b/margin_logs/step_0000370.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:771689775de4d18d7ed8d2ba23f8154783077f553114840db1c6d2668310583f +size 384 diff --git a/margin_logs/step_0000371.npy b/margin_logs/step_0000371.npy new file mode 100644 index 0000000..cc57156 --- /dev/null +++ b/margin_logs/step_0000371.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:140b7773b1e1e19863f0288501c4389067b62f363fed2f317fa3a2624bc77479 +size 384 diff --git a/margin_logs/step_0000372.npy b/margin_logs/step_0000372.npy new file mode 100644 index 0000000..c673162 --- /dev/null +++ b/margin_logs/step_0000372.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06664eab871489597197309b94f0ce673f32d9a3675f40ee96dd3d9c30a82510 +size 384 diff --git a/margin_logs/step_0000373.npy b/margin_logs/step_0000373.npy new file mode 100644 index 0000000..879d147 --- /dev/null +++ b/margin_logs/step_0000373.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3350d63c5587b062a9765e367f1cb60b3e598b34ba56b79a42eebbf99bddf4f +size 384 diff --git a/margin_logs/step_0000374.npy b/margin_logs/step_0000374.npy new file mode 100644 index 0000000..74b36cb --- /dev/null +++ b/margin_logs/step_0000374.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdfb083c081c4d592a4dd4caa565f826e0f40fcb4daacc04f749ccacd6a435d0 +size 384 diff --git a/margin_logs/step_0000375.npy b/margin_logs/step_0000375.npy new file mode 100644 index 0000000..1364cd4 --- /dev/null +++ b/margin_logs/step_0000375.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecb339256e68a7b7effebdd5f9c3cab707fa7503962dabea8aca9dddef6a4040 +size 384 diff --git a/margin_logs/step_0000376.npy b/margin_logs/step_0000376.npy new file mode 100644 index 0000000..166520e --- /dev/null +++ b/margin_logs/step_0000376.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f075de7f14b5ee44e4454bc319fa8a4688eb9762456a8c6576a19f241c26fd07 +size 384 diff --git a/margin_logs/step_0000377.npy b/margin_logs/step_0000377.npy new file mode 100644 index 0000000..ce59b1c --- /dev/null +++ b/margin_logs/step_0000377.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d760ae0cd74301261b96b40ca0569f2fcfcb74d508b51fdbc1fc94190109166 +size 384 diff --git a/margin_logs/step_0000378.npy b/margin_logs/step_0000378.npy new file mode 100644 index 0000000..799120f --- /dev/null +++ b/margin_logs/step_0000378.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fac53d56d5253303a9b4c4f2e44f44f51ea93de78a76a34136753e35807246e6 +size 384 diff --git a/margin_logs/step_0000379.npy b/margin_logs/step_0000379.npy new file mode 100644 index 0000000..1b64d41 --- /dev/null +++ b/margin_logs/step_0000379.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c927d2e0e1608cbe670327948b097190b0215bf01df5843d04955030cb9db07 +size 384 diff --git a/margin_logs/step_0000380.npy b/margin_logs/step_0000380.npy new file mode 100644 index 0000000..cb05e67 --- /dev/null +++ b/margin_logs/step_0000380.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f4e1e6b9a4dca2f9826f24b67360ac0b61f05986d916b074379688a8377974f +size 384 diff --git a/margin_logs/step_0000381.npy b/margin_logs/step_0000381.npy new file mode 100644 index 0000000..9f67cc2 --- /dev/null +++ b/margin_logs/step_0000381.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a29aaea499f1493ba022613f7801e4490098435b6237d3a8cb2acb4e9cd9992a +size 384 diff --git a/margin_logs/step_0000382.npy b/margin_logs/step_0000382.npy new file mode 100644 index 0000000..1dde430 --- /dev/null +++ b/margin_logs/step_0000382.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89daaf97d926c2a71940de9337b94163f9e54a048b7bbfb48b39f6d72ce4d385 +size 384 diff --git a/margin_logs/step_0000383.npy b/margin_logs/step_0000383.npy new file mode 100644 index 0000000..19c5adb --- /dev/null +++ b/margin_logs/step_0000383.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c3d8fdfb78d72336816ddc0bcd9a7673f18309a7279ac564489a5b6495c8ba0 +size 384 diff --git a/margin_logs/step_0000384.npy b/margin_logs/step_0000384.npy new file mode 100644 index 0000000..862a067 --- /dev/null +++ b/margin_logs/step_0000384.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2144a0ff3ef3c69a0f3966afea93b879cc2cb3570c6e70cf893274b525d1596e +size 384 diff --git a/margin_logs/step_0000385.npy b/margin_logs/step_0000385.npy new file mode 100644 index 0000000..08ed0d2 --- /dev/null +++ b/margin_logs/step_0000385.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:616a8c4736f0352e3d494041ff2964edc2beac1e076baceb20462f9680dc97b6 +size 384 diff --git a/margin_logs/step_0000386.npy b/margin_logs/step_0000386.npy new file mode 100644 index 0000000..be90ec1 --- /dev/null +++ b/margin_logs/step_0000386.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c19c67520352371dbae5b38a9ceb21fb48d6367fd43ade11d439cca5beeb18a0 +size 384 diff --git a/margin_logs/step_0000387.npy b/margin_logs/step_0000387.npy new file mode 100644 index 0000000..1228c0e --- /dev/null +++ b/margin_logs/step_0000387.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91a0ba026d78e4dc843e6ff8add4d2146bfa1f58a14b645a14e5559e888247a4 +size 384 diff --git a/margin_logs/step_0000388.npy b/margin_logs/step_0000388.npy new file mode 100644 index 0000000..4689444 --- /dev/null +++ b/margin_logs/step_0000388.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1807525198c15b8095bcf51f60277330928c552579aa9701960d280301cc5b1 +size 384 diff --git a/margin_logs/step_0000389.npy b/margin_logs/step_0000389.npy new file mode 100644 index 0000000..69b6f39 --- /dev/null +++ b/margin_logs/step_0000389.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:950018f1319805140e7b8bfb76c86546d9c209db70343687e3180a8da41ebb24 +size 384 diff --git a/margin_logs/step_0000390.npy b/margin_logs/step_0000390.npy new file mode 100644 index 0000000..e889f21 --- /dev/null +++ b/margin_logs/step_0000390.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5685101492892b02b6a3d7c3e1cf87d019c2981663c9ad4446d7ddb1d27f7056 +size 384 diff --git a/margin_logs/step_0000391.npy b/margin_logs/step_0000391.npy new file mode 100644 index 0000000..6d5ff94 --- /dev/null +++ b/margin_logs/step_0000391.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a35abf5174f85da6b6bd9007a650791287a0a9f81f9c73c2a4bc512b68f95f4c +size 384 diff --git a/margin_logs/step_0000392.npy b/margin_logs/step_0000392.npy new file mode 100644 index 0000000..2dfd669 --- /dev/null +++ b/margin_logs/step_0000392.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72f60fcf48d0aaccae7f1443e4de6ef47569fb4b0da890251742340a9ebec87e +size 384 diff --git a/margin_logs/step_0000393.npy b/margin_logs/step_0000393.npy new file mode 100644 index 0000000..d356c75 --- /dev/null +++ b/margin_logs/step_0000393.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97bdf7bdd093717272ef1f4e793c1e3f884ecad2311dc7e7e9e2fcf57729c0f0 +size 384 diff --git a/margin_logs/step_0000394.npy b/margin_logs/step_0000394.npy new file mode 100644 index 0000000..37103bc --- /dev/null +++ b/margin_logs/step_0000394.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00830193b0c7368ea77e7b5c2dc5ef7eda8144235311eedcccf59295586aa227 +size 384 diff --git a/margin_logs/step_0000395.npy b/margin_logs/step_0000395.npy new file mode 100644 index 0000000..a67c74b --- /dev/null +++ b/margin_logs/step_0000395.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa6df1f23b9a06e08c40ba271d19d7dd9edaa215e2fffdd1227f5058e163e2bb +size 384 diff --git a/margin_logs/step_0000396.npy b/margin_logs/step_0000396.npy new file mode 100644 index 0000000..61c57f5 --- /dev/null +++ b/margin_logs/step_0000396.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb9af52ba8d3348d205b3df13858fa835a6c4bcca22ad131b153d486338be46f +size 384 diff --git a/margin_logs/step_0000397.npy b/margin_logs/step_0000397.npy new file mode 100644 index 0000000..95e91a5 --- /dev/null +++ b/margin_logs/step_0000397.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16261a7d5b0d071ccaeb8c41d28711013a97a7cbf8c504aeb87b5303c5469244 +size 384 diff --git a/margin_logs/step_0000398.npy b/margin_logs/step_0000398.npy new file mode 100644 index 0000000..8f3a8c0 --- /dev/null +++ b/margin_logs/step_0000398.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb7aa41237ae7de0c3785fbee861fb4cc83f9f109caa9f0ff270f4292cf0cfa4 +size 384 diff --git a/margin_logs/step_0000399.npy b/margin_logs/step_0000399.npy new file mode 100644 index 0000000..4696dab --- /dev/null +++ b/margin_logs/step_0000399.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c46764c909771c484b80da387ca0560e25c70f9511e0dc068fefbcbde16d97ba +size 384 diff --git a/margin_logs/step_0000400.npy b/margin_logs/step_0000400.npy new file mode 100644 index 0000000..7b5f101 --- /dev/null +++ b/margin_logs/step_0000400.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ae43a132a2281ca8d64948fccb85f7dd0f285a3d938f8502e0b3fe5ca5a13a5 +size 384 diff --git a/margin_logs/step_0000401.npy b/margin_logs/step_0000401.npy new file mode 100644 index 0000000..a994800 --- /dev/null +++ b/margin_logs/step_0000401.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1af4b1f15aaf25b39c8b0542f7282b72e909f1c83cda214090c5bf44024d3c04 +size 384 diff --git a/margin_logs/step_0000402.npy b/margin_logs/step_0000402.npy new file mode 100644 index 0000000..e9805cc --- /dev/null +++ b/margin_logs/step_0000402.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c4d1bd555c412e4e0210983cd92ded087cfb2885f1173f15668ba649573d9f8 +size 384 diff --git a/margin_logs/step_0000403.npy b/margin_logs/step_0000403.npy new file mode 100644 index 0000000..aec5065 --- /dev/null +++ b/margin_logs/step_0000403.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d6f00eef157c8752a00dde198a3427e508c3d1575a4c082e27c8f36d985469d +size 384 diff --git a/margin_logs/step_0000404.npy b/margin_logs/step_0000404.npy new file mode 100644 index 0000000..169bfc7 --- /dev/null +++ b/margin_logs/step_0000404.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:832266ffe2af4f7666ce09fd128e2bdad31013fc32623d881909c6beb6bef0bb +size 384 diff --git a/margin_logs/step_0000405.npy b/margin_logs/step_0000405.npy new file mode 100644 index 0000000..6c9a8d5 --- /dev/null +++ b/margin_logs/step_0000405.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cbfada2592576dee6160746a28c37913470140aa1384e72cb54cfe5881194f7 +size 384 diff --git a/margin_logs/step_0000406.npy b/margin_logs/step_0000406.npy new file mode 100644 index 0000000..f645f84 --- /dev/null +++ b/margin_logs/step_0000406.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3057cf6a81e963b7c633aa99d9446459f73fd0336702d124c415baa6d9a7ad12 +size 384 diff --git a/margin_logs/step_0000407.npy b/margin_logs/step_0000407.npy new file mode 100644 index 0000000..2ba3b46 --- /dev/null +++ b/margin_logs/step_0000407.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecdd23adc5ea519ad9995e1f783b936f571ece38322be30e6f4749726834f16c +size 384 diff --git a/margin_logs/step_0000408.npy b/margin_logs/step_0000408.npy new file mode 100644 index 0000000..42e7737 --- /dev/null +++ b/margin_logs/step_0000408.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce7ef5dd0af7c7e2c088931c38fde74be60e1b55fbc66f04cdeec74caee33faf +size 384 diff --git a/margin_logs/step_0000409.npy b/margin_logs/step_0000409.npy new file mode 100644 index 0000000..d7e8e79 --- /dev/null +++ b/margin_logs/step_0000409.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a6c2d62228ea34b9a1e8d1f45570b7ee358df34790931b815ee6acbf54dd4a1 +size 384 diff --git a/margin_logs/step_0000410.npy b/margin_logs/step_0000410.npy new file mode 100644 index 0000000..1371a6d --- /dev/null +++ b/margin_logs/step_0000410.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77c72998ee271d1dc0aca65eb5530cf8f79a121264651f0f359e864dd960511a +size 384 diff --git a/margin_logs/step_0000411.npy b/margin_logs/step_0000411.npy new file mode 100644 index 0000000..a938812 --- /dev/null +++ b/margin_logs/step_0000411.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdabac7267d72801f558a35b2dfe57f4f4f4111f1b8fa7955b7539dcb9ec0d1a +size 384 diff --git a/margin_logs/step_0000412.npy b/margin_logs/step_0000412.npy new file mode 100644 index 0000000..681b55d --- /dev/null +++ b/margin_logs/step_0000412.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a10befa3e6d96512226499f756d96a3ef05ad73451f7d6f46ca299a3855fb27 +size 384 diff --git a/margin_logs/step_0000413.npy b/margin_logs/step_0000413.npy new file mode 100644 index 0000000..851c07d --- /dev/null +++ b/margin_logs/step_0000413.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e14e4f93b96d9b81a4fc9220774b65d775cdb85f9e36f35579575e80b04c73bd +size 384 diff --git a/margin_logs/step_0000414.npy b/margin_logs/step_0000414.npy new file mode 100644 index 0000000..f8bf4a7 --- /dev/null +++ b/margin_logs/step_0000414.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac3186086985b0445ffa14622015ef630fc1e8593ea7d8faa18ae09fb1f0bbdb +size 384 diff --git a/margin_logs/step_0000415.npy b/margin_logs/step_0000415.npy new file mode 100644 index 0000000..21160db --- /dev/null +++ b/margin_logs/step_0000415.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f33a80035b60645dff981a18afdfb0017f6f10119d558f8043dd8b5e478c428f +size 384 diff --git a/margin_logs/step_0000416.npy b/margin_logs/step_0000416.npy new file mode 100644 index 0000000..da13ffe --- /dev/null +++ b/margin_logs/step_0000416.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4694a915d518f1b050c4802ee5de89624f7e9614ca859497d8f25e82918f1df4 +size 384 diff --git a/margin_logs/step_0000417.npy b/margin_logs/step_0000417.npy new file mode 100644 index 0000000..c8947af --- /dev/null +++ b/margin_logs/step_0000417.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98af3a012da43765c9cb232c2ba24c980c50c09ccf3b2a4852b17817544cb402 +size 384 diff --git a/margin_logs/step_0000418.npy b/margin_logs/step_0000418.npy new file mode 100644 index 0000000..bdd6906 --- /dev/null +++ b/margin_logs/step_0000418.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec89c59f10f3efd1d1ddcda8147536b6a7ac5d5225f1a6b08d3d00bb6a23a3db +size 384 diff --git a/margin_logs/step_0000419.npy b/margin_logs/step_0000419.npy new file mode 100644 index 0000000..06e50b1 --- /dev/null +++ b/margin_logs/step_0000419.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07d8474ea4aa561746272989765ff0919221d53ef6bf06b34897bf9f7f02d71f +size 384 diff --git a/margin_logs/step_0000420.npy b/margin_logs/step_0000420.npy new file mode 100644 index 0000000..df9304c --- /dev/null +++ b/margin_logs/step_0000420.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c84c80de2c2c59928eb61b9881f1a19e17c9d7546fcf6e702eb4dededc760e5 +size 384 diff --git a/margin_logs/step_0000421.npy b/margin_logs/step_0000421.npy new file mode 100644 index 0000000..3154e2c --- /dev/null +++ b/margin_logs/step_0000421.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac966fc5b5ce52a9e0149d1c841614771954bc1144f0447e4f80f9d9ed2be54c +size 384 diff --git a/margin_logs/step_0000422.npy b/margin_logs/step_0000422.npy new file mode 100644 index 0000000..7c51d7a --- /dev/null +++ b/margin_logs/step_0000422.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ea64d7efd77caa7c526ac1f8732a55e5ef22a669fccf3e1f1116fbdd047825e +size 384 diff --git a/margin_logs/step_0000423.npy b/margin_logs/step_0000423.npy new file mode 100644 index 0000000..bb9b31b --- /dev/null +++ b/margin_logs/step_0000423.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67713a96d464eae3849e2c7efb89bbdf0959bb10cf0176afb85034a7ba0f5909 +size 384 diff --git a/margin_logs/step_0000424.npy b/margin_logs/step_0000424.npy new file mode 100644 index 0000000..c6a394a --- /dev/null +++ b/margin_logs/step_0000424.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2003a0cf18ac81d891f9689b97e15322bf18906e64cc755668f62624523d548 +size 384 diff --git a/margin_logs/step_0000425.npy b/margin_logs/step_0000425.npy new file mode 100644 index 0000000..2ac5aed --- /dev/null +++ b/margin_logs/step_0000425.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc63274321e03c7c013fa853cb6b4b526f26f8b05e189e664b05cec8554b1ed1 +size 384 diff --git a/margin_logs/step_0000426.npy b/margin_logs/step_0000426.npy new file mode 100644 index 0000000..472aa89 --- /dev/null +++ b/margin_logs/step_0000426.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:306db1fc6aa17771dae67cf021d8386372c617dbe2b1be53b674e032e299c2d8 +size 384 diff --git a/margin_logs/step_0000427.npy b/margin_logs/step_0000427.npy new file mode 100644 index 0000000..5a5a578 --- /dev/null +++ b/margin_logs/step_0000427.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aca4632f3361f3265158876653b37d72c4cfa31bd58bbd4f1ff8cf0479f9289 +size 384 diff --git a/margin_logs/step_0000428.npy b/margin_logs/step_0000428.npy new file mode 100644 index 0000000..b58a027 --- /dev/null +++ b/margin_logs/step_0000428.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b21c068e9b151b49ea8c4e5fdd0c3b573ed6c5945eaa9eb57231a8cbcdaee6a +size 384 diff --git a/margin_logs/step_0000429.npy b/margin_logs/step_0000429.npy new file mode 100644 index 0000000..4eabb50 --- /dev/null +++ b/margin_logs/step_0000429.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c30d1d744b44fa7e723b7f9902a3d473248aabacbed4710f8a96bc766fefadb1 +size 384 diff --git a/margin_logs/step_0000430.npy b/margin_logs/step_0000430.npy new file mode 100644 index 0000000..c657316 --- /dev/null +++ b/margin_logs/step_0000430.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09e7c85c1e911686f18637f6dd5a77168ead26d227d0143584ebd47d55c81650 +size 384 diff --git a/margin_logs/step_0000431.npy b/margin_logs/step_0000431.npy new file mode 100644 index 0000000..9a1b7f5 --- /dev/null +++ b/margin_logs/step_0000431.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85cf8418ff33295e820e9cae0c78f03f113bb76b6751dd8c2d9364f5a0455180 +size 384 diff --git a/margin_logs/step_0000432.npy b/margin_logs/step_0000432.npy new file mode 100644 index 0000000..daddd9a --- /dev/null +++ b/margin_logs/step_0000432.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2b6510a4ea51115392d7d7ca6fc0778e81887ad059b25087cd77a3ec8f5d8bd +size 384 diff --git a/margin_logs/step_0000433.npy b/margin_logs/step_0000433.npy new file mode 100644 index 0000000..6f0f0d2 --- /dev/null +++ b/margin_logs/step_0000433.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abd3b7639f9a98bb12dd4a722fcd14c99d98e03127a4601d89177eb2dbd37d40 +size 384 diff --git a/margin_logs/step_0000434.npy b/margin_logs/step_0000434.npy new file mode 100644 index 0000000..197096c --- /dev/null +++ b/margin_logs/step_0000434.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:375dc3bdcfaba72591023eba8db20b3a1250428b20de79a8a1ee326a94896ca1 +size 384 diff --git a/margin_logs/step_0000435.npy b/margin_logs/step_0000435.npy new file mode 100644 index 0000000..237eb8d --- /dev/null +++ b/margin_logs/step_0000435.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2145b2d8111730bab3703bc4a9b56699547c30c4018036febd31eea2f9af8aa +size 384 diff --git a/margin_logs/step_0000436.npy b/margin_logs/step_0000436.npy new file mode 100644 index 0000000..37d7ab2 --- /dev/null +++ b/margin_logs/step_0000436.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d32d9f407a461aa727efc6f3380761f6da67b3a38196192370e3b9ba6d0e2e13 +size 384 diff --git a/margin_logs/step_0000437.npy b/margin_logs/step_0000437.npy new file mode 100644 index 0000000..e7f2cbe --- /dev/null +++ b/margin_logs/step_0000437.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72b05dcca221d4d8f3bd5409c3b982f40c431c525be9a0647792dae5a8c99fd7 +size 384 diff --git a/margin_logs/step_0000438.npy b/margin_logs/step_0000438.npy new file mode 100644 index 0000000..c1737d7 --- /dev/null +++ b/margin_logs/step_0000438.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c652459e228351a03454ece28a62ec1d236cfd6424b7ef5465e8b598431b0f6a +size 384 diff --git a/margin_logs/step_0000439.npy b/margin_logs/step_0000439.npy new file mode 100644 index 0000000..d8cc369 --- /dev/null +++ b/margin_logs/step_0000439.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1698bc915644cccfab38efe3f58faa86b0b2fc94212bd436e66131af5f8092c6 +size 384 diff --git a/margin_logs/step_0000440.npy b/margin_logs/step_0000440.npy new file mode 100644 index 0000000..7a6ca04 --- /dev/null +++ b/margin_logs/step_0000440.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:541a70612173970c128f47c978f3e395638072f01922982881a74551b3ac1b6e +size 384 diff --git a/margin_logs/step_0000441.npy b/margin_logs/step_0000441.npy new file mode 100644 index 0000000..2c491f7 --- /dev/null +++ b/margin_logs/step_0000441.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3107bc97f870b8b30f455feca679f5d24094220da291afc4628f941730b333c5 +size 384 diff --git a/margin_logs/step_0000442.npy b/margin_logs/step_0000442.npy new file mode 100644 index 0000000..e87dbed --- /dev/null +++ b/margin_logs/step_0000442.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d22cf800a882dfb942a5568f598fa27cdf216e6cee1416c979272ef1ef7d289 +size 384 diff --git a/margin_logs/step_0000443.npy b/margin_logs/step_0000443.npy new file mode 100644 index 0000000..f2e7919 --- /dev/null +++ b/margin_logs/step_0000443.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c087bdf106acfa5a9879991259902d5d7e62901fa415668500cef04abfdaaf3 +size 384 diff --git a/margin_logs/step_0000444.npy b/margin_logs/step_0000444.npy new file mode 100644 index 0000000..d9a0c7a --- /dev/null +++ b/margin_logs/step_0000444.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be62f3506d222cd5b9b910093c4904f555ff0ea750141853357a19c84d89e539 +size 384 diff --git a/margin_logs/step_0000445.npy b/margin_logs/step_0000445.npy new file mode 100644 index 0000000..b29404c --- /dev/null +++ b/margin_logs/step_0000445.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:861837196eba36e5c080bd57940505b39ce36f50545a3c4af9a19540f5276977 +size 384 diff --git a/margin_logs/step_0000446.npy b/margin_logs/step_0000446.npy new file mode 100644 index 0000000..825f1a8 --- /dev/null +++ b/margin_logs/step_0000446.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d96a724c3a78a63730b1504fa25417065336bcc28ee575f7b9a25599ffb6bc4 +size 384 diff --git a/margin_logs/step_0000447.npy b/margin_logs/step_0000447.npy new file mode 100644 index 0000000..d21b10d --- /dev/null +++ b/margin_logs/step_0000447.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:924de275e21a0a89b9948184b1364fc0cd363e4baa8a613385a13c04121dbece +size 384 diff --git a/margin_logs/step_0000448.npy b/margin_logs/step_0000448.npy new file mode 100644 index 0000000..c3405ac --- /dev/null +++ b/margin_logs/step_0000448.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:678cec57beec6afe21875300237561f527550f2604a7330950d14da86e425933 +size 384 diff --git a/margin_logs/step_0000449.npy b/margin_logs/step_0000449.npy new file mode 100644 index 0000000..bb4242e --- /dev/null +++ b/margin_logs/step_0000449.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61801445d03873340fedeb363fa4d7edb518ddf21b9b87626f3bee54fecae7ce +size 384 diff --git a/margin_logs/step_0000450.npy b/margin_logs/step_0000450.npy new file mode 100644 index 0000000..4759696 --- /dev/null +++ b/margin_logs/step_0000450.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f7399605c697fd2e917509893c63eb35c30066e82ea0efba0ff78330cd61fd1 +size 384 diff --git a/margin_logs/step_0000451.npy b/margin_logs/step_0000451.npy new file mode 100644 index 0000000..4650577 --- /dev/null +++ b/margin_logs/step_0000451.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7084fa5e12efa18579a1a3d1ad34e50f68b4ff3dbe9dddd0d204155336b65372 +size 384 diff --git a/margin_logs/step_0000452.npy b/margin_logs/step_0000452.npy new file mode 100644 index 0000000..b81fdfc --- /dev/null +++ b/margin_logs/step_0000452.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8550ac16c08f93d9fab553ee0f653b7edbb24c72758162946e58937f8a78036e +size 384 diff --git a/margin_logs/step_0000453.npy b/margin_logs/step_0000453.npy new file mode 100644 index 0000000..95af260 --- /dev/null +++ b/margin_logs/step_0000453.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fd402346c8fc8ec76b4a970e3bca014fc3166746f558d3c262e58bd667781d0 +size 384 diff --git a/margin_logs/step_0000454.npy b/margin_logs/step_0000454.npy new file mode 100644 index 0000000..38c2d0f --- /dev/null +++ b/margin_logs/step_0000454.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aad35870b5b18628ee7107caa17d77c747d8d63217574dcb604d497620cb6e5b +size 384 diff --git a/margin_logs/step_0000455.npy b/margin_logs/step_0000455.npy new file mode 100644 index 0000000..8842c59 --- /dev/null +++ b/margin_logs/step_0000455.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0d79fa19e4e6dec7efb4dc54ccc59efe18f272419cf6214b58227efd156308b +size 384 diff --git a/margin_logs/step_0000456.npy b/margin_logs/step_0000456.npy new file mode 100644 index 0000000..588faad --- /dev/null +++ b/margin_logs/step_0000456.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9632fc011529c5a8c7cec2be154ed36ddd36e4d2c2e661ef38dc377dd44e562f +size 384 diff --git a/margin_logs/step_0000457.npy b/margin_logs/step_0000457.npy new file mode 100644 index 0000000..8a49c65 --- /dev/null +++ b/margin_logs/step_0000457.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b566ca045de98959f5085694ecbd3f84613ea71f40c8019aa711ffb8fcabb9c8 +size 384 diff --git a/margin_logs/step_0000458.npy b/margin_logs/step_0000458.npy new file mode 100644 index 0000000..b998de1 --- /dev/null +++ b/margin_logs/step_0000458.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f935e778f8846d504d1e2064e5217075ed53803a32285bd35b587f45117771ed +size 384 diff --git a/margin_logs/step_0000459.npy b/margin_logs/step_0000459.npy new file mode 100644 index 0000000..ddfdff4 --- /dev/null +++ b/margin_logs/step_0000459.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ca6b311d3d0b7a192cbbd4a91252d6099bcb6547ef44aa3776e4b673ed6e547 +size 384 diff --git a/margin_logs/step_0000460.npy b/margin_logs/step_0000460.npy new file mode 100644 index 0000000..40317f2 --- /dev/null +++ b/margin_logs/step_0000460.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa34dd9fb93444b4b6817dc660e9825251bc94bfb272cab59439115f028351ca +size 384 diff --git a/margin_logs/step_0000461.npy b/margin_logs/step_0000461.npy new file mode 100644 index 0000000..c965c69 --- /dev/null +++ b/margin_logs/step_0000461.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a76222267c6bcbb784781c6f7c4633053bf879984c8fe85f33baa2548a8b894 +size 384 diff --git a/margin_logs/step_0000462.npy b/margin_logs/step_0000462.npy new file mode 100644 index 0000000..d75981b --- /dev/null +++ b/margin_logs/step_0000462.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c73cd6c86771c0ecec998963a5832d32cacc9a972cf273f8634a75bd35d5041e +size 384 diff --git a/margin_logs/step_0000463.npy b/margin_logs/step_0000463.npy new file mode 100644 index 0000000..5b6cdc5 --- /dev/null +++ b/margin_logs/step_0000463.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a38742450101d8fecfded1f4acdd8cee233a1ffec9fff135b642bf8ccb7ae96 +size 384 diff --git a/margin_logs/step_0000464.npy b/margin_logs/step_0000464.npy new file mode 100644 index 0000000..b8cfd0c --- /dev/null +++ b/margin_logs/step_0000464.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a91b36ce14627f8664a0d78a1dff5e026e368728266b97d16e977e2d2cc2275 +size 384 diff --git a/margin_logs/step_0000465.npy b/margin_logs/step_0000465.npy new file mode 100644 index 0000000..94d27b1 --- /dev/null +++ b/margin_logs/step_0000465.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a88dab82643ebfd363122892e5573d95721c3aab0621d516ab8123b6793d298 +size 384 diff --git a/margin_logs/step_0000466.npy b/margin_logs/step_0000466.npy new file mode 100644 index 0000000..a840347 --- /dev/null +++ b/margin_logs/step_0000466.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca30ebbc9e23aa9de6fe9483766c171ae844c9d6643b18e971307ad80272c43c +size 384 diff --git a/margin_logs/step_0000467.npy b/margin_logs/step_0000467.npy new file mode 100644 index 0000000..7d9414f --- /dev/null +++ b/margin_logs/step_0000467.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c677eded3eb4dab3521d0128bc4469b5170a1e09c08a62d8a23dac29d59a175a +size 384 diff --git a/margin_logs/step_0000468.npy b/margin_logs/step_0000468.npy new file mode 100644 index 0000000..26fff39 --- /dev/null +++ b/margin_logs/step_0000468.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6409f9feec24bd03463301a9bf9d18353b3162d7afe6b4373044a15facf473f2 +size 384 diff --git a/margin_logs/step_0000469.npy b/margin_logs/step_0000469.npy new file mode 100644 index 0000000..4077a64 --- /dev/null +++ b/margin_logs/step_0000469.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a52d25179db278c1fc5813e5d1c6170dd55b7c365886429a79684d2a219053c4 +size 384 diff --git a/margin_logs/step_0000470.npy b/margin_logs/step_0000470.npy new file mode 100644 index 0000000..ce8337b --- /dev/null +++ b/margin_logs/step_0000470.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b4cf6a7694062ffc059bc2dfd9770b963dae76b328447964be46b99f752825b +size 384 diff --git a/margin_logs/step_0000471.npy b/margin_logs/step_0000471.npy new file mode 100644 index 0000000..5fc0370 --- /dev/null +++ b/margin_logs/step_0000471.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9149220413a1abff164af7377d06fcca0185c2928b5b803ab1c8ea1baf2f98d4 +size 384 diff --git a/margin_logs/step_0000472.npy b/margin_logs/step_0000472.npy new file mode 100644 index 0000000..7ae5ad0 --- /dev/null +++ b/margin_logs/step_0000472.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcb5f960ceaa1e67d5297bd40bb166095b7af5a306d2d9cc0851cfbfb79d6dec +size 384 diff --git a/margin_logs/step_0000473.npy b/margin_logs/step_0000473.npy new file mode 100644 index 0000000..74bd126 --- /dev/null +++ b/margin_logs/step_0000473.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2bfc76cbea2eef7665317bc304b6888f658d04304fe75de34a5b434fb5ffeea +size 384 diff --git a/margin_logs/step_0000474.npy b/margin_logs/step_0000474.npy new file mode 100644 index 0000000..f6c228c --- /dev/null +++ b/margin_logs/step_0000474.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3c301181ff6d5285456debd7b631b46e222a023b085d631ebdd44e41f6bceb7 +size 384 diff --git a/margin_logs/step_0000475.npy b/margin_logs/step_0000475.npy new file mode 100644 index 0000000..7c8ac37 --- /dev/null +++ b/margin_logs/step_0000475.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:877b3168ccc38a924e1f2dde932c545234f5f4940fc128a7ba038f38a70b2eb5 +size 384 diff --git a/margin_logs/step_0000476.npy b/margin_logs/step_0000476.npy new file mode 100644 index 0000000..a2d83d4 --- /dev/null +++ b/margin_logs/step_0000476.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e467806f36f485815b1d9c23524e3412e584c22e57770c1c9ce13949e9de95c +size 384 diff --git a/margin_logs/step_0000477.npy b/margin_logs/step_0000477.npy new file mode 100644 index 0000000..4eec422 --- /dev/null +++ b/margin_logs/step_0000477.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5be3a4c2431e3b1cbbae4f43e9cf2750a222d056ec30fe506366dfdeb15a22c3 +size 384 diff --git a/margin_logs/step_0000478.npy b/margin_logs/step_0000478.npy new file mode 100644 index 0000000..a69acf4 --- /dev/null +++ b/margin_logs/step_0000478.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64b3e1f444c50dceb53c7112fcf1544dd9417705c0019fd327cf495829d0ae06 +size 384 diff --git a/margin_logs/step_0000479.npy b/margin_logs/step_0000479.npy new file mode 100644 index 0000000..c880b86 --- /dev/null +++ b/margin_logs/step_0000479.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbe1f0e056868a021abd7c6aadd2b35c9c09013b8f18fc49c25d1847d258b23f +size 384 diff --git a/margin_logs/step_0000480.npy b/margin_logs/step_0000480.npy new file mode 100644 index 0000000..480ab5c --- /dev/null +++ b/margin_logs/step_0000480.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cebddcfa9ba2c320c625267d0bdd1e61dba88be99106cd26b2f221d562f8c4c +size 384 diff --git a/margin_logs/step_0000481.npy b/margin_logs/step_0000481.npy new file mode 100644 index 0000000..84446dd --- /dev/null +++ b/margin_logs/step_0000481.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d507080aa7f8d4844eefc6db907f9a2a7891ab8c85478e7a5de49113658b0a3c +size 384 diff --git a/margin_logs/step_0000482.npy b/margin_logs/step_0000482.npy new file mode 100644 index 0000000..850e2f3 --- /dev/null +++ b/margin_logs/step_0000482.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3134bb0cb9757a54aeafada4145012aab66cab81a186864557a2562e0bd2f546 +size 384 diff --git a/margin_logs/step_0000483.npy b/margin_logs/step_0000483.npy new file mode 100644 index 0000000..8244aad --- /dev/null +++ b/margin_logs/step_0000483.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db2efda18c419cfc7341f9ac62c7e1fccc75cdc5ab26f3e751ad2af695eef3f6 +size 384 diff --git a/margin_logs/step_0000484.npy b/margin_logs/step_0000484.npy new file mode 100644 index 0000000..f574c07 --- /dev/null +++ b/margin_logs/step_0000484.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddb8c08c174361dfd16bee25b8fefaaa070920d6cc8839c51a2ba1e9361abb02 +size 384 diff --git a/margin_logs/step_0000485.npy b/margin_logs/step_0000485.npy new file mode 100644 index 0000000..f0b213d --- /dev/null +++ b/margin_logs/step_0000485.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e2364f1dc0336f60f2acbd8f9ef65ebad169fa6584beb2b5bf454159d37ad05 +size 384 diff --git a/margin_logs/step_0000486.npy b/margin_logs/step_0000486.npy new file mode 100644 index 0000000..4a8f94c --- /dev/null +++ b/margin_logs/step_0000486.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1b0d9ba7b56b584de8b912f233a23744c93d6df62962ebcb5305c741f53819b +size 384 diff --git a/margin_logs/step_0000487.npy b/margin_logs/step_0000487.npy new file mode 100644 index 0000000..e62425e --- /dev/null +++ b/margin_logs/step_0000487.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cdb9671866d2a9b6f2c35c2d68adebb71b1df316497c66b06cca0222c86e002 +size 384 diff --git a/margin_logs/step_0000488.npy b/margin_logs/step_0000488.npy new file mode 100644 index 0000000..433dcf1 --- /dev/null +++ b/margin_logs/step_0000488.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17b0dd2fa98151f7fe7222b3f3d2ca97f0cfbbf8082ce8723b27b65c92d3ccfa +size 384 diff --git a/margin_logs/step_0000489.npy b/margin_logs/step_0000489.npy new file mode 100644 index 0000000..a575289 --- /dev/null +++ b/margin_logs/step_0000489.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08d7389d71e353c478d8a8e4f50bd3a8e19104ba25108d41f277f1b38ab30dd0 +size 384 diff --git a/margin_logs/step_0000490.npy b/margin_logs/step_0000490.npy new file mode 100644 index 0000000..f42fa61 --- /dev/null +++ b/margin_logs/step_0000490.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a5496acc41be50443c9d073af8983be2ba73d2002f5317e699911a35499220f +size 384 diff --git a/margin_logs/step_0000491.npy b/margin_logs/step_0000491.npy new file mode 100644 index 0000000..6ffa5ab --- /dev/null +++ b/margin_logs/step_0000491.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdb8b4620ec9f07a6749207fca123be0bd1bba058e0e4a5da132f9f087cbf233 +size 384 diff --git a/margin_logs/step_0000492.npy b/margin_logs/step_0000492.npy new file mode 100644 index 0000000..8c02cac --- /dev/null +++ b/margin_logs/step_0000492.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d64e00139cd493bf961a7037a2734197c63b36d0bc6880b4eea04c7466c29b51 +size 384 diff --git a/margin_logs/step_0000493.npy b/margin_logs/step_0000493.npy new file mode 100644 index 0000000..2e72f55 --- /dev/null +++ b/margin_logs/step_0000493.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1951de484e4c60580c2aa0f0103dd99a53e3d52438b04fd6c27466dda3ff657b +size 384 diff --git a/margin_logs/step_0000494.npy b/margin_logs/step_0000494.npy new file mode 100644 index 0000000..45e2124 --- /dev/null +++ b/margin_logs/step_0000494.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a083e7c37b481953f32ff0d2b276e793b630b6ece5800ad5d00bb177ae922c3 +size 384 diff --git a/margin_logs/step_0000495.npy b/margin_logs/step_0000495.npy new file mode 100644 index 0000000..7cf3de8 --- /dev/null +++ b/margin_logs/step_0000495.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f2182d0788016e2043a65764ea0aa5f0940a32fe961394cd8bd5845682f34f6 +size 384 diff --git a/margin_logs/step_0000496.npy b/margin_logs/step_0000496.npy new file mode 100644 index 0000000..15e766a --- /dev/null +++ b/margin_logs/step_0000496.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ba944ee2475d4526b0acb1efe45544579f2ad5261def9f604f9991e51d81dc8 +size 384 diff --git a/margin_logs/step_0000497.npy b/margin_logs/step_0000497.npy new file mode 100644 index 0000000..6462e32 --- /dev/null +++ b/margin_logs/step_0000497.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d7b1f2a6260334d1a9d3e686e1fd701a7436bf7b323e8479b35d0a328d4ef30 +size 384 diff --git a/margin_logs/step_0000498.npy b/margin_logs/step_0000498.npy new file mode 100644 index 0000000..52984de --- /dev/null +++ b/margin_logs/step_0000498.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99cb27186e0a4c02486aa0a77eb524ded6bc14a823971028ecf2b805cb6990c4 +size 384 diff --git a/margin_logs/step_0000499.npy b/margin_logs/step_0000499.npy new file mode 100644 index 0000000..98524aa --- /dev/null +++ b/margin_logs/step_0000499.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c522f9a69fb544796bb4a7eed75fae05cd1f8ad91b2fcb0e9a99a3a505a9add +size 384 diff --git a/margin_logs/step_0000500.npy b/margin_logs/step_0000500.npy new file mode 100644 index 0000000..117e3a1 --- /dev/null +++ b/margin_logs/step_0000500.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a632a1803ec2b744ffeb4d9816191ffaa59bf5106ca0db7ee936bb16abd1171c +size 384 diff --git a/margin_logs/step_0000501.npy b/margin_logs/step_0000501.npy new file mode 100644 index 0000000..fc075af --- /dev/null +++ b/margin_logs/step_0000501.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:359c1efddbfae50037d2d39dc9d4c34b5acf3c37c427be51941d97aef831fb7a +size 384 diff --git a/margin_logs/step_0000502.npy b/margin_logs/step_0000502.npy new file mode 100644 index 0000000..d6dbaae --- /dev/null +++ b/margin_logs/step_0000502.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98e6dffa31d67a0aa959e55b3f8a410acd307793ea1bbc890bd366017ccbabc1 +size 384 diff --git a/margin_logs/step_0000503.npy b/margin_logs/step_0000503.npy new file mode 100644 index 0000000..f98de06 --- /dev/null +++ b/margin_logs/step_0000503.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e2a510ad4c208ae1031395b94bbdd80621b50a1042ede4b0b92e92e0dc7ade3 +size 384 diff --git a/margin_logs/step_0000504.npy b/margin_logs/step_0000504.npy new file mode 100644 index 0000000..96d48d5 --- /dev/null +++ b/margin_logs/step_0000504.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9ce63568f317b73b5ab2acf98ea95ce6c2fd26fd0dcdbc3b77428e2d39f2e4b +size 384 diff --git a/margin_logs/step_0000505.npy b/margin_logs/step_0000505.npy new file mode 100644 index 0000000..6285388 --- /dev/null +++ b/margin_logs/step_0000505.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a26c2dbd79d7a8384e589c0876261c9df8611ae13d5b7d2ec0b4f6c1702f85c +size 384 diff --git a/margin_logs/step_0000506.npy b/margin_logs/step_0000506.npy new file mode 100644 index 0000000..cfeb87d --- /dev/null +++ b/margin_logs/step_0000506.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bf5fd6eb7b0574235a4db60863723f7147a09fbf6fa1cda9adcc56c5d8ada4a +size 384 diff --git a/margin_logs/step_0000507.npy b/margin_logs/step_0000507.npy new file mode 100644 index 0000000..8d6a74f --- /dev/null +++ b/margin_logs/step_0000507.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1268bc95e864921ec1f64f4b42b4682f7f513d7648d56a8f660a7f0ecde86d66 +size 384 diff --git a/margin_logs/step_0000508.npy b/margin_logs/step_0000508.npy new file mode 100644 index 0000000..1ea5d17 --- /dev/null +++ b/margin_logs/step_0000508.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa4162d70070b5249ec901ef3769659584cb64ff08f7afbdcbe3e31dab1c0a0c +size 384 diff --git a/margin_logs/step_0000509.npy b/margin_logs/step_0000509.npy new file mode 100644 index 0000000..744b633 --- /dev/null +++ b/margin_logs/step_0000509.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d99b841b51c9cf5bb7132006fe819f29c55c1bd732b7cf5fd9a669a04655ba8e +size 384 diff --git a/margin_logs/step_0000510.npy b/margin_logs/step_0000510.npy new file mode 100644 index 0000000..73f6e31 --- /dev/null +++ b/margin_logs/step_0000510.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4446dd6f1f22a8b91e0722a18aca89139e4a7c89934fa8b92a2a3da68d1a7d7a +size 384 diff --git a/margin_logs/step_0000511.npy b/margin_logs/step_0000511.npy new file mode 100644 index 0000000..64fbcd7 --- /dev/null +++ b/margin_logs/step_0000511.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55eef6a9ef0069c1e066e8c0ae11d2278fe9a5b5b5b4a4353b34f94f224de934 +size 384 diff --git a/margin_logs/step_0000512.npy b/margin_logs/step_0000512.npy new file mode 100644 index 0000000..f8fe37b --- /dev/null +++ b/margin_logs/step_0000512.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9718b02f8ce660deffc3ddb723e35d37eedab671b59177d6d673f301a3a5c5f0 +size 384 diff --git a/margin_logs/step_0000513.npy b/margin_logs/step_0000513.npy new file mode 100644 index 0000000..8245dff --- /dev/null +++ b/margin_logs/step_0000513.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3178b2524d0f2fed798cef58132b4e95d52d9a5723c1ffdf25570ca48caf8d20 +size 384 diff --git a/margin_logs/step_0000514.npy b/margin_logs/step_0000514.npy new file mode 100644 index 0000000..48e981c --- /dev/null +++ b/margin_logs/step_0000514.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c5b3e19e67721971938613d09fb6555920fecfb00b67ad02df0af02d6b8e133 +size 384 diff --git a/margin_logs/step_0000515.npy b/margin_logs/step_0000515.npy new file mode 100644 index 0000000..0c573d2 --- /dev/null +++ b/margin_logs/step_0000515.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e62b9dbc1696761fcae21020f57552449537881ff0d8fd85673b2cf267c800f7 +size 384 diff --git a/margin_logs/step_0000516.npy b/margin_logs/step_0000516.npy new file mode 100644 index 0000000..323f18d --- /dev/null +++ b/margin_logs/step_0000516.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6823b22897e86b007ceccc2a4b8562000892d441f7aae6990760c7d642ce25f1 +size 384 diff --git a/margin_logs/step_0000517.npy b/margin_logs/step_0000517.npy new file mode 100644 index 0000000..5a5f89c --- /dev/null +++ b/margin_logs/step_0000517.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4bfc9a7b36291d4ff6d4a11f76c0a5787a61c4e7861c5b25c3ea03a6fd859f6 +size 384 diff --git a/margin_logs/step_0000518.npy b/margin_logs/step_0000518.npy new file mode 100644 index 0000000..ee0c33b --- /dev/null +++ b/margin_logs/step_0000518.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff3247c65073bc485729c702f080d22b8343294f2e0a84fbed37255985e7f1ca +size 384 diff --git a/margin_logs/step_0000519.npy b/margin_logs/step_0000519.npy new file mode 100644 index 0000000..e818052 --- /dev/null +++ b/margin_logs/step_0000519.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b024b7ee0c0a0f2d778fb5e1c4b768755d0d296b386a1f8808f163818ced571 +size 384 diff --git a/margin_logs/step_0000520.npy b/margin_logs/step_0000520.npy new file mode 100644 index 0000000..b51ff0e --- /dev/null +++ b/margin_logs/step_0000520.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd5f6a1ebf60d0e84b1ee57c1b6bcd19437049d1102f210a61a89dc134c1545b +size 384 diff --git a/margin_logs/step_0000521.npy b/margin_logs/step_0000521.npy new file mode 100644 index 0000000..d64b772 --- /dev/null +++ b/margin_logs/step_0000521.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4561ea1bb1cf50ae14abdc0c8cf448a793fb0d4cf7a095140cc5ebce7ddc1727 +size 384 diff --git a/margin_logs/step_0000522.npy b/margin_logs/step_0000522.npy new file mode 100644 index 0000000..9f769de --- /dev/null +++ b/margin_logs/step_0000522.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09b2ecc3a07ddf54bd41abc93c1d753df885404a71ccab38400fa9a628f6c1ea +size 384 diff --git a/margin_logs/step_0000523.npy b/margin_logs/step_0000523.npy new file mode 100644 index 0000000..9f04f20 --- /dev/null +++ b/margin_logs/step_0000523.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:450ca372cb8bfe85ce4c9e3d0fb1b5e8aa349b0ecd6d9cbd9d4536f5b1303cde +size 384 diff --git a/margin_logs/step_0000524.npy b/margin_logs/step_0000524.npy new file mode 100644 index 0000000..45b6ce2 --- /dev/null +++ b/margin_logs/step_0000524.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6de7a3577bfe3bdb17f85c32823a253664022e417f2bb54a83615172ebc8315 +size 384 diff --git a/margin_logs/step_0000525.npy b/margin_logs/step_0000525.npy new file mode 100644 index 0000000..8284388 --- /dev/null +++ b/margin_logs/step_0000525.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da34bf322311b788d07d056f389d8e4f24e2dc1e5463690020961123c0447173 +size 384 diff --git a/margin_logs/step_0000526.npy b/margin_logs/step_0000526.npy new file mode 100644 index 0000000..5c9414a --- /dev/null +++ b/margin_logs/step_0000526.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b5ee610c2d1a5fe0aa934427347ff9f9635e90fdb7f56d77d949a1dc181dfc9 +size 384 diff --git a/margin_logs/step_0000527.npy b/margin_logs/step_0000527.npy new file mode 100644 index 0000000..f9e8082 --- /dev/null +++ b/margin_logs/step_0000527.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33ca816c76c174024f9b30ae22acff54a8489eb377953a3ac5befc698a5d23ca +size 384 diff --git a/margin_logs/step_0000528.npy b/margin_logs/step_0000528.npy new file mode 100644 index 0000000..cf8c3b7 --- /dev/null +++ b/margin_logs/step_0000528.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06734e13986dfff74b9986a13b48090a855110b8a03e3e8c674780e49e4a893e +size 384 diff --git a/margin_logs/step_0000529.npy b/margin_logs/step_0000529.npy new file mode 100644 index 0000000..d9c3dd4 --- /dev/null +++ b/margin_logs/step_0000529.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dc46ea208f9edbf5da53605bda7e3d509609b69b4775425c2b9325ffd6c1375 +size 384 diff --git a/margin_logs/step_0000530.npy b/margin_logs/step_0000530.npy new file mode 100644 index 0000000..d9039fc --- /dev/null +++ b/margin_logs/step_0000530.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ea5819b37a28e47937abcaf6a4d789d6a41a67f947b418b679dcda1447377e9 +size 384 diff --git a/margin_logs/step_0000531.npy b/margin_logs/step_0000531.npy new file mode 100644 index 0000000..96c1cf6 --- /dev/null +++ b/margin_logs/step_0000531.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ea3f1ec78ce6606a94b80eb7501880343b14974d539159a6c77788cb06703e9 +size 384 diff --git a/margin_logs/step_0000532.npy b/margin_logs/step_0000532.npy new file mode 100644 index 0000000..4e72bf8 --- /dev/null +++ b/margin_logs/step_0000532.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cf1ef052fdc42405723a8502e28f213ad22d7147bbadb1fc1c38888fe3fae75 +size 384 diff --git a/margin_logs/step_0000533.npy b/margin_logs/step_0000533.npy new file mode 100644 index 0000000..a8d2406 --- /dev/null +++ b/margin_logs/step_0000533.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fcf618bf3fef866575c61122e8b28db1a5525b883fe587ce9f3a60aef96a55a +size 384 diff --git a/margin_logs/step_0000534.npy b/margin_logs/step_0000534.npy new file mode 100644 index 0000000..824b035 --- /dev/null +++ b/margin_logs/step_0000534.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dca91a994397f34c537887289a87829d982106d289e22af7cef3d35c44af019 +size 384 diff --git a/margin_logs/step_0000535.npy b/margin_logs/step_0000535.npy new file mode 100644 index 0000000..fdb07aa --- /dev/null +++ b/margin_logs/step_0000535.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:776335a0edc1278bba0a939859a703cfd8633d5df80d5661d026593a1bcc2fa6 +size 384 diff --git a/margin_logs/step_0000536.npy b/margin_logs/step_0000536.npy new file mode 100644 index 0000000..ea1b515 --- /dev/null +++ b/margin_logs/step_0000536.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0e07231eb6bdbc5ace14cfd02ed7476692669c3d2e8be34fed54be44378450d +size 384 diff --git a/margin_logs/step_0000537.npy b/margin_logs/step_0000537.npy new file mode 100644 index 0000000..e18d2a2 --- /dev/null +++ b/margin_logs/step_0000537.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7a2fd0abe116925bebe245ad7ea3f67ec06d6ed7d8fabc701a27acc8f90aa1 +size 384 diff --git a/margin_logs/step_0000538.npy b/margin_logs/step_0000538.npy new file mode 100644 index 0000000..e57c7bc --- /dev/null +++ b/margin_logs/step_0000538.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45b67e1f7a1c2bb4195b0eabf6eb39cc2967d01a94e1a28b23a702e73a4dd741 +size 384 diff --git a/margin_logs/step_0000539.npy b/margin_logs/step_0000539.npy new file mode 100644 index 0000000..5a40a04 --- /dev/null +++ b/margin_logs/step_0000539.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db664dc3ebe62fd1ab841923dfa7d7028f3415767e7cc4581a592e242bde717b +size 384 diff --git a/margin_logs/step_0000540.npy b/margin_logs/step_0000540.npy new file mode 100644 index 0000000..3c891e2 --- /dev/null +++ b/margin_logs/step_0000540.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4db14ff118f3b21549d6bb10a01861ee84cc2a8f053908325da33e65fe30ec55 +size 384 diff --git a/margin_logs/step_0000541.npy b/margin_logs/step_0000541.npy new file mode 100644 index 0000000..ffaed9e --- /dev/null +++ b/margin_logs/step_0000541.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39e63c43ae5d4a1363a7b1d6d6e9f33d8da6826f875769db728a9529efb68f66 +size 384 diff --git a/margin_logs/step_0000542.npy b/margin_logs/step_0000542.npy new file mode 100644 index 0000000..8dcdd98 --- /dev/null +++ b/margin_logs/step_0000542.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b63616f7d9d99f955a57ae1e2d4750aed8461e73d095e907af033618cdf6b26 +size 384 diff --git a/margin_logs/step_0000543.npy b/margin_logs/step_0000543.npy new file mode 100644 index 0000000..f05e79c --- /dev/null +++ b/margin_logs/step_0000543.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f035f025e9be0b6bdd23aed5e86385411cd7f46f99540fd80d32a040f4a3f2ff +size 384 diff --git a/margin_logs/step_0000544.npy b/margin_logs/step_0000544.npy new file mode 100644 index 0000000..3dc3e0d --- /dev/null +++ b/margin_logs/step_0000544.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be5413dd877435a9b307a52aa224f08eca818dd350409398c29b5e56ef523a0a +size 384 diff --git a/margin_logs/step_0000545.npy b/margin_logs/step_0000545.npy new file mode 100644 index 0000000..34e71dd --- /dev/null +++ b/margin_logs/step_0000545.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7561b2c4543ffbd4a66c87c95dc4fc605bb0351003d383ced1cb8ebf726147fb +size 384 diff --git a/margin_logs/step_0000546.npy b/margin_logs/step_0000546.npy new file mode 100644 index 0000000..fac4fd4 --- /dev/null +++ b/margin_logs/step_0000546.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47af8aad26a8ea10433cea9c2a8bfb94120ade23c1918d355927ccc98ff3c774 +size 384 diff --git a/margin_logs/step_0000547.npy b/margin_logs/step_0000547.npy new file mode 100644 index 0000000..0ce53f4 --- /dev/null +++ b/margin_logs/step_0000547.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6db828eb5170ab9e5ca72c9ab6deee4262d7112eb851345bb548b5e57eb37695 +size 384 diff --git a/margin_logs/step_0000548.npy b/margin_logs/step_0000548.npy new file mode 100644 index 0000000..cdb0f99 --- /dev/null +++ b/margin_logs/step_0000548.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:783ffe4b550294f7bb8337566ff833316c95cb085597534e921a659859c6f4eb +size 384 diff --git a/margin_logs/step_0000549.npy b/margin_logs/step_0000549.npy new file mode 100644 index 0000000..ce96ece --- /dev/null +++ b/margin_logs/step_0000549.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e455dc48cdbc96fa4d705973c7c25906b432d353eee2f4bc67a5e07dfc6d9926 +size 384 diff --git a/margin_logs/step_0000550.npy b/margin_logs/step_0000550.npy new file mode 100644 index 0000000..8373bd8 --- /dev/null +++ b/margin_logs/step_0000550.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87bb89fafcfadcba13f26b5a0cd9bbcabf3c7fc49c8dedab65bea8d778cf44e8 +size 384 diff --git a/margin_logs/step_0000551.npy b/margin_logs/step_0000551.npy new file mode 100644 index 0000000..7e5bd4d --- /dev/null +++ b/margin_logs/step_0000551.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3618f808ff456c15c07f523d22d39df4d4df555073f4b8be82291cf13eb75d26 +size 384 diff --git a/margin_logs/step_0000552.npy b/margin_logs/step_0000552.npy new file mode 100644 index 0000000..d9874ca --- /dev/null +++ b/margin_logs/step_0000552.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55a924d499ce28517f90bfa8057f114f7696918092cb2871b5535925b3b9a4ac +size 384 diff --git a/margin_logs/step_0000553.npy b/margin_logs/step_0000553.npy new file mode 100644 index 0000000..911c40b --- /dev/null +++ b/margin_logs/step_0000553.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:593e4607f72b500b9a34ff187b8d81b546ac068757a9709a369c1fbdb4c50bdb +size 384 diff --git a/margin_logs/step_0000554.npy b/margin_logs/step_0000554.npy new file mode 100644 index 0000000..19849b0 --- /dev/null +++ b/margin_logs/step_0000554.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8a1321d573850233f817172da8dbb0ffe481557e3dd67a65656f5ecb5904132 +size 384 diff --git a/margin_logs/step_0000555.npy b/margin_logs/step_0000555.npy new file mode 100644 index 0000000..32068ba --- /dev/null +++ b/margin_logs/step_0000555.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82034d0e44d07b2197fb0df4af7c2316694afe3835b4dfb0e121a66810bc2e08 +size 384 diff --git a/margin_logs/step_0000556.npy b/margin_logs/step_0000556.npy new file mode 100644 index 0000000..c46aab4 --- /dev/null +++ b/margin_logs/step_0000556.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87ac1bf3842cf20e9df2cfb390891f418b0b05926315031a76a6164dd2047559 +size 384 diff --git a/margin_logs/step_0000557.npy b/margin_logs/step_0000557.npy new file mode 100644 index 0000000..4cb00f6 --- /dev/null +++ b/margin_logs/step_0000557.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:690fbb47ab46bc4c11ce967fb7c94fc95914f4d85ca974e692cb02f2c993bf3b +size 384 diff --git a/margin_logs/step_0000558.npy b/margin_logs/step_0000558.npy new file mode 100644 index 0000000..9bf6619 --- /dev/null +++ b/margin_logs/step_0000558.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85dc8de5db8aadf7ca481c0b66bf75dbe9c284b2781a36ea44534b68d44f4e90 +size 384 diff --git a/margin_logs/step_0000559.npy b/margin_logs/step_0000559.npy new file mode 100644 index 0000000..d625b96 --- /dev/null +++ b/margin_logs/step_0000559.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6119f99cfa9163ab538f7d85e6cde81540f8a14f7afe990fb21b8d81ba4d3c22 +size 384 diff --git a/margin_logs/step_0000560.npy b/margin_logs/step_0000560.npy new file mode 100644 index 0000000..d0aae72 --- /dev/null +++ b/margin_logs/step_0000560.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fe2323d57d1211eeacf375efc8177fdf4994a61af64ba37f7d4999e767f579d +size 384 diff --git a/margin_logs/step_0000561.npy b/margin_logs/step_0000561.npy new file mode 100644 index 0000000..99e376b --- /dev/null +++ b/margin_logs/step_0000561.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:154ebc4fc84e4a204b7f4ead1eed1c636d7cfcd6d1455e9d9a71f0bf4826eecd +size 384 diff --git a/margin_logs/step_0000562.npy b/margin_logs/step_0000562.npy new file mode 100644 index 0000000..0ef2872 --- /dev/null +++ b/margin_logs/step_0000562.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00ef7db66fb28879c748b050968eda774ebf22839fdf44ba51fc64dd18373e6b +size 384 diff --git a/margin_logs/step_0000563.npy b/margin_logs/step_0000563.npy new file mode 100644 index 0000000..21b8832 --- /dev/null +++ b/margin_logs/step_0000563.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4c298c4723f90342de15c58915d1c3cbf0b8aa015c1aba925724170b6268608 +size 384 diff --git a/margin_logs/step_0000564.npy b/margin_logs/step_0000564.npy new file mode 100644 index 0000000..9075832 --- /dev/null +++ b/margin_logs/step_0000564.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4497c57766d2eca0c36c6a199a600d765581d9a960198d7b5692deae16272db7 +size 384 diff --git a/margin_logs/step_0000565.npy b/margin_logs/step_0000565.npy new file mode 100644 index 0000000..2235370 --- /dev/null +++ b/margin_logs/step_0000565.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4adc8bce198100a18a7eae10f3b3b9310377f34043127d9a15f02e12eda3035 +size 384 diff --git a/margin_logs/step_0000566.npy b/margin_logs/step_0000566.npy new file mode 100644 index 0000000..767e8e3 --- /dev/null +++ b/margin_logs/step_0000566.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3048ed6a1df7bd8f9dc427e96a791236a75c375cc948ceadf2b4ab85d2b19bff +size 384 diff --git a/margin_logs/step_0000567.npy b/margin_logs/step_0000567.npy new file mode 100644 index 0000000..089491d --- /dev/null +++ b/margin_logs/step_0000567.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcbe13dcba6f5921b7ee9b5d5c407e24aaf1b26d399b9cf3e93c45b279351992 +size 384 diff --git a/margin_logs/step_0000568.npy b/margin_logs/step_0000568.npy new file mode 100644 index 0000000..0f24f22 --- /dev/null +++ b/margin_logs/step_0000568.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5202117a27db4c06eaad16bab0e7d0f2b3a8e55c77756332bf10c2d710bf88c6 +size 384 diff --git a/margin_logs/step_0000569.npy b/margin_logs/step_0000569.npy new file mode 100644 index 0000000..7627d3a --- /dev/null +++ b/margin_logs/step_0000569.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e918671f81db38dd875f00852cd44342f6a9259959c0415ec6358a40462a8135 +size 384 diff --git a/margin_logs/step_0000570.npy b/margin_logs/step_0000570.npy new file mode 100644 index 0000000..625f7dd --- /dev/null +++ b/margin_logs/step_0000570.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eff1a48a7f5ba419cb696bc1c548c1e4a1133d510ca143143ff02c7b197a15a8 +size 384 diff --git a/margin_logs/step_0000571.npy b/margin_logs/step_0000571.npy new file mode 100644 index 0000000..610033a --- /dev/null +++ b/margin_logs/step_0000571.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b360afe2794b2ab69ce56aafe32a870a7d8f5c63f5b065b1cbca433ea404490b +size 384 diff --git a/margin_logs/step_0000572.npy b/margin_logs/step_0000572.npy new file mode 100644 index 0000000..e23c908 --- /dev/null +++ b/margin_logs/step_0000572.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a388f558f5eeb8d95e16c9118870e2cc3b773ba27a6cf5004e20d90e52aede9 +size 384 diff --git a/margin_logs/step_0000573.npy b/margin_logs/step_0000573.npy new file mode 100644 index 0000000..4d44c57 --- /dev/null +++ b/margin_logs/step_0000573.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:295551b83e04edd68d741d13db60126eea3226ea384f3a25b069731abd7334d2 +size 384 diff --git a/margin_logs/step_0000574.npy b/margin_logs/step_0000574.npy new file mode 100644 index 0000000..da5e32c --- /dev/null +++ b/margin_logs/step_0000574.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:237fb87b7be58a996f6e1301e4fc73ba6342ca9c3601c7b6e172cd20f32581d7 +size 384 diff --git a/margin_logs/step_0000575.npy b/margin_logs/step_0000575.npy new file mode 100644 index 0000000..ab96d9d --- /dev/null +++ b/margin_logs/step_0000575.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebff3f7bd9080826b008c221e53234bbff1baa43695bb9a443a2f06cda6e3611 +size 384 diff --git a/margin_logs/step_0000576.npy b/margin_logs/step_0000576.npy new file mode 100644 index 0000000..32b2f5c --- /dev/null +++ b/margin_logs/step_0000576.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36b42827827b3687fbf0d4d1944924066f609385ebaf431c1856abf2c328d587 +size 384 diff --git a/margin_logs/step_0000577.npy b/margin_logs/step_0000577.npy new file mode 100644 index 0000000..479e991 --- /dev/null +++ b/margin_logs/step_0000577.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:254cb5e30b5e981b681e88b5451f23eb8e94c44aa337a3bf3a56fc222437f8d5 +size 384 diff --git a/margin_logs/step_0000578.npy b/margin_logs/step_0000578.npy new file mode 100644 index 0000000..b965cf2 --- /dev/null +++ b/margin_logs/step_0000578.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7bb62baf17514931e67d0a9970fe1aafcc28d24556c582352492fef43702e5f +size 384 diff --git a/margin_logs/step_0000579.npy b/margin_logs/step_0000579.npy new file mode 100644 index 0000000..75ae259 --- /dev/null +++ b/margin_logs/step_0000579.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b84dd2466393c81cfa9c37e9bb6139bfa1a74344a615d1ad145c3eda309133ac +size 384 diff --git a/margin_logs/step_0000580.npy b/margin_logs/step_0000580.npy new file mode 100644 index 0000000..3484eb2 --- /dev/null +++ b/margin_logs/step_0000580.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2f9c5ed8edcc7e03e39436d2980852ea894d5d27d82ea7f02a546cbc3d87c3a +size 384 diff --git a/margin_logs/step_0000581.npy b/margin_logs/step_0000581.npy new file mode 100644 index 0000000..62b2760 --- /dev/null +++ b/margin_logs/step_0000581.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4083dde896649d164460c8d51a520f09d390ba50752c5d51445f2f0ca0f32d90 +size 384 diff --git a/margin_logs/step_0000582.npy b/margin_logs/step_0000582.npy new file mode 100644 index 0000000..dccb0b4 --- /dev/null +++ b/margin_logs/step_0000582.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:854597c194d7994f844c27ee77b45ec323215953c20fdf85a4b348d8d0c05eae +size 384 diff --git a/margin_logs/step_0000583.npy b/margin_logs/step_0000583.npy new file mode 100644 index 0000000..f7201ae --- /dev/null +++ b/margin_logs/step_0000583.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b68edbcc7f8832b31d1a51f8648d7ae2a8450ba87f3deca71203b875a3ac4b81 +size 384 diff --git a/margin_logs/step_0000584.npy b/margin_logs/step_0000584.npy new file mode 100644 index 0000000..0d80353 --- /dev/null +++ b/margin_logs/step_0000584.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:628a80443d217be71e02efb1ccc56c09b448d630d3635e91325d86170ce56133 +size 384 diff --git a/margin_logs/step_0000585.npy b/margin_logs/step_0000585.npy new file mode 100644 index 0000000..f70164e --- /dev/null +++ b/margin_logs/step_0000585.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a26aaaab916d483894097cd8ef3bfbc7c04e33513cfa81e4b1981078eb2424bc +size 384 diff --git a/margin_logs/step_0000586.npy b/margin_logs/step_0000586.npy new file mode 100644 index 0000000..2961d28 --- /dev/null +++ b/margin_logs/step_0000586.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a25d954beb6dd554ffa693ca9524291456f8a1afb326c6be0efa4b339ab201a +size 384 diff --git a/margin_logs/step_0000587.npy b/margin_logs/step_0000587.npy new file mode 100644 index 0000000..eb96695 --- /dev/null +++ b/margin_logs/step_0000587.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6dec3645823adf3474ac01c4a809c417f01a39012e52df883f78068b90b8bba +size 384 diff --git a/margin_logs/step_0000588.npy b/margin_logs/step_0000588.npy new file mode 100644 index 0000000..29b821a --- /dev/null +++ b/margin_logs/step_0000588.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f626b95032644820ee396c234166d7dda1faf787c4f2eeaa139b5c20afc8a6a +size 384 diff --git a/margin_logs/step_0000589.npy b/margin_logs/step_0000589.npy new file mode 100644 index 0000000..9f40c4b --- /dev/null +++ b/margin_logs/step_0000589.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36dcf78c617b93abd820f6fe2729ba50934d3e962168438cf0f7aff011edcf7e +size 384 diff --git a/margin_logs/step_0000590.npy b/margin_logs/step_0000590.npy new file mode 100644 index 0000000..54c5a94 --- /dev/null +++ b/margin_logs/step_0000590.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83f90863f98bd6e4e8c9a79624fddc8ea96e801e382a4adecabfa7d023231efb +size 384 diff --git a/margin_logs/step_0000591.npy b/margin_logs/step_0000591.npy new file mode 100644 index 0000000..922fd63 --- /dev/null +++ b/margin_logs/step_0000591.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:318ea6610ffeeab85ff155bfe27c92d14c00e85623a4143ea50b9dbe8db69d46 +size 384 diff --git a/margin_logs/step_0000592.npy b/margin_logs/step_0000592.npy new file mode 100644 index 0000000..1cdced9 --- /dev/null +++ b/margin_logs/step_0000592.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:774b2ae3bdaa28fcaa50eb8717b4a795d08b89aa6bb28c40d98f70349601a019 +size 384 diff --git a/margin_logs/step_0000593.npy b/margin_logs/step_0000593.npy new file mode 100644 index 0000000..df8b727 --- /dev/null +++ b/margin_logs/step_0000593.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:890b2945b81b51eedda0e492d83ea6d194ca9be68c75ced46b944d1bf6570e98 +size 384 diff --git a/margin_logs/step_0000594.npy b/margin_logs/step_0000594.npy new file mode 100644 index 0000000..6c3ad3d --- /dev/null +++ b/margin_logs/step_0000594.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b3d26532f54798535ebc027c3097a215e58b8079858fffc1b0b8595e4299a13 +size 384 diff --git a/margin_logs/step_0000595.npy b/margin_logs/step_0000595.npy new file mode 100644 index 0000000..fc70156 --- /dev/null +++ b/margin_logs/step_0000595.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:706733cc77128c61032423e103e7c54bedd50464c1a665da036a7b18746f261f +size 384 diff --git a/margin_logs/step_0000596.npy b/margin_logs/step_0000596.npy new file mode 100644 index 0000000..fe56442 --- /dev/null +++ b/margin_logs/step_0000596.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bf1985a26d3eb2298cebc2297b0dc227bd2062c9742923f3b54918e79f10cc2 +size 384 diff --git a/margin_logs/step_0000597.npy b/margin_logs/step_0000597.npy new file mode 100644 index 0000000..3d675ab --- /dev/null +++ b/margin_logs/step_0000597.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4255bb6f0ef79d899defd82a9b6a857bb34a019c35ac756af95b9175c8f2ba8b +size 384 diff --git a/margin_logs/step_0000598.npy b/margin_logs/step_0000598.npy new file mode 100644 index 0000000..cdc682f --- /dev/null +++ b/margin_logs/step_0000598.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71452552c30507459f5e2c7ce3fc1d4ae656a43ac06da1024a368ffafa465464 +size 384 diff --git a/margin_logs/step_0000599.npy b/margin_logs/step_0000599.npy new file mode 100644 index 0000000..82d4bd7 --- /dev/null +++ b/margin_logs/step_0000599.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:956cc47e34ecefb93ee2112b5cee163b47d0bfa69d0154306c0725d05a864a3e +size 384 diff --git a/margin_logs/step_0000600.npy b/margin_logs/step_0000600.npy new file mode 100644 index 0000000..305f32f --- /dev/null +++ b/margin_logs/step_0000600.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b15fc4c32cf87af60c456b4085474aba8cedc9ebf8dcdd5c5c3b3d57b7aca33b +size 384 diff --git a/margin_logs/step_0000601.npy b/margin_logs/step_0000601.npy new file mode 100644 index 0000000..40af0c1 --- /dev/null +++ b/margin_logs/step_0000601.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5713ceaa3d2c6aad49119c1c6d88cac05af35c718277067ea542eca27215d8a +size 384 diff --git a/margin_logs/step_0000602.npy b/margin_logs/step_0000602.npy new file mode 100644 index 0000000..0f471e0 --- /dev/null +++ b/margin_logs/step_0000602.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5540fe426a23679765abb3e5e45747d90c904ebc6f55db47087962e579157fc0 +size 384 diff --git a/margin_logs/step_0000603.npy b/margin_logs/step_0000603.npy new file mode 100644 index 0000000..a19b850 --- /dev/null +++ b/margin_logs/step_0000603.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24c2421fff2ee0a331ff3a44ff6745d85c75f400d904eab49a65cdef388a5c91 +size 384 diff --git a/margin_logs/step_0000604.npy b/margin_logs/step_0000604.npy new file mode 100644 index 0000000..c235caf --- /dev/null +++ b/margin_logs/step_0000604.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a320fc298ba7b41fcbf92aea6c657bfcc7abb501b33bd68a7d50cccf89422d98 +size 384 diff --git a/margin_logs/step_0000605.npy b/margin_logs/step_0000605.npy new file mode 100644 index 0000000..f488b3b --- /dev/null +++ b/margin_logs/step_0000605.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd463a1f4b5f092e94acfb1ddff489059af6443794964eabb9ccd547d41f7231 +size 384 diff --git a/margin_logs/step_0000606.npy b/margin_logs/step_0000606.npy new file mode 100644 index 0000000..1f516b2 --- /dev/null +++ b/margin_logs/step_0000606.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea14dc267197ee375fea5fcfbf72a14ae2520a00a15f44852d9b310a00390706 +size 384 diff --git a/margin_logs/step_0000607.npy b/margin_logs/step_0000607.npy new file mode 100644 index 0000000..e71e205 --- /dev/null +++ b/margin_logs/step_0000607.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:055b28fc1d345ce1cc91c987697cbd8f9fd51d4d8acf5e3cae7dea177d82e4f0 +size 384 diff --git a/margin_logs/step_0000608.npy b/margin_logs/step_0000608.npy new file mode 100644 index 0000000..b2c6439 --- /dev/null +++ b/margin_logs/step_0000608.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95a759866064cc259ad7e85173bd6084bed88f3e08f0e6ce42063d276ea025d6 +size 384 diff --git a/margin_logs/step_0000609.npy b/margin_logs/step_0000609.npy new file mode 100644 index 0000000..2af564d --- /dev/null +++ b/margin_logs/step_0000609.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de73d294e89c254308f16bf4831dc26f78c26892e7361f94ce0619bc15d3d54c +size 384 diff --git a/margin_logs/step_0000610.npy b/margin_logs/step_0000610.npy new file mode 100644 index 0000000..71117fa --- /dev/null +++ b/margin_logs/step_0000610.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e3b8b564edc1055c763ccafaf3a743177b9527df003b8daf8ec0ba460a91077 +size 384 diff --git a/margin_logs/step_0000611.npy b/margin_logs/step_0000611.npy new file mode 100644 index 0000000..76d7f64 --- /dev/null +++ b/margin_logs/step_0000611.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:792b1359c2a7260a6491f161766cddc10d44acddc3741923e65ef2bb11508271 +size 384 diff --git a/margin_logs/step_0000612.npy b/margin_logs/step_0000612.npy new file mode 100644 index 0000000..1fb1197 --- /dev/null +++ b/margin_logs/step_0000612.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4497aff816f72f6bd8c10cab7ef0030ca7f72cc4b9c4ca2a73db162bb5aa1f69 +size 384 diff --git a/margin_logs/step_0000613.npy b/margin_logs/step_0000613.npy new file mode 100644 index 0000000..f1a936c --- /dev/null +++ b/margin_logs/step_0000613.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d19c4f3af512e545212de864ef198d3d1edb02f4ff4f5f0b80cb78790bd9c07 +size 384 diff --git a/margin_logs/step_0000614.npy b/margin_logs/step_0000614.npy new file mode 100644 index 0000000..d7dc9dc --- /dev/null +++ b/margin_logs/step_0000614.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31d3259e90ffd2b5d2a97e08f9d45133d3ddb727ccfc518190ec7723b08c4f41 +size 384 diff --git a/margin_logs/step_0000615.npy b/margin_logs/step_0000615.npy new file mode 100644 index 0000000..842eabb --- /dev/null +++ b/margin_logs/step_0000615.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7713d3728b50cf97ca1689d73e9e9f54a878c17988fbee8581133b8412bbdfc0 +size 384 diff --git a/margin_logs/step_0000616.npy b/margin_logs/step_0000616.npy new file mode 100644 index 0000000..d301d88 --- /dev/null +++ b/margin_logs/step_0000616.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cab2dc8a569a0dce10d56f11ee3840de118fb6b129bdf8aa8dd7a4e0d4731154 +size 384 diff --git a/margin_logs/step_0000617.npy b/margin_logs/step_0000617.npy new file mode 100644 index 0000000..e732cde --- /dev/null +++ b/margin_logs/step_0000617.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:914564c29ea57f1829648f9038f1bf7a9db57c60c840d833540151cde59360c6 +size 384 diff --git a/margin_logs/step_0000618.npy b/margin_logs/step_0000618.npy new file mode 100644 index 0000000..8eaa163 --- /dev/null +++ b/margin_logs/step_0000618.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a82d03c04002973ca2f4483ca46acfb01313e68e8446bb856d62e3bcda70d38b +size 384 diff --git a/margin_logs/step_0000619.npy b/margin_logs/step_0000619.npy new file mode 100644 index 0000000..c3247d9 --- /dev/null +++ b/margin_logs/step_0000619.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d060ecc53f9f20b83c478761b0a617f02f54e9ba54c362a9cfe52a283f4faa73 +size 384 diff --git a/margin_logs/step_0000620.npy b/margin_logs/step_0000620.npy new file mode 100644 index 0000000..822895a --- /dev/null +++ b/margin_logs/step_0000620.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4f47270b7652c359ef884618e84b812169dd2a0cc42d6fca2355520c6033e6c +size 384 diff --git a/margin_logs/step_0000621.npy b/margin_logs/step_0000621.npy new file mode 100644 index 0000000..25025f9 --- /dev/null +++ b/margin_logs/step_0000621.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:247a0f706ff0eed4f8367f18c16c7fd31f3a1360e6d040d80453decd8a653124 +size 384 diff --git a/margin_logs/step_0000622.npy b/margin_logs/step_0000622.npy new file mode 100644 index 0000000..7ffe231 --- /dev/null +++ b/margin_logs/step_0000622.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c9b9f419c445f24c40afb7356e29468fce493956c0e39bf1123af6bc9bce042 +size 384 diff --git a/margin_logs/step_0000623.npy b/margin_logs/step_0000623.npy new file mode 100644 index 0000000..388d54b --- /dev/null +++ b/margin_logs/step_0000623.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfa44ca15e5dae49882b3613c2a025452fb2ee0968d3eb0b5964373b2275ef1b +size 384 diff --git a/margin_logs/step_0000624.npy b/margin_logs/step_0000624.npy new file mode 100644 index 0000000..39ab6f8 --- /dev/null +++ b/margin_logs/step_0000624.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:366f55f6a6d55051d0a81bcd37b2743b4cf48d97633990554ee28c9184192c68 +size 384 diff --git a/margin_logs/step_0000625.npy b/margin_logs/step_0000625.npy new file mode 100644 index 0000000..463f65f --- /dev/null +++ b/margin_logs/step_0000625.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5243283f94d39e54704cebb09c9e71f43033fe4f3d9e6bc2c9edecb0d61478a7 +size 384 diff --git a/margin_logs/step_0000626.npy b/margin_logs/step_0000626.npy new file mode 100644 index 0000000..6d64f39 --- /dev/null +++ b/margin_logs/step_0000626.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2a0eaa98051911ae1b20cab866871ef542a387ccf0c2b9a675b02493242fa3e +size 384 diff --git a/margin_logs/step_0000627.npy b/margin_logs/step_0000627.npy new file mode 100644 index 0000000..02ab1c4 --- /dev/null +++ b/margin_logs/step_0000627.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c67a627f755b268bfd3235f5050eff88cfb14042d87ea6da6fcf4b1ac296444 +size 384 diff --git a/margin_logs/step_0000628.npy b/margin_logs/step_0000628.npy new file mode 100644 index 0000000..7febfab --- /dev/null +++ b/margin_logs/step_0000628.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99bc2872d06b73cd85831db205679a9f04051477701961b802b6a69ae6b9d1ac +size 384 diff --git a/margin_logs/step_0000629.npy b/margin_logs/step_0000629.npy new file mode 100644 index 0000000..f703db9 --- /dev/null +++ b/margin_logs/step_0000629.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d5d8e4445e9d16f966ec76659cf2d1ff3ca5251ac0af458e8b7ea31b9ffbdec +size 384 diff --git a/margin_logs/step_0000630.npy b/margin_logs/step_0000630.npy new file mode 100644 index 0000000..76c6d0e --- /dev/null +++ b/margin_logs/step_0000630.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8143e936a2b3fc61aeb0860dff9b684a8ebdd94dffcd20c7b7e163392af66b4e +size 384 diff --git a/margin_logs/step_0000631.npy b/margin_logs/step_0000631.npy new file mode 100644 index 0000000..0b6e43b --- /dev/null +++ b/margin_logs/step_0000631.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2122194c492a23addd3ce7e15ca0589c11ef79947a1c78156cab915792a6ebf0 +size 384 diff --git a/margin_logs/step_0000632.npy b/margin_logs/step_0000632.npy new file mode 100644 index 0000000..308cea8 --- /dev/null +++ b/margin_logs/step_0000632.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cad755c0607a6a1fa7206c95ab87b3afcd90c14df6e70deb8da658e6877cb52d +size 384 diff --git a/margin_logs/step_0000633.npy b/margin_logs/step_0000633.npy new file mode 100644 index 0000000..ac25fd2 --- /dev/null +++ b/margin_logs/step_0000633.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c205e53ffe9dada67031b932ff466cb00d56de0896ffb39fb35f9fec6310faf8 +size 384 diff --git a/margin_logs/step_0000634.npy b/margin_logs/step_0000634.npy new file mode 100644 index 0000000..6e5dba4 --- /dev/null +++ b/margin_logs/step_0000634.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b562f44dec65fb307c7a969145c6b014c16c86f5593806ad35cc31da04541680 +size 384 diff --git a/margin_logs/step_0000635.npy b/margin_logs/step_0000635.npy new file mode 100644 index 0000000..9e6b410 --- /dev/null +++ b/margin_logs/step_0000635.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e942ff4ce7720d8a9598801ec725450ba9f6dc18a4d05be21bd147d7db9cd834 +size 384 diff --git a/margin_logs/step_0000636.npy b/margin_logs/step_0000636.npy new file mode 100644 index 0000000..fd267c9 --- /dev/null +++ b/margin_logs/step_0000636.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa291ed197c3f7d6c23e7c9d571ac34e76afe3aa5ed21cbda584bea92571307c +size 384 diff --git a/margin_logs/step_0000637.npy b/margin_logs/step_0000637.npy new file mode 100644 index 0000000..67bc10d --- /dev/null +++ b/margin_logs/step_0000637.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21e8813e2fa6b2b3c6de821e1c1fe437c976c431669524b913b8084228757215 +size 384 diff --git a/margin_logs/step_0000638.npy b/margin_logs/step_0000638.npy new file mode 100644 index 0000000..281f7cd --- /dev/null +++ b/margin_logs/step_0000638.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a785d7a317f4fe67249ab4d34a9d64cef49090f7ff5cf036da83b169e96e6dee +size 384 diff --git a/margin_logs/step_0000639.npy b/margin_logs/step_0000639.npy new file mode 100644 index 0000000..6414335 --- /dev/null +++ b/margin_logs/step_0000639.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9038dfbc56c7affd0a797bd025ee5ffc618e9e39b257719984808ed03f61920d +size 384 diff --git a/margin_logs/step_0000640.npy b/margin_logs/step_0000640.npy new file mode 100644 index 0000000..fee9994 --- /dev/null +++ b/margin_logs/step_0000640.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b70a04b22229f0c2ff794a39f589c46480c615ff074be124dbcdab34fb46d9af +size 384 diff --git a/margin_logs/step_0000641.npy b/margin_logs/step_0000641.npy new file mode 100644 index 0000000..df88002 --- /dev/null +++ b/margin_logs/step_0000641.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bff4a3665e4640a85ce281defae2d095fbf2ee75e9601571973f97593a17188 +size 384 diff --git a/margin_logs/step_0000642.npy b/margin_logs/step_0000642.npy new file mode 100644 index 0000000..bbefdfc --- /dev/null +++ b/margin_logs/step_0000642.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4693dc61f6faf97275180303b6df7b11c3adc82b6cd00bb63724c4087a6b2032 +size 384 diff --git a/margin_logs/step_0000643.npy b/margin_logs/step_0000643.npy new file mode 100644 index 0000000..7532812 --- /dev/null +++ b/margin_logs/step_0000643.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0374707ede73f398f581c30f5def95c7a7ef97b06c97bc26546c0d8b3838563 +size 384 diff --git a/margin_logs/step_0000644.npy b/margin_logs/step_0000644.npy new file mode 100644 index 0000000..7c017e5 --- /dev/null +++ b/margin_logs/step_0000644.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6001c7ee127d141a18f224cbfc1d1f1fd15149b7d15f4c3dcb8df64d71f712a +size 384 diff --git a/margin_logs/step_0000645.npy b/margin_logs/step_0000645.npy new file mode 100644 index 0000000..ae8a837 --- /dev/null +++ b/margin_logs/step_0000645.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b382d13e933ac68c3e28e4be41ee8399717fff3400b518394bb05e15acc1b8fb +size 384 diff --git a/margin_logs/step_0000646.npy b/margin_logs/step_0000646.npy new file mode 100644 index 0000000..2747086 --- /dev/null +++ b/margin_logs/step_0000646.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d4cc5532ed2caf5c253c53e7e2d6ef4978ebde330df70d98600e6564574e18b +size 384 diff --git a/margin_logs/step_0000647.npy b/margin_logs/step_0000647.npy new file mode 100644 index 0000000..c5f97f1 --- /dev/null +++ b/margin_logs/step_0000647.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5093a620c534c952d3c652b386a08b74a0bd67e09ed581056d7c9fd0597c280e +size 384 diff --git a/margin_logs/step_0000648.npy b/margin_logs/step_0000648.npy new file mode 100644 index 0000000..9e4953e --- /dev/null +++ b/margin_logs/step_0000648.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6bbcda08085e1ece824ccadf36a3ac720282e0a34f325505f6b76b5821285a9 +size 384 diff --git a/margin_logs/step_0000649.npy b/margin_logs/step_0000649.npy new file mode 100644 index 0000000..9e41c3d --- /dev/null +++ b/margin_logs/step_0000649.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49cf037e74b84c2a5488c5843d2cfdff9a7af3f9520935b31116fbaf9d66062a +size 384 diff --git a/margin_logs/step_0000650.npy b/margin_logs/step_0000650.npy new file mode 100644 index 0000000..8cec81c --- /dev/null +++ b/margin_logs/step_0000650.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa9aa9920701e4d605afb7ccf24a42361ea44419d674ca1cbfd5b378b60a7a5c +size 384 diff --git a/margin_logs/step_0000651.npy b/margin_logs/step_0000651.npy new file mode 100644 index 0000000..f82639c --- /dev/null +++ b/margin_logs/step_0000651.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f04f71338a6a895474e8ca71ac02e0fc798737f30e4728d6fa6069c3a306356 +size 384 diff --git a/margin_logs/step_0000652.npy b/margin_logs/step_0000652.npy new file mode 100644 index 0000000..653b56f --- /dev/null +++ b/margin_logs/step_0000652.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:827e956a1bef7c5d376d30bb362b563f479c5c6c0a860036614ffe20dbd7109b +size 384 diff --git a/margin_logs/step_0000653.npy b/margin_logs/step_0000653.npy new file mode 100644 index 0000000..721c157 --- /dev/null +++ b/margin_logs/step_0000653.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9da6f114c2afb7612e7f25314608f13e1ba42fb122aba841be8c8f1d3bee625 +size 384 diff --git a/margin_logs/step_0000654.npy b/margin_logs/step_0000654.npy new file mode 100644 index 0000000..7b93298 --- /dev/null +++ b/margin_logs/step_0000654.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cedb4e83e87229c0afd68a498acdfd34785841c0316755d20bbfdc2df16496bc +size 384 diff --git a/margin_logs/step_0000655.npy b/margin_logs/step_0000655.npy new file mode 100644 index 0000000..b0f4b21 --- /dev/null +++ b/margin_logs/step_0000655.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31c79009d036a80a9718fe4419c1330102d7e11c1b6cd24727b66affd65a507e +size 384 diff --git a/margin_logs/step_0000656.npy b/margin_logs/step_0000656.npy new file mode 100644 index 0000000..d059b82 --- /dev/null +++ b/margin_logs/step_0000656.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54d894a6bb55480e198725d8e9c01e22627a860ff5a13788fe11acc2bdaa5e7d +size 384 diff --git a/margin_logs/step_0000657.npy b/margin_logs/step_0000657.npy new file mode 100644 index 0000000..36b4945 --- /dev/null +++ b/margin_logs/step_0000657.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0302dbb711e03c756748ed267065230290b9f74b9e4eafc41ede1378a1d8a40f +size 384 diff --git a/margin_logs/step_0000658.npy b/margin_logs/step_0000658.npy new file mode 100644 index 0000000..aff7e0e --- /dev/null +++ b/margin_logs/step_0000658.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aef920a182315daa2e4bb2106cf38e73c3442fac0e2117b1dac32291bb9218f +size 384 diff --git a/margin_logs/step_0000659.npy b/margin_logs/step_0000659.npy new file mode 100644 index 0000000..653a3d2 --- /dev/null +++ b/margin_logs/step_0000659.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4df7066f8913988531b94dd9e2d58d282e9c7aca997ebf46f87d5b2ef79ac202 +size 384 diff --git a/margin_logs/step_0000660.npy b/margin_logs/step_0000660.npy new file mode 100644 index 0000000..91bebb9 --- /dev/null +++ b/margin_logs/step_0000660.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01efbb6f5fb3d485d9bd3c63de889a3e638f45551ec3234687cc798ddff5e606 +size 384 diff --git a/margin_logs/step_0000661.npy b/margin_logs/step_0000661.npy new file mode 100644 index 0000000..26f2008 --- /dev/null +++ b/margin_logs/step_0000661.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cebabda07bf97e263b74c72e4b88b97eb6f50f2df8570a0a1724effd77c68c2b +size 384 diff --git a/margin_logs/step_0000662.npy b/margin_logs/step_0000662.npy new file mode 100644 index 0000000..ce53959 --- /dev/null +++ b/margin_logs/step_0000662.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a4cc1c34170183102cab5a802e4fc7d9610cc4a6f3412807546f7533a65474a +size 384 diff --git a/margin_logs/step_0000663.npy b/margin_logs/step_0000663.npy new file mode 100644 index 0000000..1f6f421 --- /dev/null +++ b/margin_logs/step_0000663.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1949abf7af7da8ff11befc564750b942b57a5c3a4b2625e8c7e2787bc3c606a +size 384 diff --git a/margin_logs/step_0000664.npy b/margin_logs/step_0000664.npy new file mode 100644 index 0000000..8502634 --- /dev/null +++ b/margin_logs/step_0000664.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7d41a9558f115e97e387e4e83bbe94dc5de354422d547f7e2b8d1baf6e08190 +size 384 diff --git a/margin_logs/step_0000665.npy b/margin_logs/step_0000665.npy new file mode 100644 index 0000000..47e4b89 --- /dev/null +++ b/margin_logs/step_0000665.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b7b6eeef89f5afdfd4fa531760461f6502fde1725a913651f0f04bb421f7e47 +size 384 diff --git a/margin_logs/step_0000666.npy b/margin_logs/step_0000666.npy new file mode 100644 index 0000000..ce00705 --- /dev/null +++ b/margin_logs/step_0000666.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bf8c7957bcfc5edc54f56b0cabec495fd467cab247f89e673f92dc43e29b062 +size 384 diff --git a/margin_logs/step_0000667.npy b/margin_logs/step_0000667.npy new file mode 100644 index 0000000..23144b1 --- /dev/null +++ b/margin_logs/step_0000667.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:935b9712e80f81156887ef3e9d6732709297141407520170d8ec138c00c1fffa +size 384 diff --git a/margin_logs/step_0000668.npy b/margin_logs/step_0000668.npy new file mode 100644 index 0000000..45e45c4 --- /dev/null +++ b/margin_logs/step_0000668.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e38107891b9bdfe9a0ee2ae2987df337fc21349158f093550bcf52ac7dd08c0f +size 384 diff --git a/margin_logs/step_0000669.npy b/margin_logs/step_0000669.npy new file mode 100644 index 0000000..5e3f571 --- /dev/null +++ b/margin_logs/step_0000669.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6df95722a394c25f225766adbb6bda215dde6446fbd28f358458d526198ba60a +size 384 diff --git a/margin_logs/step_0000670.npy b/margin_logs/step_0000670.npy new file mode 100644 index 0000000..5894bb0 --- /dev/null +++ b/margin_logs/step_0000670.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ac6f3463d6d529ee721c7605895bdff55c5fce3837f48f91cc01635e4082068 +size 384 diff --git a/margin_logs/step_0000671.npy b/margin_logs/step_0000671.npy new file mode 100644 index 0000000..c3387c6 --- /dev/null +++ b/margin_logs/step_0000671.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:823390a42bd11332942ed213485b09b2a86f44e68edcf90dc0e24ad067b2f876 +size 384 diff --git a/margin_logs/step_0000672.npy b/margin_logs/step_0000672.npy new file mode 100644 index 0000000..83a2f54 --- /dev/null +++ b/margin_logs/step_0000672.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5aacb10d55d527af0f9790e35637f1c2f1359238deb543a39248fdfd4508ef3e +size 384 diff --git a/margin_logs/step_0000673.npy b/margin_logs/step_0000673.npy new file mode 100644 index 0000000..88ace47 --- /dev/null +++ b/margin_logs/step_0000673.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6262affc441b5dc51e07b3879b52c041ffab43197e733bf16b079d7c34c0dafb +size 384 diff --git a/margin_logs/step_0000674.npy b/margin_logs/step_0000674.npy new file mode 100644 index 0000000..009940b --- /dev/null +++ b/margin_logs/step_0000674.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c19ce67c79ff57af68d8546723630d8af6cc0522ebddf2c889e893337e666bd +size 384 diff --git a/margin_logs/step_0000675.npy b/margin_logs/step_0000675.npy new file mode 100644 index 0000000..ab65d6a --- /dev/null +++ b/margin_logs/step_0000675.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3a8bca1cee20e39b1d401b893b1a8c55c96c098c28ee8f98158478e36c90e5d +size 384 diff --git a/margin_logs/step_0000676.npy b/margin_logs/step_0000676.npy new file mode 100644 index 0000000..69d6289 --- /dev/null +++ b/margin_logs/step_0000676.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72c76181059ac5485b4b812a8e331f21c852c8c0dee17688b171139184cc93ae +size 384 diff --git a/margin_logs/step_0000677.npy b/margin_logs/step_0000677.npy new file mode 100644 index 0000000..f091bff --- /dev/null +++ b/margin_logs/step_0000677.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6a32e411bdce3af34ef36ed38f1a772548c8667f74c074c9b99d234ba75a463 +size 384 diff --git a/margin_logs/step_0000678.npy b/margin_logs/step_0000678.npy new file mode 100644 index 0000000..1fbd211 --- /dev/null +++ b/margin_logs/step_0000678.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ccafcf7f425287a2be6369a289332835f63fd3f51de58c0916bfe059e553ef1 +size 384 diff --git a/margin_logs/step_0000679.npy b/margin_logs/step_0000679.npy new file mode 100644 index 0000000..914ed5d --- /dev/null +++ b/margin_logs/step_0000679.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0165eadd4eec7658f9a2488d995a59d0f8a45e985f6dd257617a0f12eb76b291 +size 384 diff --git a/margin_logs/step_0000680.npy b/margin_logs/step_0000680.npy new file mode 100644 index 0000000..c557e75 --- /dev/null +++ b/margin_logs/step_0000680.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de7e99197579b0f4c905b7c91233c9a73193f6d9e258b5a88640b8aaddae8db6 +size 384 diff --git a/margin_logs/step_0000681.npy b/margin_logs/step_0000681.npy new file mode 100644 index 0000000..1836012 --- /dev/null +++ b/margin_logs/step_0000681.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11937457d1d7b744ae5214a5998d9f09a9a5f72c3503eb3aa41a048ebb4e4589 +size 384 diff --git a/model-00001-of-00007.safetensors b/model-00001-of-00007.safetensors new file mode 100644 index 0000000..c493b77 --- /dev/null +++ b/model-00001-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c6625705d16762198540d301d9a7eda7d73cda4a88336cf8c92a62c4b50ef5b +size 4886466168 diff --git a/model-00002-of-00007.safetensors b/model-00002-of-00007.safetensors new file mode 100644 index 0000000..d8597aa --- /dev/null +++ b/model-00002-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76ef90bcf9ae59c07e339e681d76689ca8f4cc2b3a02a5b86369e93e792ff5a2 +size 4832007448 diff --git a/model-00003-of-00007.safetensors b/model-00003-of-00007.safetensors new file mode 100644 index 0000000..c9c55e3 --- /dev/null +++ b/model-00003-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd327f00b5c41b283336d1400406e149ea4223da54f36308cb269ea8dbbd2f65 +size 4999813112 diff --git a/model-00004-of-00007.safetensors b/model-00004-of-00007.safetensors new file mode 100644 index 0000000..f51cca6 --- /dev/null +++ b/model-00004-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac2e00e872222bb882e948cc03ec728e8d287d8cb54bff91b679c1aedfd1819d +size 4999813128 diff --git a/model-00005-of-00007.safetensors b/model-00005-of-00007.safetensors new file mode 100644 index 0000000..fdfb3fd --- /dev/null +++ b/model-00005-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e845031eac63c87c13bdda34dd5cf9f454817a73591a4e4a540fabf136ca0fe +size 4832007496 diff --git a/model-00006-of-00007.safetensors b/model-00006-of-00007.safetensors new file mode 100644 index 0000000..6f24bb6 --- /dev/null +++ b/model-00006-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74ae221236343a40f55c6ed42e1a7c3aafb7ee3588dc6ab1156eae7800d946cc +size 4999813120 diff --git a/model-00007-of-00007.safetensors b/model-00007-of-00007.safetensors new file mode 100644 index 0000000..42548a0 --- /dev/null +++ b/model-00007-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9116b74a4b06b78004a21b7bf57523fb3d3f0702b7db7fa0b80c2cc38480ceab +size 2571158184 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..0985084 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,298 @@ +{ + "metadata": { + "total_size": 32121044992 + }, + "weight_map": { + "lm_head.weight": "model-00007-of-00007.safetensors", + "model.embed_tokens.weight": "model-00001-of-00007.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.10.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.15.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.20.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.21.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.26.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.3.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.30.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.input_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.4.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.9.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.norm.weight": "model-00007-of-00007.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..e5b39b6 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..86a3394 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..8c6916a --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 2048, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/train.log b/train.log new file mode 100644 index 0000000..aa6f46e --- /dev/null +++ b/train.log @@ -0,0 +1,1160 @@ +2026-04-29 16:12:12 - INFO - __main__ - Model parameters ModelArguments(base_model_revision=None, model_name_or_path='/workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200', model_revision='main', model_code_revision=None, torch_dtype='bfloat16', tokenizer_name_or_path=None, trust_remote_code=False, attn_implementation='flash_attention_2', use_peft=False, lora_r=16, lora_alpha=32, lora_dropout=0.05, lora_target_modules=None, lora_modules_to_save=None, load_in_8bit=False, load_in_4bit=False, bnb_4bit_quant_type='nf4', use_bnb_nested_quant=False, bnb_4bit_quant_storage='uint8') +2026-04-29 16:12:12 - INFO - __main__ - Data parameters DataArguments(chat_template=None, dataset_mixer={'Anthropic/hh-rlhf': 1.0}, text_column='text', dataset_splits=['train'], dataset_configs=['helpful-base'], dataset_dir=None, preprocessing_num_workers=12, use_persistent_hf_cache=True, hf_cache_dir='/workspace/dynamic-dpo-v4/hf/datasets', truncation_side=None, auto_insert_empty_system_msg=True, disable_thinking=False, preprocessing_log_samples=0, preprocessing_log_dir=None) +2026-04-29 16:12:12 - INFO - __main__ - Training/evaluation parameters NewDPOConfig( +_n_gpu=1, +accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False}, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +average_tokens_across_devices=False, +batch_eval_metrics=False, +beta=0.3, +bf16=True, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=True, +dataloader_num_workers=0, +dataloader_persistent_workers=False, +dataloader_pin_memory=True, +dataloader_prefetch_factor=None, +dataset_num_proc=12, +ddp_backend=None, +ddp_broadcast_buffers=None, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +ddp_timeout=1800, +debug=[], +deepspeed=None, +disable_dropout=True, +disable_tqdm=False, +do_eval=False, +do_predict=False, +do_train=False, +eta=0.1, +eval_accumulation_steps=None, +eval_delay=0, +eval_do_concat_batches=True, +eval_on_start=False, +eval_steps=200, +eval_strategy=IntervalStrategy.NO, +eval_use_gather_object=False, +f_alpha_divergence_coef=1.0, +f_divergence_type=reverse_kl, +force_use_ref_model=False, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +generate_during_eval=False, +gradient_accumulation_steps=2, +gradient_checkpointing=True, +gradient_checkpointing_kwargs={'use_reentrant': False}, +greater_is_better=None, +group_by_length=False, +half_precision_backend=auto, +hub_always_push=False, +hub_margin_dataset_id=None, +hub_model_id=W-61/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449, +hub_model_revision=main, +hub_private_repo=None, +hub_strategy=HubStrategy.EVERY_SAVE, +hub_token=, +ignore_data_skip=False, +include_for_metrics=[], +include_inputs_for_metrics=False, +include_num_input_tokens_seen=False, +include_tokens_per_second=False, +is_encoder_decoder=None, +jit_mode_eval=False, +label_names=None, +label_pad_token_id=-100, +label_smoothing=0.0, +label_smoothing_factor=0.0, +learning_rate=5e-07, +length_column_name=length, +load_best_model_at_end=False, +local_rank=0, +log_level=info, +log_level_replica=warning, +log_on_each_node=True, +logging_dir=/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/runs/Apr29_16-12-11_bc4ce3cd7c4e, +logging_first_step=True, +logging_nan_inf_filter=True, +logging_steps=1, +logging_strategy=IntervalStrategy.STEPS, +loss_type=sigmoid, +lr_scheduler_kwargs={}, +lr_scheduler_type=SchedulerType.COSINE, +margin_dataset_private=None, +margin_dataset_split=train, +margin_log_path=/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs, +margin_log_steps=1, +margin_save_full=True, +max_grad_norm=1.0, +max_length=512, +max_prompt_length=256, +max_steps=-1, +max_target_length=None, +metric_for_best_model=None, +model_adapter_name=None, +model_init_kwargs=None, +mp_parameters=, +neftune_noise_alpha=None, +no_cuda=False, +non_finite_logits_handling=error, +num_train_epochs=1, +optim=OptimizerNames.ADAMW_TORCH, +optim_args=None, +optim_target_modules=None, +output_dir=/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449, +overwrite_output_dir=False, +padding_value=None, +past_index=-1, +per_device_eval_batch_size=8, +per_device_train_batch_size=8, +post_tokenization_log_dir=None, +post_tokenization_log_samples=0, +precompute_ref_batch_size=None, +precompute_ref_eval_batch_size=None, +precompute_ref_log_probs=False, +prediction_loss_only=False, +push_margin_dataset=False, +push_to_hub=False, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +q_target=0.45, +ray_scope=last, +ref_adapter_name=None, +ref_model_init_kwargs=None, +ref_model_mixup_alpha=0.9, +ref_model_sync_steps=64, +reference_free=False, +remove_unused_columns=False, +report_to=['wandb'], +require_explicit_ref_model=True, +restore_callback_states_from_checkpoint=False, +resume_from_checkpoint=None, +reuse_tokenized_dataset=True, +rpo_alpha=None, +run_name=llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449, +s_star=0.4, +save_hf_model_artifacts=True, +save_on_each_node=False, +save_only_model=False, +save_safetensors=True, +save_steps=50, +save_strategy=SaveStrategy.NO, +save_total_limit=2, +seed=42, +sft_weight=0.0, +skip_memory_metrics=True, +sync_ref_model=False, +tf32=None, +tokenization_batch_size=128, +tokenization_mode=online, +tokenized_dataset_cache_dir=/workspace/dynamic-dpo-v4/tokenized_preferences, +torch_compile=False, +torch_compile_backend=None, +torch_compile_mode=None, +torch_empty_cache_steps=None, +torchdynamo=None, +tp_size=0, +tpu_metrics_debug=False, +tpu_num_cores=None, +trainer_type=new_dpo, +truncation_mode=keep_end, +use_cpu=False, +use_ipex=False, +use_legacy_prediction_loop=False, +use_liger_kernel=False, +use_mps_device=False, +wandb_project=llama3-hh-new-dpo-multi-beta-sweep, +warmup_ratio=0.1, +warmup_steps=0, +weight_decay=0.0, +) +2026-04-29 16:12:12 - INFO - __main__ - Using W&B project from training args: llama3-hh-new-dpo-multi-beta-sweep +wandb: Currently logged in as: can-not-fand (can-not-fand-northeastern-university). Use `wandb login --relogin` to force relogin +wandb: wandb version 0.26.1 is available! To upgrade, please run: +wandb: $ pip install wandb --upgrade +wandb: Tracking run with wandb version 0.17.5 +wandb: Run data is saved locally in /workspace/dynamic-dpo-v4/wandb/wandb/run-20260429_161214-5ilq5gld +wandb: Run `wandb offline` to turn off syncing. +wandb: Syncing run llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449 +wandb: ⭐️ View project at https://wandb.ai/can-not-fand-northeastern-university/llama3-hh-new-dpo-multi-beta-sweep +wandb: 🚀 View run at https://wandb.ai/can-not-fand-northeastern-university/llama3-hh-new-dpo-multi-beta-sweep/runs/5ilq5gld +2026-04-29 16:12:16 - INFO - __main__ - New-DPO parameters: beta=0.3, q_target=0.45, s_star=0.4, eta=0.1 +2026-04-29 16:12:16 - INFO - __main__ - Using persistent HF datasets cache at /workspace/dynamic-dpo-v4/hf/datasets + Normalizing raw HH preferences (train): 0%| | 0/43598 [00:00> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you. + warnings.warn( +[WARNING|logging.py:328] 2026-04-29 16:12:21,516 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/7 [00:00> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you. + warnings.warn( + Loading checkpoint shards: 0%| | 0/7 [00:00> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. +[WARNING|logging.py:328] 2026-04-29 16:12:21,621 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/7 [00:00> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. + Normalizing raw HH preferences (train): 18%|█▊ | 8000/43598 [00:00<00:04, 8346.88 examples/s] Normalizing raw HH preferences (train): 21%|██ | 9150/43598 [00:00<00:03, 9143.55 examples/s] Normalizing raw HH preferences (train): 24%|██▎ | 10317/43598 [00:01<00:03, 9811.78 examples/s] Normalizing raw HH preferences (train): 26%|██▋ | 11479/43598 [00:01<00:03, 10307.45 examples/s] Normalizing raw HH preferences (train): 29%|██▉ | 12655/43598 [00:01<00:02, 10703.82 examples/s] Normalizing raw HH preferences (train): 32%|███▏ | 13820/43598 [00:01<00:02, 10970.92 examples/s] Normalizing raw HH preferences (train): 34%|███▍ | 14978/43598 [00:01<00:02, 11144.66 examples/s] Normalizing raw HH preferences (train): 38%|███▊ | 16678/43598 [00:01<00:02, 11214.61 examples/s] Normalizing raw HH preferences (train): 41%|████ | 17823/43598 [00:01<00:02, 11275.80 examples/s] Normalizing raw HH preferences (train): 45%|████▍ | 19469/43598 [00:01<00:02, 11163.07 examples/s] Normalizing raw HH preferences (train): 47%|████▋ | 20647/43598 [00:01<00:02, 11273.15 examples/s] Normalizing raw HH preferences (train): 50%|█████ | 21800/43598 [00:02<00:01, 11339.49 examples/s] Normalizing raw HH preferences (train): 54%|█████▍ | 23452/43598 [00:02<00:01, 11215.53 examples/s] Normalizing raw HH preferences (train): 57%|█████▋ | 24643/43598 [00:02<00:01, 11316.96 examples/s] Normalizing raw HH preferences (train): 59%|█████▉ | 25787/43598 [00:02<00:01, 11346.30 examples/s] Normalizing raw HH preferences (train): 62%|██████▏ | 26930/43598 [00:02<00:01, 11368.57 examples/s] Normalizing raw HH preferences (train): 66%|██████▌ | 28651/43598 [00:02<00:01, 11304.20 examples/s] Normalizing raw HH preferences (train): 68%|██████▊ | 29829/43598 [00:02<00:01, 11423.49 examples/s] Normalizing raw HH preferences (train): 72%|███████▏ | 31488/43598 [00:02<00:01, 11290.16 examples/s] Normalizing raw HH preferences (train): 75%|███████▍ | 32659/43598 [00:03<00:00, 11392.31 examples/s] Normalizing raw HH preferences (train): 78%|███████▊ | 33809/43598 [00:03<00:00, 11418.77 examples/s] Normalizing raw HH preferences (train): 81%|████████▏ | 35458/43598 [00:03<00:00, 11260.52 examples/s] Normalizing raw HH preferences (train): 84%|████████▍ | 36655/43598 [00:03<00:00, 11342.05 examples/s] Normalizing raw HH preferences (train): 87%|████████▋ | 37805/43598 [00:03<00:00, 11382.60 examples/s] Normalizing raw HH preferences (train): 90%|█████████ | 39451/43598 [00:03<00:00, 11229.91 examples/s] Normalizing raw HH preferences (train): 93%|█████████▎| 40654/43598 [00:03<00:00, 11331.37 examples/s] Normalizing raw HH preferences (train): 96%|█████████▌| 41812/43598 [00:03<00:00, 11394.69 examples/s] Normalizing raw HH preferences (train): 100%|█████████▉| 43473/43598 [00:03<00:00, 11274.15 examples/s] Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10860.71 examples/s] +2026-04-29 16:12:24 - INFO - __main__ - Training on the following splits: ['train : 43598'] +[INFO|tokenization_utils_base.py:2058] 2026-04-29 16:12:24,970 >> loading file tokenizer.json +[INFO|tokenization_utils_base.py:2058] 2026-04-29 16:12:24,970 >> loading file tokenizer.model +[INFO|tokenization_utils_base.py:2058] 2026-04-29 16:12:24,970 >> loading file added_tokens.json +[INFO|tokenization_utils_base.py:2058] 2026-04-29 16:12:24,970 >> loading file special_tokens_map.json +[INFO|tokenization_utils_base.py:2058] 2026-04-29 16:12:24,970 >> loading file tokenizer_config.json +[INFO|tokenization_utils_base.py:2058] 2026-04-29 16:12:24,970 >> loading file chat_template.jinja +[INFO|tokenization_utils_base.py:2323] 2026-04-29 16:12:25,361 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. +2026-04-29 16:12:25 - INFO - __main__ - Processed train sample 41905: + +Prompt: +<|begin_of_text|><|start_header_id|>user<|end_header_id|> + +What is the best way to clean my refrigerator?<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +Well, I have a few ideas, but the best way to know what works best is to run some tests and then analyze the data to figure out which technique works best.<|eot_id|><|start_header_id|>user<|end_header_id|> + +What are your ideas?<|eot_id|> + +Chosen: +<|start_header_id|>assistant<|end_header_id|> + +Some of my ideas include wiping down the interior with a rag, using bleach and water to wipe down the door, emptying the contents of the fridge to scrub the shelves with a clean cloth, cleaning the interior of the fridge with vinegar and water, and using a vacuum cleaner to clean the interior.<|eot_id|> + +Rejected: +<|start_header_id|>assistant<|end_header_id|> + +I was thinking that you could try different types of soap, which could be a variation of “soap,” like antibacterial soap. I was also thinking that you could try using different tools, like a toothbrush, to clean the inside. Or I was thinking that you could try different ways to get the gunk out, like using a squeegee, or using a paper towel. I was also thinking you could try other types of cleaning, like vacuuming, but I think that could have the opposite of the desired effect.<|eot_id|> +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you. + warnings.warn( +[INFO|configuration_utils.py:691] 2026-04-29 16:12:25,477 >> loading configuration file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/config.json +[INFO|configuration_utils.py:765] 2026-04-29 16:12:25,478 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.0", + "use_cache": false, + "vocab_size": 128256 +} + +[INFO|modeling_utils.py:1121] 2026-04-29 16:12:25,491 >> loading weights file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/model.safetensors.index.json +[INFO|modeling_utils.py:2167] 2026-04-29 16:12:25,492 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16. +[WARNING|logging.py:328] 2026-04-29 16:12:25,495 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[INFO|configuration_utils.py:1142] 2026-04-29 16:12:25,496 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "eos_token_id": 128001, + "use_cache": false +} + + Loading checkpoint shards: 0%| | 0/7 [00:00> All model checkpoint weights were used when initializing LlamaForCausalLM. + +[INFO|modeling_utils.py:4934] 2026-04-29 16:12:37,323 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200. +If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training. +[INFO|configuration_utils.py:1095] 2026-04-29 16:12:37,326 >> loading configuration file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/generation_config.json +[INFO|configuration_utils.py:1142] 2026-04-29 16:12:37,327 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9 +} + +[INFO|configuration_utils.py:691] 2026-04-29 16:12:37,329 >> loading configuration file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/config.json +[INFO|configuration_utils.py:765] 2026-04-29 16:12:37,329 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.0", + "use_cache": false, + "vocab_size": 128256 +} + +[INFO|modeling_utils.py:1121] 2026-04-29 16:12:37,331 >> loading weights file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/model.safetensors.index.json +[INFO|modeling_utils.py:2167] 2026-04-29 16:12:37,332 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16. +[INFO|configuration_utils.py:1142] 2026-04-29 16:12:37,336 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "eos_token_id": 128001, + "use_cache": false +} + + Loading checkpoint shards: 0%| | 0/7 [00:00> All model checkpoint weights were used when initializing LlamaForCausalLM. + +[INFO|modeling_utils.py:4934] 2026-04-29 16:12:48,920 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200. +If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training. +[INFO|configuration_utils.py:1095] 2026-04-29 16:12:48,923 >> loading configuration file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/generation_config.json +[INFO|configuration_utils.py:1142] 2026-04-29 16:12:48,924 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9 +} + +[WARNING|trainer.py:821] 2026-04-29 16:12:48,925 >> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. +[WARNING|trainer.py:816] 2026-04-29 16:12:48,925 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-29 16:12:48,938 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:522: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `NewDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +[WARNING|trainer.py:816] 2026-04-29 16:12:50,403 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-29 16:12:50,403 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-29 16:12:50,404 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-29 16:12:50,427 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:522: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `NewDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +[WARNING|trainer.py:816] 2026-04-29 16:12:50,431 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:522: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `NewDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +[WARNING|trainer.py:816] 2026-04-29 16:12:50,435 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:522: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `NewDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +[INFO|trainer.py:748] 2026-04-29 16:12:50,684 >> Using auto half precision backend +/workspace/dynamic-dpo-v4/.venv/lib/python3.11/site-packages/accelerate/accelerator.py:1557: UserWarning: Upcasted low precision parameters in LlamaForCausalLM because mixed precision turned on in FSDP. Affects: model.embed_tokens.weight, model.norm.weight, lm_head.weight. + warnings.warn( +/workspace/dynamic-dpo-v4/.venv/lib/python3.11/site-packages/accelerate/accelerator.py:1557: UserWarning: Upcasted low precision parameters in LlamaDecoderLayer because mixed precision turned on in FSDP. Affects: self_attn.q_proj.weight, self_attn.k_proj.weight, self_attn.v_proj.weight, self_attn.o_proj.weight, mlp.gate_proj.weight, mlp.up_proj.weight, mlp.down_proj.weight, input_layernorm.weight, post_attention_layernorm.weight. + warnings.warn( +/workspace/dynamic-dpo-v4/.venv/lib/python3.11/site-packages/accelerate/accelerator.py:1563: UserWarning: FSDP upcast of low precision parameters may affect the precision of model checkpoints. + warnings.warn( +[INFO|trainer.py:2414] 2026-04-29 16:12:58,589 >> ***** Running training ***** +[INFO|trainer.py:2415] 2026-04-29 16:12:58,589 >> Num examples = 43,598 +[INFO|trainer.py:2416] 2026-04-29 16:12:58,589 >> Num Epochs = 1 +[INFO|trainer.py:2417] 2026-04-29 16:12:58,589 >> Instantaneous batch size per device = 8 +[INFO|trainer.py:2420] 2026-04-29 16:12:58,589 >> Total train batch size (w. parallel, distributed & accumulation) = 64 +[INFO|trainer.py:2421] 2026-04-29 16:12:58,589 >> Gradient Accumulation steps = 2 +[INFO|trainer.py:2422] 2026-04-29 16:12:58,589 >> Total optimization steps = 681 +[INFO|trainer.py:2423] 2026-04-29 16:12:58,590 >> Number of trainable parameters = 2,007,565,312 +[INFO|integration_utils.py:831] 2026-04-29 16:12:58,591 >> Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 0%| | 0/681 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:1713] 2026-04-29 16:13:00,193 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:1713] 2026-04-29 16:13:00,202 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:1713] 2026-04-29 16:13:00,206 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 1/681 [00:02<31:13, 2.75s/it] {'loss': 1.3971, 'grad_norm': 251.27125549316406, 'learning_rate': 0.0, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.501685619354248, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.02287006378173828, 'margin_dpo/margin_mean': -0.02287048101425171, 'margin_dpo/margin_std': 0.41920793056488037, 'logps/chosen': -50.1435661315918, 'logps/rejected': -74.09991455078125, 'logps/ref_chosen': -50.14883804321289, 'logps/ref_rejected': -74.1280517578125, 'KL/chosen_KL_mean': 0.00527191162109375, 'KL/rejected_KL_mean': 0.028141021728515625, 'KL/mean': 0.016706019639968872, 'KL/std': 0.272699236869812, 'logits/chosen': -0.4974287748336792, 'logits/rejected': -0.43299180269241333, 'epoch': 0.0} + 0%| | 1/681 [00:02<31:13, 2.75s/it] 0%| | 2/681 [00:05<29:34, 2.61s/it] {'loss': 1.4089, 'grad_norm': 217.6841278076172, 'learning_rate': 7.246376811594203e-09, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.5049160718917847, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.06572261452674866, 'margin_dpo/margin_mean': -0.06572240591049194, 'margin_dpo/margin_std': 0.35048407316207886, 'logps/chosen': -52.65568923950195, 'logps/rejected': -75.27340698242188, 'logps/ref_chosen': -52.620704650878906, 'logps/ref_rejected': -75.30413818359375, 'KL/chosen_KL_mean': -0.03498649597167969, 'KL/rejected_KL_mean': 0.030735015869140625, 'KL/mean': -0.00212840735912323, 'KL/std': 0.24797174334526062, 'logits/chosen': -0.49536412954330444, 'logits/rejected': -0.4594460427761078, 'epoch': 0.0} + 0%| | 2/681 [00:05<29:34, 2.61s/it] 0%| | 3/681 [00:07<29:22, 2.60s/it] {'loss': 1.3761, 'grad_norm': 210.45652770996094, 'learning_rate': 1.4492753623188406e-08, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.49674931168556213, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.04345357418060303, 'margin_dpo/margin_mean': 0.04345354437828064, 'margin_dpo/margin_std': 0.35039910674095154, 'logps/chosen': -60.962440490722656, 'logps/rejected': -68.6968994140625, 'logps/ref_chosen': -60.981597900390625, 'logps/ref_rejected': -68.67259216308594, 'KL/chosen_KL_mean': 0.019153594970703125, 'KL/rejected_KL_mean': -0.02429962158203125, 'KL/mean': -0.0025722086429595947, 'KL/std': 0.2354850471019745, 'logits/chosen': -0.4816104471683502, 'logits/rejected': -0.4421927034854889, 'epoch': 0.0} + 0%| | 3/681 [00:07<29:22, 2.60s/it] 1%| | 4/681 [00:10<29:35, 2.62s/it] {'loss': 1.4047, 'grad_norm': 217.65200805664062, 'learning_rate': 2.1739130434782606e-08, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.5037118196487427, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.04963979125022888, 'margin_dpo/margin_mean': -0.04964029788970947, 'margin_dpo/margin_std': 0.39235860109329224, 'logps/chosen': -56.76927947998047, 'logps/rejected': -86.59903717041016, 'logps/ref_chosen': -56.7677116394043, 'logps/ref_rejected': -86.64710998535156, 'KL/chosen_KL_mean': -0.001567840576171875, 'KL/rejected_KL_mean': 0.04807281494140625, 'KL/mean': 0.023254141211509705, 'KL/std': 0.26486122608184814, 'logits/chosen': -0.4678453207015991, 'logits/rejected': -0.4402541518211365, 'epoch': 0.01} + 1%| | 4/681 [00:10<29:35, 2.62s/it] 1%| | 5/681 [00:13<29:24, 2.61s/it] {'loss': 1.3902, 'grad_norm': 270.9964294433594, 'learning_rate': 2.898550724637681e-08, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.5001676678657532, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.00204351544380188, 'margin_dpo/margin_mean': -0.0020435750484466553, 'margin_dpo/margin_std': 0.37501761317253113, 'logps/chosen': -53.82865524291992, 'logps/rejected': -84.11642456054688, 'logps/ref_chosen': -53.859375, 'logps/ref_rejected': -84.14918518066406, 'KL/chosen_KL_mean': 0.030719757080078125, 'KL/rejected_KL_mean': 0.032764434814453125, 'KL/mean': 0.031741127371788025, 'KL/std': 0.2725304961204529, 'logits/chosen': -0.4972953498363495, 'logits/rejected': -0.4523712396621704, 'epoch': 0.01} + 1%| | 5/681 [00:13<29:24, 2.61s/it] 1%| | 6/681 [00:15<27:54, 2.48s/it] {'loss': 1.3894, 'grad_norm': 276.3399963378906, 'learning_rate': 3.6231884057971014e-08, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.49989837408065796, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.001337289810180664, 'margin_dpo/margin_mean': 0.0013370811939239502, 'margin_dpo/margin_std': 0.3880041539669037, 'logps/chosen': -63.027122497558594, 'logps/rejected': -92.66632080078125, 'logps/ref_chosen': -63.007484436035156, 'logps/ref_rejected': -92.64534759521484, 'KL/chosen_KL_mean': -0.0196380615234375, 'KL/rejected_KL_mean': -0.02097320556640625, 'KL/mean': -0.020306527614593506, 'KL/std': 0.27848026156425476, 'logits/chosen': -0.5145661234855652, 'logits/rejected': -0.47312256693840027, 'epoch': 0.01} + 1%| | 6/681 [00:15<27:54, 2.48s/it] 1%| | 7/681 [00:17<27:22, 2.44s/it] {'loss': 1.3873, 'grad_norm': 245.37692260742188, 'learning_rate': 4.347826086956521e-08, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.49938228726387024, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.008149892091751099, 'margin_dpo/margin_mean': 0.008150070905685425, 'margin_dpo/margin_std': 0.38711145520210266, 'logps/chosen': -57.74197006225586, 'logps/rejected': -103.8958969116211, 'logps/ref_chosen': -57.774818420410156, 'logps/ref_rejected': -103.92059326171875, 'KL/chosen_KL_mean': 0.03285026550292969, 'KL/rejected_KL_mean': 0.02469635009765625, 'KL/mean': 0.02877350151538849, 'KL/std': 0.30477985739707947, 'logits/chosen': -0.5077540874481201, 'logits/rejected': -0.47386452555656433, 'epoch': 0.01} + 1%| | 7/681 [00:17<27:22, 2.44s/it] 1%| | 8/681 [00:20<27:05, 2.41s/it] {'loss': 1.4116, 'grad_norm': 240.70013427734375, 'learning_rate': 5.0724637681159424e-08, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.5049271583557129, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.06675639748573303, 'margin_dpo/margin_mean': -0.06675609946250916, 'margin_dpo/margin_std': 0.47787904739379883, 'logps/chosen': -58.718265533447266, 'logps/rejected': -79.24690246582031, 'logps/ref_chosen': -58.716033935546875, 'logps/ref_rejected': -79.3114242553711, 'KL/chosen_KL_mean': -0.0022296905517578125, 'KL/rejected_KL_mean': 0.06452560424804688, 'KL/mean': 0.031146153807640076, 'KL/std': 0.33025887608528137, 'logits/chosen': -0.5015411376953125, 'logits/rejected': -0.47501832246780396, 'epoch': 0.01} + 1%| | 8/681 [00:20<27:05, 2.41s/it] 1%|▏ | 9/681 [00:22<27:37, 2.47s/it] {'loss': 1.3778, 'grad_norm': 254.40870666503906, 'learning_rate': 5.797101449275362e-08, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.49667075276374817, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.04486778378486633, 'margin_dpo/margin_mean': 0.04486680030822754, 'margin_dpo/margin_std': 0.4566071927547455, 'logps/chosen': -69.84456634521484, 'logps/rejected': -99.625244140625, 'logps/ref_chosen': -69.8668441772461, 'logps/ref_rejected': -99.6026611328125, 'KL/chosen_KL_mean': 0.02227783203125, 'KL/rejected_KL_mean': -0.0225830078125, 'KL/mean': -0.00015251338481903076, 'KL/std': 0.30635231733322144, 'logits/chosen': -0.4882626235485077, 'logits/rejected': -0.4411010444164276, 'epoch': 0.01} + 1%|▏ | 9/681 [00:22<27:37, 2.47s/it] 1%|▏ | 10/681 [00:25<27:41, 2.48s/it] {'loss': 1.3858, 'grad_norm': 212.15330505371094, 'learning_rate': 6.521739130434782e-08, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.4991758465766907, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.010898560285568237, 'margin_dpo/margin_mean': 0.010898619890213013, 'margin_dpo/margin_std': 0.34846025705337524, 'logps/chosen': -48.33521270751953, 'logps/rejected': -80.36048889160156, 'logps/ref_chosen': -48.35768508911133, 'logps/ref_rejected': -80.37206268310547, 'KL/chosen_KL_mean': 0.022472381591796875, 'KL/rejected_KL_mean': 0.01157379150390625, 'KL/mean': 0.017022237181663513, 'KL/std': 0.24305114150047302, 'logits/chosen': -0.4911458492279053, 'logits/rejected': -0.4477323889732361, 'epoch': 0.01} + 1%|▏ | 10/681 [00:25<27:41, 2.48s/it] 2%|▏ | 11/681 [00:27<28:42, 2.57s/it] {'loss': 1.3909, 'grad_norm': 207.24131774902344, 'learning_rate': 7.246376811594203e-08, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.5001416802406311, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.0021440982818603516, 'margin_dpo/margin_mean': -0.0021438300609588623, 'margin_dpo/margin_std': 0.4191063344478607, 'logps/chosen': -53.0169563293457, 'logps/rejected': -87.77833557128906, 'logps/ref_chosen': -53.01685333251953, 'logps/ref_rejected': -87.78038024902344, 'KL/chosen_KL_mean': -0.0001010894775390625, 'KL/rejected_KL_mean': 0.00203704833984375, 'KL/mean': 0.0009690821170806885, 'KL/std': 0.291149377822876, 'logits/chosen': -0.4701375365257263, 'logits/rejected': -0.4457797110080719, 'epoch': 0.02} + 2%|▏ | 11/681 [00:27<28:42, 2.57s/it] 2%|▏ | 12/681 [00:30<28:48, 2.58s/it] {'loss': 1.3887, 'grad_norm': 271.4062194824219, 'learning_rate': 7.971014492753623e-08, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.4999362528324127, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.0008537918329238892, 'margin_dpo/margin_mean': 0.0008526891469955444, 'margin_dpo/margin_std': 0.3452816605567932, 'logps/chosen': -61.81591033935547, 'logps/rejected': -104.86959838867188, 'logps/ref_chosen': -61.80543518066406, 'logps/ref_rejected': -104.8582763671875, 'KL/chosen_KL_mean': -0.010473251342773438, 'KL/rejected_KL_mean': -0.011325836181640625, 'KL/mean': -0.01090405136346817, 'KL/std': 0.27011072635650635, 'logits/chosen': -0.538188099861145, 'logits/rejected': -0.5020288228988647, 'epoch': 0.02} + 2%|▏ | 12/681 [00:30<28:48, 2.58s/it] 2%|▏ | 13/681 [00:33<29:06, 2.61s/it] {'loss': 1.3727, 'grad_norm': 236.69508361816406, 'learning_rate': 8.695652173913042e-08, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.495451420545578, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.06127917766571045, 'margin_dpo/margin_mean': 0.061279088258743286, 'margin_dpo/margin_std': 0.44392725825309753, 'logps/chosen': -64.22016906738281, 'logps/rejected': -87.22416687011719, 'logps/ref_chosen': -64.2603530883789, 'logps/ref_rejected': -87.20307922363281, 'KL/chosen_KL_mean': 0.04018592834472656, 'KL/rejected_KL_mean': -0.021087646484375, 'KL/mean': 0.009547561407089233, 'KL/std': 0.2959768772125244, 'logits/chosen': -0.4695357084274292, 'logits/rejected': -0.44066792726516724, 'epoch': 0.02} + 2%|▏ | 13/681 [00:33<29:06, 2.61s/it] 2%|▏ | 14/681 [00:35<28:43, 2.58s/it] {'loss': 1.3824, 'grad_norm': 255.34683227539062, 'learning_rate': 9.420289855072464e-08, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.49836230278015137, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.021841615438461304, 'margin_dpo/margin_mean': 0.021842211484909058, 'margin_dpo/margin_std': 0.34157758951187134, 'logps/chosen': -58.12159729003906, 'logps/rejected': -104.08030700683594, 'logps/ref_chosen': -58.11021041870117, 'logps/ref_rejected': -104.04708099365234, 'KL/chosen_KL_mean': -0.011384963989257812, 'KL/rejected_KL_mean': -0.03322601318359375, 'KL/mean': -0.02230377495288849, 'KL/std': 0.2484772801399231, 'logits/chosen': -0.46936067938804626, 'logits/rejected': -0.4296714961528778, 'epoch': 0.02} + 2%|▏ | 14/681 [00:35<28:43, 2.58s/it] 2%|▏ | 15/681 [00:38<28:39, 2.58s/it] {'loss': 1.3952, 'grad_norm': 193.75828552246094, 'learning_rate': 1.0144927536231885e-07, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.501282811164856, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.01762327551841736, 'margin_dpo/margin_mean': -0.017623186111450195, 'margin_dpo/margin_std': 0.3986828327178955, 'logps/chosen': -57.01972198486328, 'logps/rejected': -80.84383392333984, 'logps/ref_chosen': -56.96691131591797, 'logps/ref_rejected': -80.80863952636719, 'KL/chosen_KL_mean': -0.05281257629394531, 'KL/rejected_KL_mean': -0.035190582275390625, 'KL/mean': -0.04400016367435455, 'KL/std': 0.252704918384552, 'logits/chosen': -0.5062054991722107, 'logits/rejected': -0.4881584942340851, 'epoch': 0.02} + 2%|▏ | 15/681 [00:38<28:39, 2.58s/it] 2%|▏ | 16/681 [00:40<28:15, 2.55s/it] {'loss': 1.3846, 'grad_norm': 251.1475830078125, 'learning_rate': 1.0869565217391303e-07, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.49875974655151367, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.016786009073257446, 'margin_dpo/margin_mean': 0.016786575317382812, 'margin_dpo/margin_std': 0.38438552618026733, 'logps/chosen': -61.75618362426758, 'logps/rejected': -84.40254974365234, 'logps/ref_chosen': -61.739891052246094, 'logps/ref_rejected': -84.36947631835938, 'KL/chosen_KL_mean': -0.016290664672851562, 'KL/rejected_KL_mean': -0.033077239990234375, 'KL/mean': -0.02468542754650116, 'KL/std': 0.264546275138855, 'logits/chosen': -0.5262372493743896, 'logits/rejected': -0.48537588119506836, 'epoch': 0.02} + 2%|▏ | 16/681 [00:40<28:15, 2.55s/it] 2%|▏ | 17/681 [00:43<28:00, 2.53s/it] {'loss': 1.3596, 'grad_norm': 233.72305297851562, 'learning_rate': 1.1594202898550725e-07, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.492563396692276, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.09929555654525757, 'margin_dpo/margin_mean': 0.09929636120796204, 'margin_dpo/margin_std': 0.3553627133369446, 'logps/chosen': -67.66523742675781, 'logps/rejected': -85.43284606933594, 'logps/ref_chosen': -67.71033477783203, 'logps/ref_rejected': -85.37865447998047, 'KL/chosen_KL_mean': 0.045101165771484375, 'KL/rejected_KL_mean': -0.054195404052734375, 'KL/mean': -0.004545360803604126, 'KL/std': 0.26345258951187134, 'logits/chosen': -0.4858800768852234, 'logits/rejected': -0.44683146476745605, 'epoch': 0.02} + 2%|▏ | 17/681 [00:43<28:00, 2.53s/it] 3%|▎ | 18/681 [00:45<27:50, 2.52s/it] {'loss': 1.3715, 'grad_norm': 245.1805877685547, 'learning_rate': 1.2318840579710146e-07, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.4957374036312103, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.05686947703361511, 'margin_dpo/margin_mean': 0.05686900019645691, 'margin_dpo/margin_std': 0.30619317293167114, 'logps/chosen': -47.72101593017578, 'logps/rejected': -75.51068115234375, 'logps/ref_chosen': -47.7394905090332, 'logps/ref_rejected': -75.4722900390625, 'KL/chosen_KL_mean': 0.018472671508789062, 'KL/rejected_KL_mean': -0.03839874267578125, 'KL/mean': -0.009962007403373718, 'KL/std': 0.21256747841835022, 'logits/chosen': -0.5064246654510498, 'logits/rejected': -0.45240044593811035, 'epoch': 0.03} + 3%|▎ | 18/681 [00:45<27:50, 2.52s/it] 3%|▎ | 19/681 [00:48<28:00, 2.54s/it] {'loss': 1.3562, 'grad_norm': 221.10816955566406, 'learning_rate': 1.3043478260869563e-07, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.49154412746429443, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.11293384432792664, 'margin_dpo/margin_mean': 0.11293420195579529, 'margin_dpo/margin_std': 0.39282259345054626, 'logps/chosen': -70.14795684814453, 'logps/rejected': -89.81312561035156, 'logps/ref_chosen': -70.20536041259766, 'logps/ref_rejected': -89.7575912475586, 'KL/chosen_KL_mean': 0.057403564453125, 'KL/rejected_KL_mean': -0.05553436279296875, 'KL/mean': 0.0009317547082901001, 'KL/std': 0.27681607007980347, 'logits/chosen': -0.5035191774368286, 'logits/rejected': -0.45468592643737793, 'epoch': 0.03} + 3%|▎ | 19/681 [00:48<28:00, 2.54s/it] 3%|▎ | 20/681 [00:50<28:04, 2.55s/it] {'loss': 1.359, 'grad_norm': 218.9619903564453, 'learning_rate': 1.3768115942028986e-07, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.4925253391265869, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.09983032941818237, 'margin_dpo/margin_mean': 0.0998302698135376, 'margin_dpo/margin_std': 0.32752203941345215, 'logps/chosen': -50.800750732421875, 'logps/rejected': -78.92068481445312, 'logps/ref_chosen': -50.80324172973633, 'logps/ref_rejected': -78.82334899902344, 'KL/chosen_KL_mean': 0.002490997314453125, 'KL/rejected_KL_mean': -0.0973358154296875, 'KL/mean': -0.047424912452697754, 'KL/std': 0.25252386927604675, 'logits/chosen': -0.5584119558334351, 'logits/rejected': -0.5027008652687073, 'epoch': 0.03} + 3%|▎ | 20/681 [00:50<28:04, 2.55s/it] 3%|▎ | 21/681 [00:53<27:49, 2.53s/it] {'loss': 1.3583, 'grad_norm': 227.61415100097656, 'learning_rate': 1.4492753623188405e-07, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.49217915534973145, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.1046256422996521, 'margin_dpo/margin_mean': 0.10462629795074463, 'margin_dpo/margin_std': 0.3697164058685303, 'logps/chosen': -50.0589714050293, 'logps/rejected': -77.96937561035156, 'logps/ref_chosen': -50.063018798828125, 'logps/ref_rejected': -77.86878967285156, 'KL/chosen_KL_mean': 0.0040454864501953125, 'KL/rejected_KL_mean': -0.10057830810546875, 'KL/mean': -0.04826641082763672, 'KL/std': 0.28005871176719666, 'logits/chosen': -0.4951311945915222, 'logits/rejected': -0.4713231921195984, 'epoch': 0.03} + 3%|▎ | 21/681 [00:53<27:49, 2.53s/it] 3%|▎ | 22/681 [00:55<27:49, 2.53s/it] {'loss': 1.3233, 'grad_norm': 243.1344451904297, 'learning_rate': 1.5217391304347825e-07, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.48323309421539307, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.22439709305763245, 'margin_dpo/margin_mean': 0.22439703345298767, 'margin_dpo/margin_std': 0.3758489489555359, 'logps/chosen': -59.01601028442383, 'logps/rejected': -97.68744659423828, 'logps/ref_chosen': -59.05763626098633, 'logps/ref_rejected': -97.50466918945312, 'KL/chosen_KL_mean': 0.0416259765625, 'KL/rejected_KL_mean': -0.18277359008789062, 'KL/mean': -0.07057403028011322, 'KL/std': 0.27579018473625183, 'logits/chosen': -0.46972396969795227, 'logits/rejected': -0.4252376854419708, 'epoch': 0.03} + 3%|▎ | 22/681 [00:55<27:49, 2.53s/it] 3%|▎ | 23/681 [00:58<28:59, 2.64s/it] {'loss': 1.3475, 'grad_norm': 235.14389038085938, 'learning_rate': 1.5942028985507245e-07, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.48901820182800293, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.14679017663002014, 'margin_dpo/margin_mean': 0.1467902660369873, 'margin_dpo/margin_std': 0.4319424331188202, 'logps/chosen': -60.079341888427734, 'logps/rejected': -81.2879867553711, 'logps/ref_chosen': -60.07769775390625, 'logps/ref_rejected': -81.13955688476562, 'KL/chosen_KL_mean': -0.0016460418701171875, 'KL/rejected_KL_mean': -0.14843368530273438, 'KL/mean': -0.07504256069660187, 'KL/std': 0.2956269383430481, 'logits/chosen': -0.4931301474571228, 'logits/rejected': -0.470862478017807, 'epoch': 0.03} + 3%|▎ | 23/681 [00:58<28:59, 2.64s/it] 4%|▎ | 24/681 [01:01<29:00, 2.65s/it] {'loss': 1.3241, 'grad_norm': 242.87490844726562, 'learning_rate': 1.6666666666666665e-07, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.4833376705646515, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.2231999784708023, 'margin_dpo/margin_mean': 0.22320020198822021, 'margin_dpo/margin_std': 0.3832412362098694, 'logps/chosen': -44.244178771972656, 'logps/rejected': -99.30155944824219, 'logps/ref_chosen': -44.29103469848633, 'logps/ref_rejected': -99.12521362304688, 'KL/chosen_KL_mean': 0.04685783386230469, 'KL/rejected_KL_mean': -0.1763458251953125, 'KL/mean': -0.06474106758832932, 'KL/std': 0.2938792407512665, 'logits/chosen': -0.5050971508026123, 'logits/rejected': -0.48868709802627563, 'epoch': 0.04} + 4%|▎ | 24/681 [01:01<29:00, 2.65s/it] 4%|▎ | 25/681 [01:04<28:56, 2.65s/it] {'loss': 1.3255, 'grad_norm': 215.2100830078125, 'learning_rate': 1.7391304347826085e-07, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.48354804515838623, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.22117015719413757, 'margin_dpo/margin_mean': 0.22116953134536743, 'margin_dpo/margin_std': 0.4389370083808899, 'logps/chosen': -52.4984130859375, 'logps/rejected': -89.52471923828125, 'logps/ref_chosen': -52.537052154541016, 'logps/ref_rejected': -89.34219360351562, 'KL/chosen_KL_mean': 0.03863716125488281, 'KL/rejected_KL_mean': -0.18252944946289062, 'KL/mean': -0.07194776833057404, 'KL/std': 0.36078929901123047, 'logits/chosen': -0.5046179294586182, 'logits/rejected': -0.47490301728248596, 'epoch': 0.04} + 4%|▎ | 25/681 [01:04<28:56, 2.65s/it] 4%|▍ | 26/681 [01:06<27:39, 2.53s/it] {'loss': 1.277, 'grad_norm': 240.65769958496094, 'learning_rate': 1.8115942028985507e-07, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.470198392868042, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.40157991647720337, 'margin_dpo/margin_mean': 0.40158015489578247, 'margin_dpo/margin_std': 0.5663931965827942, 'logps/chosen': -53.82801055908203, 'logps/rejected': -103.66648864746094, 'logps/ref_chosen': -53.92280578613281, 'logps/ref_rejected': -103.35971069335938, 'KL/chosen_KL_mean': 0.09479713439941406, 'KL/rejected_KL_mean': -0.3067779541015625, 'KL/mean': -0.10598999261856079, 'KL/std': 0.4495195746421814, 'logits/chosen': -0.5313920974731445, 'logits/rejected': -0.49980974197387695, 'epoch': 0.04} + 4%|▍ | 26/681 [01:06<27:39, 2.53s/it] 4%|▍ | 27/681 [01:08<27:27, 2.52s/it] {'loss': 1.2455, 'grad_norm': 256.8159484863281, 'learning_rate': 1.8840579710144927e-07, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.4619525671005249, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.5132265090942383, 'margin_dpo/margin_mean': 0.5132263898849487, 'margin_dpo/margin_std': 0.5646921992301941, 'logps/chosen': -42.8001823425293, 'logps/rejected': -99.13908386230469, 'logps/ref_chosen': -42.898529052734375, 'logps/ref_rejected': -98.72419738769531, 'KL/chosen_KL_mean': 0.09834671020507812, 'KL/rejected_KL_mean': -0.41487884521484375, 'KL/mean': -0.15826506912708282, 'KL/std': 0.47776395082473755, 'logits/chosen': -0.5238237977027893, 'logits/rejected': -0.4873714745044708, 'epoch': 0.04} + 4%|▍ | 27/681 [01:08<27:27, 2.52s/it] 4%|▍ | 28/681 [01:11<27:36, 2.54s/it] {'loss': 1.299, 'grad_norm': 209.7417755126953, 'learning_rate': 1.9565217391304347e-07, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.4754894971847534, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.3291375935077667, 'margin_dpo/margin_mean': 0.32913774251937866, 'margin_dpo/margin_std': 0.6227332353591919, 'logps/chosen': -60.541160583496094, 'logps/rejected': -91.71491241455078, 'logps/ref_chosen': -60.55650329589844, 'logps/ref_rejected': -91.40111541748047, 'KL/chosen_KL_mean': 0.015338897705078125, 'KL/rejected_KL_mean': -0.3137969970703125, 'KL/mean': -0.14923109114170074, 'KL/std': 0.4466787576675415, 'logits/chosen': -0.5104295611381531, 'logits/rejected': -0.4556117355823517, 'epoch': 0.04} + 4%|▍ | 28/681 [01:11<27:36, 2.54s/it] 4%|▍ | 29/681 [01:13<26:33, 2.44s/it] {'loss': 1.2226, 'grad_norm': 247.98696899414062, 'learning_rate': 2.028985507246377e-07, 'fcm_dpo/beta': 0.30000001192092896, 'fcm_dpo/q_t': 0.4558557868003845, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.5947138071060181, 'margin_dpo/margin_mean': 0.5947141647338867, 'margin_dpo/margin_std': 0.5515247583389282, 'logps/chosen': -57.66114807128906, 'logps/rejected': -97.84243774414062, 'logps/ref_chosen': -57.80778503417969, 'logps/ref_rejected': -97.39434814453125, 'KL/chosen_KL_mean': 0.1466350555419922, 'KL/rejected_KL_mean': -0.44808197021484375, 'KL/mean': -0.15072329342365265, 'KL/std': 0.5040621161460876, 'logits/chosen': -0.5724257826805115, 'logits/rejected': -0.5275709629058838, 'epoch': 0.04} + 4%|▍ | 29/681 [01:13<26:33, 2.44s/it] 4%|▍ | 30/681 [01:16<27:09, 2.50s/it] {'loss': 1.2141, 'grad_norm': 242.61647033691406, 'learning_rate': 2.1014492753623187e-07, 'fcm_dpo/beta': 0.30578601360321045, 'fcm_dpo/q_t': 0.4539734125137329, 'fcm_dpo/delta': 0.09551539272069931, 'fcm_dpo/margin': 0.6149008274078369, 'margin_dpo/margin_mean': 0.6149011850357056, 'margin_dpo/margin_std': 0.5445628762245178, 'logps/chosen': -52.44371032714844, 'logps/rejected': -98.97044372558594, 'logps/ref_chosen': -52.577369689941406, 'logps/ref_rejected': -98.48920440673828, 'KL/chosen_KL_mean': 0.13365936279296875, 'KL/rejected_KL_mean': -0.4812431335449219, 'KL/mean': -0.17379064857959747, 'KL/std': 0.5387458801269531, 'logits/chosen': -0.5310481190681458, 'logits/rejected': -0.5018342137336731, 'epoch': 0.04} + 4%|▍ | 30/681 [01:16<27:09, 2.50s/it] 5%|▍ | 31/681 [01:19<27:44, 2.56s/it] {'loss': 1.2527, 'grad_norm': 189.3855438232422, 'learning_rate': 2.1739130434782607e-07, 'fcm_dpo/beta': 0.30578601360321045, 'fcm_dpo/q_t': 0.46302998065948486, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.4925115406513214, 'margin_dpo/margin_mean': 0.4925113320350647, 'margin_dpo/margin_std': 0.6987070441246033, 'logps/chosen': -63.70354461669922, 'logps/rejected': -73.28314208984375, 'logps/ref_chosen': -63.806922912597656, 'logps/ref_rejected': -72.89400482177734, 'KL/chosen_KL_mean': 0.1033782958984375, 'KL/rejected_KL_mean': -0.3891334533691406, 'KL/mean': -0.14287717640399933, 'KL/std': 0.5448415279388428, 'logits/chosen': -0.4839329719543457, 'logits/rejected': -0.43673014640808105, 'epoch': 0.05} + 5%|▍ | 31/681 [01:19<27:44, 2.56s/it] 5%|▍ | 32/681 [01:21<28:16, 2.61s/it] {'loss': 1.2053, 'grad_norm': 225.79928588867188, 'learning_rate': 2.2463768115942027e-07, 'fcm_dpo/beta': 0.30862361192703247, 'fcm_dpo/q_t': 0.4495420455932617, 'fcm_dpo/delta': 0.0919463187456131, 'fcm_dpo/margin': 0.6792210936546326, 'margin_dpo/margin_mean': 0.6792212724685669, 'margin_dpo/margin_std': 0.8771206140518188, 'logps/chosen': -62.59779357910156, 'logps/rejected': -89.85499572753906, 'logps/ref_chosen': -62.739524841308594, 'logps/ref_rejected': -89.3175048828125, 'KL/chosen_KL_mean': 0.14173126220703125, 'KL/rejected_KL_mean': -0.5374908447265625, 'KL/mean': -0.19788116216659546, 'KL/std': 0.67319655418396, 'logits/chosen': -0.5124537944793701, 'logits/rejected': -0.4711976647377014, 'epoch': 0.05} + 5%|▍ | 32/681 [01:21<28:16, 2.61s/it] 5%|▍ | 33/681 [01:24<27:34, 2.55s/it] {'loss': 1.2149, 'grad_norm': 205.43460083007812, 'learning_rate': 2.318840579710145e-07, 'fcm_dpo/beta': 0.31455251574516296, 'fcm_dpo/q_t': 0.45361170172691345, 'fcm_dpo/delta': 0.09827958792448044, 'fcm_dpo/margin': 0.6040317416191101, 'margin_dpo/margin_mean': 0.6040312051773071, 'margin_dpo/margin_std': 0.6277109384536743, 'logps/chosen': -53.10909652709961, 'logps/rejected': -88.33729553222656, 'logps/ref_chosen': -53.26097106933594, 'logps/ref_rejected': -87.8851318359375, 'KL/chosen_KL_mean': 0.15187644958496094, 'KL/rejected_KL_mean': -0.45215606689453125, 'KL/mean': -0.1501408815383911, 'KL/std': 0.5369387865066528, 'logits/chosen': -0.5107744932174683, 'logits/rejected': -0.4852328896522522, 'epoch': 0.05} + 5%|▍ | 33/681 [01:24<27:34, 2.55s/it] 5%|▍ | 34/681 [01:26<27:44, 2.57s/it] {'loss': 1.1632, 'grad_norm': 211.18833923339844, 'learning_rate': 2.391304347826087e-07, 'fcm_dpo/beta': 0.3245845437049866, 'fcm_dpo/q_t': 0.43769991397857666, 'fcm_dpo/delta': 0.1425287425518036, 'fcm_dpo/margin': 0.8045636415481567, 'margin_dpo/margin_mean': 0.8045632839202881, 'margin_dpo/margin_std': 0.911353349685669, 'logps/chosen': -50.747802734375, 'logps/rejected': -102.6568832397461, 'logps/ref_chosen': -50.81732940673828, 'logps/ref_rejected': -101.92184448242188, 'KL/chosen_KL_mean': 0.06952667236328125, 'KL/rejected_KL_mean': -0.7350387573242188, 'KL/mean': -0.3327553868293762, 'KL/std': 0.7567273378372192, 'logits/chosen': -0.48843878507614136, 'logits/rejected': -0.471035897731781, 'epoch': 0.05} + 5%|▍ | 34/681 [01:26<27:44, 2.57s/it] 5%|▌ | 35/681 [01:29<28:16, 2.63s/it] {'loss': 1.0562, 'grad_norm': 206.67367553710938, 'learning_rate': 2.463768115942029e-07, 'fcm_dpo/beta': 0.32634085416793823, 'fcm_dpo/q_t': 0.40531784296035767, 'fcm_dpo/delta': -0.0021304162219166756, 'fcm_dpo/margin': 1.2318875789642334, 'margin_dpo/margin_mean': 1.2318875789642334, 'margin_dpo/margin_std': 1.157043218612671, 'logps/chosen': -50.87975311279297, 'logps/rejected': -107.91159057617188, 'logps/ref_chosen': -51.02449035644531, 'logps/ref_rejected': -106.82443237304688, 'KL/chosen_KL_mean': 0.14473533630371094, 'KL/rejected_KL_mean': -1.087158203125, 'KL/mean': -0.47121092677116394, 'KL/std': 1.0332869291305542, 'logits/chosen': -0.520686149597168, 'logits/rejected': -0.48392248153686523, 'epoch': 0.05} + 5%|▌ | 35/681 [01:29<28:16, 2.63s/it] 5%|▌ | 36/681 [01:32<28:20, 2.64s/it] {'loss': 1.0873, 'grad_norm': 183.64088439941406, 'learning_rate': 2.536231884057971e-07, 'fcm_dpo/beta': 0.3298990726470947, 'fcm_dpo/q_t': 0.41170650720596313, 'fcm_dpo/delta': 0.026729058474302292, 'fcm_dpo/margin': 1.132055640220642, 'margin_dpo/margin_mean': 1.1320552825927734, 'margin_dpo/margin_std': 1.191450834274292, 'logps/chosen': -51.95035171508789, 'logps/rejected': -87.13153839111328, 'logps/ref_chosen': -51.991493225097656, 'logps/ref_rejected': -86.0406265258789, 'KL/chosen_KL_mean': 0.04113960266113281, 'KL/rejected_KL_mean': -1.090911865234375, 'KL/mean': -0.5248871445655823, 'KL/std': 1.0772857666015625, 'logits/chosen': -0.5615625381469727, 'logits/rejected': -0.5254453420639038, 'epoch': 0.05} + 5%|▌ | 36/681 [01:32<28:20, 2.64s/it] 5%|▌ | 37/681 [01:34<28:17, 2.64s/it] {'loss': 1.1429, 'grad_norm': 166.49075317382812, 'learning_rate': 2.6086956521739126e-07, 'fcm_dpo/beta': 0.3311406373977661, 'fcm_dpo/q_t': 0.4271436929702759, 'fcm_dpo/delta': 0.08806828409433365, 'fcm_dpo/margin': 0.9501617550849915, 'margin_dpo/margin_mean': 0.9501620531082153, 'margin_dpo/margin_std': 1.3070077896118164, 'logps/chosen': -62.82125473022461, 'logps/rejected': -78.85938262939453, 'logps/ref_chosen': -62.807106018066406, 'logps/ref_rejected': -77.89507293701172, 'KL/chosen_KL_mean': -0.014146804809570312, 'KL/rejected_KL_mean': -0.9643096923828125, 'KL/mean': -0.48922494053840637, 'KL/std': 1.0300785303115845, 'logits/chosen': -0.5190207958221436, 'logits/rejected': -0.4760361909866333, 'epoch': 0.05} + 5%|▌ | 37/681 [01:34<28:17, 2.64s/it] 6%|▌ | 38/681 [01:37<26:55, 2.51s/it] {'loss': 1.0542, 'grad_norm': 170.35545349121094, 'learning_rate': 2.681159420289855e-07, 'fcm_dpo/beta': 0.332706481218338, 'fcm_dpo/q_t': 0.3991077244281769, 'fcm_dpo/delta': -0.053727779537439346, 'fcm_dpo/margin': 1.3563368320465088, 'margin_dpo/margin_mean': 1.3563368320465088, 'margin_dpo/margin_std': 1.683530569076538, 'logps/chosen': -48.28889465332031, 'logps/rejected': -99.16715240478516, 'logps/ref_chosen': -48.39051818847656, 'logps/ref_rejected': -97.91244506835938, 'KL/chosen_KL_mean': 0.10162544250488281, 'KL/rejected_KL_mean': -1.2547111511230469, 'KL/mean': -0.5765421390533447, 'KL/std': 1.3713576793670654, 'logits/chosen': -0.5162097215652466, 'logits/rejected': -0.483456015586853, 'epoch': 0.06} + 6%|▌ | 38/681 [01:37<26:55, 2.51s/it] 6%|▌ | 39/681 [01:39<26:56, 2.52s/it] {'loss': 0.9688, 'grad_norm': 174.84286499023438, 'learning_rate': 2.753623188405797e-07, 'fcm_dpo/beta': 0.3225635588169098, 'fcm_dpo/q_t': 0.37417465448379517, 'fcm_dpo/delta': -0.1443103402853012, 'fcm_dpo/margin': 1.6612706184387207, 'margin_dpo/margin_mean': 1.6612703800201416, 'margin_dpo/margin_std': 1.4079031944274902, 'logps/chosen': -50.65996551513672, 'logps/rejected': -80.14027404785156, 'logps/ref_chosen': -50.75047302246094, 'logps/ref_rejected': -78.56951141357422, 'KL/chosen_KL_mean': 0.09050559997558594, 'KL/rejected_KL_mean': -1.5707664489746094, 'KL/mean': -0.7401334047317505, 'KL/std': 1.3007447719573975, 'logits/chosen': -0.5401022434234619, 'logits/rejected': -0.49907436966896057, 'epoch': 0.06} + 6%|▌ | 39/681 [01:39<26:56, 2.52s/it] 6%|▌ | 40/681 [01:42<27:29, 2.57s/it] {'loss': 1.0208, 'grad_norm': 139.58270263671875, 'learning_rate': 2.8260869565217386e-07, 'fcm_dpo/beta': 0.31791430711746216, 'fcm_dpo/q_t': 0.38887178897857666, 'fcm_dpo/delta': -0.09945414215326309, 'fcm_dpo/margin': 1.5557353496551514, 'margin_dpo/margin_mean': 1.5557358264923096, 'margin_dpo/margin_std': 1.7217731475830078, 'logps/chosen': -57.792579650878906, 'logps/rejected': -75.66331481933594, 'logps/ref_chosen': -57.985069274902344, 'logps/ref_rejected': -74.3000717163086, 'KL/chosen_KL_mean': 0.1924877166748047, 'KL/rejected_KL_mean': -1.3632469177246094, 'KL/mean': -0.5853748321533203, 'KL/std': 1.4208192825317383, 'logits/chosen': -0.5097917318344116, 'logits/rejected': -0.4792172312736511, 'epoch': 0.06} + 6%|▌ | 40/681 [01:42<27:29, 2.57s/it] 6%|▌ | 41/681 [01:44<27:27, 2.57s/it] {'loss': 0.9812, 'grad_norm': 151.85443115234375, 'learning_rate': 2.898550724637681e-07, 'fcm_dpo/beta': 0.3076004981994629, 'fcm_dpo/q_t': 0.3731822073459625, 'fcm_dpo/delta': -0.17332524061203003, 'fcm_dpo/margin': 1.8329108953475952, 'margin_dpo/margin_mean': 1.8329112529754639, 'margin_dpo/margin_std': 1.9542649984359741, 'logps/chosen': -62.6768798828125, 'logps/rejected': -98.8375015258789, 'logps/ref_chosen': -62.69581604003906, 'logps/ref_rejected': -97.02352905273438, 'KL/chosen_KL_mean': 0.018938064575195312, 'KL/rejected_KL_mean': -1.8139724731445312, 'KL/mean': -0.8975176811218262, 'KL/std': 1.8013949394226074, 'logits/chosen': -0.5335030555725098, 'logits/rejected': -0.49662622809410095, 'epoch': 0.06} + 6%|▌ | 41/681 [01:44<27:27, 2.57s/it] 6%|▌ | 42/681 [01:47<27:18, 2.56s/it] {'loss': 0.8866, 'grad_norm': 148.23951721191406, 'learning_rate': 2.971014492753623e-07, 'fcm_dpo/beta': 0.2897103428840637, 'fcm_dpo/q_t': 0.34266549348831177, 'fcm_dpo/delta': -0.33123135566711426, 'fcm_dpo/margin': 2.4430980682373047, 'margin_dpo/margin_mean': 2.4430971145629883, 'margin_dpo/margin_std': 2.242748737335205, 'logps/chosen': -58.746665954589844, 'logps/rejected': -112.1317138671875, 'logps/ref_chosen': -58.966426849365234, 'logps/ref_rejected': -109.90837097167969, 'KL/chosen_KL_mean': 0.2197589874267578, 'KL/rejected_KL_mean': -2.2233352661132812, 'KL/mean': -1.0017893314361572, 'KL/std': 2.0296993255615234, 'logits/chosen': -0.549653172492981, 'logits/rejected': -0.5031782984733582, 'epoch': 0.06} + 6%|▌ | 42/681 [01:47<27:18, 2.56s/it] 6%|▋ | 43/681 [01:50<27:19, 2.57s/it] {'loss': 0.888, 'grad_norm': 136.68927001953125, 'learning_rate': 3.043478260869565e-07, 'fcm_dpo/beta': 0.2717617154121399, 'fcm_dpo/q_t': 0.3463453948497772, 'fcm_dpo/delta': -0.2843329906463623, 'fcm_dpo/margin': 2.442809581756592, 'margin_dpo/margin_mean': 2.442809581756592, 'margin_dpo/margin_std': 1.9328808784484863, 'logps/chosen': -53.65175247192383, 'logps/rejected': -98.41876220703125, 'logps/ref_chosen': -54.15599822998047, 'logps/ref_rejected': -96.48019409179688, 'KL/chosen_KL_mean': 0.5042438507080078, 'KL/rejected_KL_mean': -1.938568115234375, 'KL/mean': -0.717160165309906, 'KL/std': 1.816794991493225, 'logits/chosen': -0.558872640132904, 'logits/rejected': -0.5347921848297119, 'epoch': 0.06} + 6%|▋ | 43/681 [01:50<27:19, 2.57s/it] 6%|▋ | 44/681 [01:52<27:19, 2.57s/it] {'loss': 0.8594, 'grad_norm': 138.35983276367188, 'learning_rate': 3.115942028985507e-07, 'fcm_dpo/beta': 0.2552001476287842, 'fcm_dpo/q_t': 0.33637571334838867, 'fcm_dpo/delta': -0.3377786874771118, 'fcm_dpo/margin': 2.792766571044922, 'margin_dpo/margin_mean': 2.79276704788208, 'margin_dpo/margin_std': 2.16209077835083, 'logps/chosen': -49.85053253173828, 'logps/rejected': -111.34856414794922, 'logps/ref_chosen': -50.07849884033203, 'logps/ref_rejected': -108.78376007080078, 'KL/chosen_KL_mean': 0.22796630859375, 'KL/rejected_KL_mean': -2.5648040771484375, 'KL/mean': -1.16841721534729, 'KL/std': 2.2068114280700684, 'logits/chosen': -0.4246031641960144, 'logits/rejected': -0.40571877360343933, 'epoch': 0.06} + 6%|▋ | 44/681 [01:52<27:19, 2.57s/it] 7%|▋ | 45/681 [01:55<27:28, 2.59s/it] {'loss': 0.9987, 'grad_norm': 111.90202331542969, 'learning_rate': 3.188405797101449e-07, 'fcm_dpo/beta': 0.24679788947105408, 'fcm_dpo/q_t': 0.38011178374290466, 'fcm_dpo/delta': -0.14438273012638092, 'fcm_dpo/margin': 2.1753125190734863, 'margin_dpo/margin_mean': 2.1753129959106445, 'margin_dpo/margin_std': 2.421452283859253, 'logps/chosen': -48.2497444152832, 'logps/rejected': -79.9465560913086, 'logps/ref_chosen': -48.4149284362793, 'logps/ref_rejected': -77.93643188476562, 'KL/chosen_KL_mean': 0.16518402099609375, 'KL/rejected_KL_mean': -2.0101280212402344, 'KL/mean': -0.9224708676338196, 'KL/std': 1.9454594850540161, 'logits/chosen': -0.4974350333213806, 'logits/rejected': -0.4851893186569214, 'epoch': 0.07} + 7%|▋ | 45/681 [01:55<27:28, 2.59s/it] 7%|▋ | 46/681 [01:57<27:40, 2.61s/it] {'loss': 0.9307, 'grad_norm': 118.44244384765625, 'learning_rate': 3.260869565217391e-07, 'fcm_dpo/beta': 0.23457413911819458, 'fcm_dpo/q_t': 0.3544684946537018, 'fcm_dpo/delta': -0.2872818112373352, 'fcm_dpo/margin': 2.849971294403076, 'margin_dpo/margin_mean': 2.8499715328216553, 'margin_dpo/margin_std': 3.0548930168151855, 'logps/chosen': -55.80439758300781, 'logps/rejected': -98.30752563476562, 'logps/ref_chosen': -55.999427795410156, 'logps/ref_rejected': -95.652587890625, 'KL/chosen_KL_mean': 0.19502639770507812, 'KL/rejected_KL_mean': -2.6549415588378906, 'KL/mean': -1.2299586534500122, 'KL/std': 2.5399794578552246, 'logits/chosen': -0.527849555015564, 'logits/rejected': -0.4767192304134369, 'epoch': 0.07} + 7%|▋ | 46/681 [01:57<27:40, 2.61s/it] 7%|▋ | 47/681 [02:00<27:44, 2.62s/it] {'loss': 0.9238, 'grad_norm': 111.23075866699219, 'learning_rate': 3.333333333333333e-07, 'fcm_dpo/beta': 0.22347593307495117, 'fcm_dpo/q_t': 0.35581424832344055, 'fcm_dpo/delta': -0.25001367926597595, 'fcm_dpo/margin': 2.840768337249756, 'margin_dpo/margin_mean': 2.840768575668335, 'margin_dpo/margin_std': 2.5524120330810547, 'logps/chosen': -57.542076110839844, 'logps/rejected': -97.13597106933594, 'logps/ref_chosen': -57.92607879638672, 'logps/ref_rejected': -94.67920684814453, 'KL/chosen_KL_mean': 0.384002685546875, 'KL/rejected_KL_mean': -2.4567604064941406, 'KL/mean': -1.0363800525665283, 'KL/std': 2.471060276031494, 'logits/chosen': -0.5658366680145264, 'logits/rejected': -0.5126087665557861, 'epoch': 0.07} + 7%|▋ | 47/681 [02:00<27:44, 2.62s/it] 7%|▋ | 48/681 [02:03<28:04, 2.66s/it] {'loss': 0.9519, 'grad_norm': 119.95755767822266, 'learning_rate': 3.4057971014492755e-07, 'fcm_dpo/beta': 0.21101200580596924, 'fcm_dpo/q_t': 0.36156171560287476, 'fcm_dpo/delta': -0.22269634902477264, 'fcm_dpo/margin': 2.8779749870300293, 'margin_dpo/margin_mean': 2.877974510192871, 'margin_dpo/margin_std': 2.711777448654175, 'logps/chosen': -57.12587356567383, 'logps/rejected': -90.83238220214844, 'logps/ref_chosen': -57.188072204589844, 'logps/ref_rejected': -88.0166015625, 'KL/chosen_KL_mean': 0.06220054626464844, 'KL/rejected_KL_mean': -2.815776824951172, 'KL/mean': -1.376787781715393, 'KL/std': 2.4476280212402344, 'logits/chosen': -0.611646294593811, 'logits/rejected': -0.5553910732269287, 'epoch': 0.07} + 7%|▋ | 48/681 [02:03<28:04, 2.66s/it] 7%|▋ | 49/681 [02:05<27:45, 2.64s/it] {'loss': 0.9077, 'grad_norm': 93.63461303710938, 'learning_rate': 3.478260869565217e-07, 'fcm_dpo/beta': 0.20099371671676636, 'fcm_dpo/q_t': 0.3465607166290283, 'fcm_dpo/delta': -0.329367995262146, 'fcm_dpo/margin': 3.5184366703033447, 'margin_dpo/margin_mean': 3.5184359550476074, 'margin_dpo/margin_std': 3.583613157272339, 'logps/chosen': -61.340702056884766, 'logps/rejected': -86.94134521484375, 'logps/ref_chosen': -61.685272216796875, 'logps/ref_rejected': -83.76747131347656, 'KL/chosen_KL_mean': 0.34456825256347656, 'KL/rejected_KL_mean': -3.1738739013671875, 'KL/mean': -1.4146552085876465, 'KL/std': 3.116457939147949, 'logits/chosen': -0.537588357925415, 'logits/rejected': -0.4779571294784546, 'epoch': 0.07} + 7%|▋ | 49/681 [02:05<27:45, 2.64s/it] 7%|▋ | 50/681 [02:08<27:49, 2.65s/it] {'loss': 0.8888, 'grad_norm': 91.7352066040039, 'learning_rate': 3.5507246376811595e-07, 'fcm_dpo/beta': 0.18617978692054749, 'fcm_dpo/q_t': 0.34047919511795044, 'fcm_dpo/delta': -0.3431151509284973, 'fcm_dpo/margin': 3.8532233238220215, 'margin_dpo/margin_mean': 3.8532235622406006, 'margin_dpo/margin_std': 3.644498825073242, 'logps/chosen': -58.77288818359375, 'logps/rejected': -100.26011657714844, 'logps/ref_chosen': -58.72413635253906, 'logps/ref_rejected': -96.35814666748047, 'KL/chosen_KL_mean': -0.04874992370605469, 'KL/rejected_KL_mean': -3.9019737243652344, 'KL/mean': -1.9753637313842773, 'KL/std': 3.2308237552642822, 'logits/chosen': -0.5507527589797974, 'logits/rejected': -0.5151888728141785, 'epoch': 0.07} + 7%|▋ | 50/681 [02:08<27:49, 2.65s/it] 7%|▋ | 51/681 [02:11<27:42, 2.64s/it] {'loss': 0.9655, 'grad_norm': 73.21631622314453, 'learning_rate': 3.6231884057971015e-07, 'fcm_dpo/beta': 0.17457202076911926, 'fcm_dpo/q_t': 0.3622833490371704, 'fcm_dpo/delta': -0.27861201763153076, 'fcm_dpo/margin': 3.763016700744629, 'margin_dpo/margin_mean': 3.76301646232605, 'margin_dpo/margin_std': 4.63081693649292, 'logps/chosen': -61.564491271972656, 'logps/rejected': -79.95584106445312, 'logps/ref_chosen': -61.3736686706543, 'logps/ref_rejected': -76.00199890136719, 'KL/chosen_KL_mean': -0.19082260131835938, 'KL/rejected_KL_mean': -3.9538421630859375, 'KL/mean': -2.0723307132720947, 'KL/std': 3.815108299255371, 'logits/chosen': -0.5122474431991577, 'logits/rejected': -0.47880104184150696, 'epoch': 0.07} + 7%|▋ | 51/681 [02:11<27:42, 2.64s/it] 8%|▊ | 52/681 [02:13<27:08, 2.59s/it] {'loss': 0.7706, 'grad_norm': 73.25430297851562, 'learning_rate': 3.695652173913043e-07, 'fcm_dpo/beta': 0.15911118686199188, 'fcm_dpo/q_t': 0.2984340786933899, 'fcm_dpo/delta': -0.5776325464248657, 'fcm_dpo/margin': 5.8035993576049805, 'margin_dpo/margin_mean': 5.803599834442139, 'margin_dpo/margin_std': 4.6530866622924805, 'logps/chosen': -51.86079025268555, 'logps/rejected': -85.30094909667969, 'logps/ref_chosen': -52.33735656738281, 'logps/ref_rejected': -79.97391510009766, 'KL/chosen_KL_mean': 0.4765663146972656, 'KL/rejected_KL_mean': -5.327030181884766, 'KL/mean': -2.4252328872680664, 'KL/std': 4.417823314666748, 'logits/chosen': -0.5584224462509155, 'logits/rejected': -0.5033497214317322, 'epoch': 0.08} + 8%|▊ | 52/681 [02:13<27:08, 2.59s/it] 8%|▊ | 53/681 [02:16<27:03, 2.58s/it] {'loss': 0.8522, 'grad_norm': 72.15726470947266, 'learning_rate': 3.7681159420289855e-07, 'fcm_dpo/beta': 0.14530491828918457, 'fcm_dpo/q_t': 0.32626470923423767, 'fcm_dpo/delta': -0.49047210812568665, 'fcm_dpo/margin': 5.85880708694458, 'margin_dpo/margin_mean': 5.858806610107422, 'margin_dpo/margin_std': 5.748600006103516, 'logps/chosen': -53.398223876953125, 'logps/rejected': -97.72596740722656, 'logps/ref_chosen': -53.31465148925781, 'logps/ref_rejected': -91.78359985351562, 'KL/chosen_KL_mean': -0.0835723876953125, 'KL/rejected_KL_mean': -5.942371368408203, 'KL/mean': -3.0129737854003906, 'KL/std': 5.052390098571777, 'logits/chosen': -0.606256365776062, 'logits/rejected': -0.5844460725784302, 'epoch': 0.08} + 8%|▊ | 53/681 [02:16<27:03, 2.58s/it] 8%|▊ | 54/681 [02:18<26:26, 2.53s/it] {'loss': 0.8972, 'grad_norm': 64.02351379394531, 'learning_rate': 3.8405797101449274e-07, 'fcm_dpo/beta': 0.13407519459724426, 'fcm_dpo/q_t': 0.3468964397907257, 'fcm_dpo/delta': -0.3100808262825012, 'fcm_dpo/margin': 5.140138626098633, 'margin_dpo/margin_mean': 5.140138626098633, 'margin_dpo/margin_std': 4.812758445739746, 'logps/chosen': -50.92143630981445, 'logps/rejected': -97.08831024169922, 'logps/ref_chosen': -50.68865966796875, 'logps/ref_rejected': -91.71539306640625, 'KL/chosen_KL_mean': -0.2327747344970703, 'KL/rejected_KL_mean': -5.372917175292969, 'KL/mean': -2.8028464317321777, 'KL/std': 4.757123947143555, 'logits/chosen': -0.5881419777870178, 'logits/rejected': -0.534300684928894, 'epoch': 0.08} + 8%|▊ | 54/681 [02:18<26:26, 2.53s/it] 8%|▊ | 55/681 [02:20<25:27, 2.44s/it] {'loss': 0.915, 'grad_norm': 62.06749725341797, 'learning_rate': 3.9130434782608694e-07, 'fcm_dpo/beta': 0.12486197054386139, 'fcm_dpo/q_t': 0.3410576581954956, 'fcm_dpo/delta': -0.38554632663726807, 'fcm_dpo/margin': 6.0657572746276855, 'margin_dpo/margin_mean': 6.065756797790527, 'margin_dpo/margin_std': 6.8796820640563965, 'logps/chosen': -63.36795425415039, 'logps/rejected': -95.81198120117188, 'logps/ref_chosen': -62.615234375, 'logps/ref_rejected': -88.99349975585938, 'KL/chosen_KL_mean': -0.7527198791503906, 'KL/rejected_KL_mean': -6.8184814453125, 'KL/mean': -3.7856006622314453, 'KL/std': 5.726006507873535, 'logits/chosen': -0.6522265076637268, 'logits/rejected': -0.590487003326416, 'epoch': 0.08} + 8%|▊ | 55/681 [02:20<25:27, 2.44s/it] 8%|▊ | 56/681 [02:23<26:01, 2.50s/it] {'loss': 0.9546, 'grad_norm': 53.04601287841797, 'learning_rate': 3.9855072463768114e-07, 'fcm_dpo/beta': 0.11680299043655396, 'fcm_dpo/q_t': 0.3566039800643921, 'fcm_dpo/delta': -0.2921079993247986, 'fcm_dpo/margin': 5.758103847503662, 'margin_dpo/margin_mean': 5.758103847503662, 'margin_dpo/margin_std': 6.777911186218262, 'logps/chosen': -58.48115921020508, 'logps/rejected': -100.48097229003906, 'logps/ref_chosen': -57.9327278137207, 'logps/ref_rejected': -94.1744384765625, 'KL/chosen_KL_mean': -0.548431396484375, 'KL/rejected_KL_mean': -6.3065338134765625, 'KL/mean': -3.4274816513061523, 'KL/std': 5.56746768951416, 'logits/chosen': -0.6132587194442749, 'logits/rejected': -0.5706372261047363, 'epoch': 0.08} + 8%|▊ | 56/681 [02:23<26:01, 2.50s/it] 8%|▊ | 57/681 [02:25<26:04, 2.51s/it] {'loss': 0.8905, 'grad_norm': 57.49006652832031, 'learning_rate': 4.057971014492754e-07, 'fcm_dpo/beta': 0.10994692891836166, 'fcm_dpo/q_t': 0.34009259939193726, 'fcm_dpo/delta': -0.3346494138240814, 'fcm_dpo/margin': 6.474340915679932, 'margin_dpo/margin_mean': 6.474340438842773, 'margin_dpo/margin_std': 5.933760643005371, 'logps/chosen': -71.0665283203125, 'logps/rejected': -102.61103820800781, 'logps/ref_chosen': -70.49528503417969, 'logps/ref_rejected': -95.56546020507812, 'KL/chosen_KL_mean': -0.5712432861328125, 'KL/rejected_KL_mean': -7.045585632324219, 'KL/mean': -3.808412551879883, 'KL/std': 5.480106353759766, 'logits/chosen': -0.5823420882225037, 'logits/rejected': -0.5546176433563232, 'epoch': 0.08} + 8%|▊ | 57/681 [02:26<26:04, 2.51s/it] 9%|▊ | 58/681 [02:28<26:29, 2.55s/it] {'loss': 0.9009, 'grad_norm': 58.814815521240234, 'learning_rate': 4.1304347826086954e-07, 'fcm_dpo/beta': 0.10236389189958572, 'fcm_dpo/q_t': 0.3412542939186096, 'fcm_dpo/delta': -0.36620625853538513, 'fcm_dpo/margin': 7.231494903564453, 'margin_dpo/margin_mean': 7.2314958572387695, 'margin_dpo/margin_std': 7.5085673332214355, 'logps/chosen': -62.822715759277344, 'logps/rejected': -92.53856658935547, 'logps/ref_chosen': -62.13294219970703, 'logps/ref_rejected': -84.61729431152344, 'KL/chosen_KL_mean': -0.6897735595703125, 'KL/rejected_KL_mean': -7.921272277832031, 'KL/mean': -4.305522918701172, 'KL/std': 6.297882556915283, 'logits/chosen': -0.6043993234634399, 'logits/rejected': -0.529456377029419, 'epoch': 0.09} + 9%|▊ | 58/681 [02:28<26:29, 2.55s/it] 9%|▊ | 59/681 [02:31<26:34, 2.56s/it] {'loss': 0.8996, 'grad_norm': 54.276611328125, 'learning_rate': 4.2028985507246374e-07, 'fcm_dpo/beta': 0.09383856505155563, 'fcm_dpo/q_t': 0.34148186445236206, 'fcm_dpo/delta': -0.3598101735115051, 'fcm_dpo/margin': 7.762610912322998, 'margin_dpo/margin_mean': 7.76261043548584, 'margin_dpo/margin_std': 7.849611282348633, 'logps/chosen': -53.143951416015625, 'logps/rejected': -97.85923767089844, 'logps/ref_chosen': -51.932525634765625, 'logps/ref_rejected': -88.88520050048828, 'KL/chosen_KL_mean': -1.2114276885986328, 'KL/rejected_KL_mean': -8.974040985107422, 'KL/mean': -5.092733383178711, 'KL/std': 6.752954959869385, 'logits/chosen': -0.6205647587776184, 'logits/rejected': -0.5793225765228271, 'epoch': 0.09} + 9%|▊ | 59/681 [02:31<26:34, 2.56s/it] 9%|▉ | 60/681 [02:33<26:23, 2.55s/it] {'loss': 0.9876, 'grad_norm': 60.4672966003418, 'learning_rate': 4.2753623188405794e-07, 'fcm_dpo/beta': 0.09018626809120178, 'fcm_dpo/q_t': 0.36957529187202454, 'fcm_dpo/delta': -0.1816769540309906, 'fcm_dpo/margin': 6.3241682052612305, 'margin_dpo/margin_mean': 6.3241682052612305, 'margin_dpo/margin_std': 7.076349258422852, 'logps/chosen': -63.13063049316406, 'logps/rejected': -93.906005859375, 'logps/ref_chosen': -60.94218826293945, 'logps/ref_rejected': -85.39340209960938, 'KL/chosen_KL_mean': -2.1884403228759766, 'KL/rejected_KL_mean': -8.512611389160156, 'KL/mean': -5.350527286529541, 'KL/std': 6.1919403076171875, 'logits/chosen': -0.5891699194908142, 'logits/rejected': -0.527586042881012, 'epoch': 0.09} + 9%|▉ | 60/681 [02:33<26:23, 2.55s/it] 9%|▉ | 61/681 [02:36<26:36, 2.57s/it] {'loss': 0.9755, 'grad_norm': 49.0460205078125, 'learning_rate': 4.3478260869565214e-07, 'fcm_dpo/beta': 0.08559857308864594, 'fcm_dpo/q_t': 0.36387041211128235, 'fcm_dpo/delta': -0.2840117812156677, 'fcm_dpo/margin': 7.751380443572998, 'margin_dpo/margin_mean': 7.751380920410156, 'margin_dpo/margin_std': 10.394891738891602, 'logps/chosen': -61.770896911621094, 'logps/rejected': -98.74125671386719, 'logps/ref_chosen': -60.633522033691406, 'logps/ref_rejected': -89.85249328613281, 'KL/chosen_KL_mean': -1.1373729705810547, 'KL/rejected_KL_mean': -8.888755798339844, 'KL/mean': -5.013064861297607, 'KL/std': 8.38675594329834, 'logits/chosen': -0.5909574627876282, 'logits/rejected': -0.5559062361717224, 'epoch': 0.09} + 9%|▉ | 61/681 [02:36<26:36, 2.57s/it] 9%|▉ | 62/681 [02:39<27:01, 2.62s/it] {'loss': 1.0451, 'grad_norm': 47.25103759765625, 'learning_rate': 4.420289855072464e-07, 'fcm_dpo/beta': 0.08392874896526337, 'fcm_dpo/q_t': 0.3917636275291443, 'fcm_dpo/delta': -0.08972346782684326, 'fcm_dpo/margin': 5.783695697784424, 'margin_dpo/margin_mean': 5.783695697784424, 'margin_dpo/margin_std': 7.337882995605469, 'logps/chosen': -57.50813293457031, 'logps/rejected': -82.70726013183594, 'logps/ref_chosen': -56.15077209472656, 'logps/ref_rejected': -75.56619262695312, 'KL/chosen_KL_mean': -1.3573627471923828, 'KL/rejected_KL_mean': -7.141059875488281, 'KL/mean': -4.249211311340332, 'KL/std': 6.042973518371582, 'logits/chosen': -0.635386049747467, 'logits/rejected': -0.6032891273498535, 'epoch': 0.09} + 9%|▉ | 62/681 [02:39<27:01, 2.62s/it] 9%|▉ | 63/681 [02:41<26:42, 2.59s/it] {'loss': 0.9445, 'grad_norm': 47.997623443603516, 'learning_rate': 4.4927536231884053e-07, 'fcm_dpo/beta': 0.07972732186317444, 'fcm_dpo/q_t': 0.3571065664291382, 'fcm_dpo/delta': -0.2495255470275879, 'fcm_dpo/margin': 7.922541618347168, 'margin_dpo/margin_mean': 7.922541618347168, 'margin_dpo/margin_std': 8.097877502441406, 'logps/chosen': -75.3538818359375, 'logps/rejected': -107.73908996582031, 'logps/ref_chosen': -73.14739227294922, 'logps/ref_rejected': -97.61006164550781, 'KL/chosen_KL_mean': -2.206483840942383, 'KL/rejected_KL_mean': -10.129024505615234, 'KL/mean': -6.167753219604492, 'KL/std': 7.77467679977417, 'logits/chosen': -0.6038184762001038, 'logits/rejected': -0.5584800243377686, 'epoch': 0.09} + 9%|▉ | 63/681 [02:41<26:42, 2.59s/it] 9%|▉ | 64/681 [02:44<26:13, 2.55s/it] {'loss': 0.9301, 'grad_norm': 44.42738342285156, 'learning_rate': 4.5652173913043473e-07, 'fcm_dpo/beta': 0.07469938695430756, 'fcm_dpo/q_t': 0.34917110204696655, 'fcm_dpo/delta': -0.3220548927783966, 'fcm_dpo/margin': 9.295536041259766, 'margin_dpo/margin_mean': 9.295536041259766, 'margin_dpo/margin_std': 10.248291969299316, 'logps/chosen': -54.7468147277832, 'logps/rejected': -103.57395935058594, 'logps/ref_chosen': -53.998600006103516, 'logps/ref_rejected': -93.53019714355469, 'KL/chosen_KL_mean': -0.7482147216796875, 'KL/rejected_KL_mean': -10.043754577636719, 'KL/mean': -5.395984649658203, 'KL/std': 8.429512023925781, 'logits/chosen': -0.5919687747955322, 'logits/rejected': -0.5603554248809814, 'epoch': 0.09} + 9%|▉ | 64/681 [02:44<26:13, 2.55s/it] 10%|▉ | 65/681 [02:46<26:21, 2.57s/it] {'loss': 0.9463, 'grad_norm': 44.16692352294922, 'learning_rate': 4.63768115942029e-07, 'fcm_dpo/beta': 0.07084572315216064, 'fcm_dpo/q_t': 0.35268324613571167, 'fcm_dpo/delta': -0.2809777855873108, 'fcm_dpo/margin': 9.323431015014648, 'margin_dpo/margin_mean': 9.323431015014648, 'margin_dpo/margin_std': 10.194602012634277, 'logps/chosen': -67.60426330566406, 'logps/rejected': -122.03814697265625, 'logps/ref_chosen': -64.83599853515625, 'logps/ref_rejected': -109.94645690917969, 'KL/chosen_KL_mean': -2.768260955810547, 'KL/rejected_KL_mean': -12.091690063476562, 'KL/mean': -7.429973602294922, 'KL/std': 9.386064529418945, 'logits/chosen': -0.6519949436187744, 'logits/rejected': -0.6382172107696533, 'epoch': 0.1} + 10%|▉ | 65/681 [02:46<26:21, 2.57s/it] 10%|▉ | 66/681 [02:49<26:33, 2.59s/it] {'loss': 0.987, 'grad_norm': 39.67411422729492, 'learning_rate': 4.7101449275362313e-07, 'fcm_dpo/beta': 0.06770157068967819, 'fcm_dpo/q_t': 0.37264156341552734, 'fcm_dpo/delta': -0.19393965601921082, 'fcm_dpo/margin': 8.572092056274414, 'margin_dpo/margin_mean': 8.572092056274414, 'margin_dpo/margin_std': 10.129063606262207, 'logps/chosen': -54.004547119140625, 'logps/rejected': -86.7694091796875, 'logps/ref_chosen': -51.44352722167969, 'logps/ref_rejected': -75.63629913330078, 'KL/chosen_KL_mean': -2.5610218048095703, 'KL/rejected_KL_mean': -11.133113861083984, 'KL/mean': -6.84706974029541, 'KL/std': 8.479511260986328, 'logits/chosen': -0.6443692445755005, 'logits/rejected': -0.6113446950912476, 'epoch': 0.1} + 10%|▉ | 66/681 [02:49<26:33, 2.59s/it] 10%|▉ | 67/681 [02:51<25:32, 2.50s/it] {'loss': 0.9797, 'grad_norm': 38.83026885986328, 'learning_rate': 4.782608695652174e-07, 'fcm_dpo/beta': 0.06574313342571259, 'fcm_dpo/q_t': 0.37113136053085327, 'fcm_dpo/delta': -0.19544380903244019, 'fcm_dpo/margin': 8.888347625732422, 'margin_dpo/margin_mean': 8.888347625732422, 'margin_dpo/margin_std': 10.183094024658203, 'logps/chosen': -61.580657958984375, 'logps/rejected': -83.91548919677734, 'logps/ref_chosen': -59.34080505371094, 'logps/ref_rejected': -72.78728485107422, 'KL/chosen_KL_mean': -2.2398548126220703, 'KL/rejected_KL_mean': -11.128204345703125, 'KL/mean': -6.684027671813965, 'KL/std': 8.494741439819336, 'logits/chosen': -0.6339254975318909, 'logits/rejected': -0.5946371555328369, 'epoch': 0.1} + 10%|▉ | 67/681 [02:51<25:32, 2.50s/it] 10%|▉ | 68/681 [02:54<25:15, 2.47s/it] {'loss': 0.986, 'grad_norm': 38.64521408081055, 'learning_rate': 4.855072463768116e-07, 'fcm_dpo/beta': 0.06370236724615097, 'fcm_dpo/q_t': 0.3772110342979431, 'fcm_dpo/delta': -0.1451815813779831, 'fcm_dpo/margin': 8.439210891723633, 'margin_dpo/margin_mean': 8.439210891723633, 'margin_dpo/margin_std': 8.544252395629883, 'logps/chosen': -67.72261810302734, 'logps/rejected': -88.16325378417969, 'logps/ref_chosen': -65.2058334350586, 'logps/ref_rejected': -77.20724487304688, 'KL/chosen_KL_mean': -2.51678466796875, 'KL/rejected_KL_mean': -10.956001281738281, 'KL/mean': -6.736393928527832, 'KL/std': 7.671031951904297, 'logits/chosen': -0.6282086968421936, 'logits/rejected': -0.5680973529815674, 'epoch': 0.1} + 10%|▉ | 68/681 [02:54<25:15, 2.47s/it] 10%|█ | 69/681 [02:56<26:01, 2.55s/it] {'loss': 0.9375, 'grad_norm': 40.75960159301758, 'learning_rate': 4.927536231884058e-07, 'fcm_dpo/beta': 0.06064834073185921, 'fcm_dpo/q_t': 0.3607165217399597, 'fcm_dpo/delta': -0.23509711027145386, 'fcm_dpo/margin': 10.210319519042969, 'margin_dpo/margin_mean': 10.210320472717285, 'margin_dpo/margin_std': 9.960121154785156, 'logps/chosen': -62.86627960205078, 'logps/rejected': -116.64622497558594, 'logps/ref_chosen': -59.81924057006836, 'logps/ref_rejected': -103.38886260986328, 'KL/chosen_KL_mean': -3.0470409393310547, 'KL/rejected_KL_mean': -13.257366180419922, 'KL/mean': -8.152204513549805, 'KL/std': 9.061971664428711, 'logits/chosen': -0.5967146158218384, 'logits/rejected': -0.5730553865432739, 'epoch': 0.1} + 10%|█ | 69/681 [02:56<26:01, 2.55s/it] 10%|█ | 70/681 [02:59<25:46, 2.53s/it] {'loss': 0.9454, 'grad_norm': 40.870914459228516, 'learning_rate': 5e-07, 'fcm_dpo/beta': 0.058067694306373596, 'fcm_dpo/q_t': 0.35891324281692505, 'fcm_dpo/delta': -0.25848639011383057, 'fcm_dpo/margin': 11.062643051147461, 'margin_dpo/margin_mean': 11.062643051147461, 'margin_dpo/margin_std': 11.850614547729492, 'logps/chosen': -66.42919921875, 'logps/rejected': -106.62198638916016, 'logps/ref_chosen': -61.930641174316406, 'logps/ref_rejected': -91.06078338623047, 'KL/chosen_KL_mean': -4.498558044433594, 'KL/rejected_KL_mean': -15.561203002929688, 'KL/mean': -10.02988052368164, 'KL/std': 11.078158378601074, 'logits/chosen': -0.6100128889083862, 'logits/rejected': -0.5743746757507324, 'epoch': 0.1} + 10%|█ | 70/681 [02:59<25:46, 2.53s/it] 10%|█ | 71/681 [03:01<25:48, 2.54s/it] {'loss': 0.9003, 'grad_norm': 37.54128646850586, 'learning_rate': 4.999967061337492e-07, 'fcm_dpo/beta': 0.05431191250681877, 'fcm_dpo/q_t': 0.34613728523254395, 'fcm_dpo/delta': -0.3282097578048706, 'fcm_dpo/margin': 12.967889785766602, 'margin_dpo/margin_mean': 12.967889785766602, 'margin_dpo/margin_std': 12.603883743286133, 'logps/chosen': -65.84942626953125, 'logps/rejected': -114.40359497070312, 'logps/ref_chosen': -61.750335693359375, 'logps/ref_rejected': -97.33662414550781, 'KL/chosen_KL_mean': -4.099088668823242, 'KL/rejected_KL_mean': -17.066974639892578, 'KL/mean': -10.583032608032227, 'KL/std': 11.512796401977539, 'logits/chosen': -0.6697078943252563, 'logits/rejected': -0.6301345825195312, 'epoch': 0.1} + 10%|█ | 71/681 [03:01<25:48, 2.54s/it] 11%|█ | 72/681 [03:04<26:04, 2.57s/it] {'loss': 0.9125, 'grad_norm': 37.724822998046875, 'learning_rate': 4.999868246217933e-07, 'fcm_dpo/beta': 0.05085095018148422, 'fcm_dpo/q_t': 0.3455986976623535, 'fcm_dpo/delta': -0.32645586133003235, 'fcm_dpo/margin': 13.816070556640625, 'margin_dpo/margin_mean': 13.816070556640625, 'margin_dpo/margin_std': 14.364995956420898, 'logps/chosen': -70.84852600097656, 'logps/rejected': -113.89817810058594, 'logps/ref_chosen': -66.05341339111328, 'logps/ref_rejected': -95.2869873046875, 'KL/chosen_KL_mean': -4.795114517211914, 'KL/rejected_KL_mean': -18.611186981201172, 'KL/mean': -11.703153610229492, 'KL/std': 12.470186233520508, 'logits/chosen': -0.6510441303253174, 'logits/rejected': -0.6148891448974609, 'epoch': 0.11} + 11%|█ | 72/681 [03:04<26:04, 2.57s/it] 11%|█ | 73/681 [03:07<26:27, 2.61s/it] {'loss': 1.0192, 'grad_norm': 36.75204849243164, 'learning_rate': 4.999703557245192e-07, 'fcm_dpo/beta': 0.047877371311187744, 'fcm_dpo/q_t': 0.3676333427429199, 'fcm_dpo/delta': -0.2798731029033661, 'fcm_dpo/margin': 13.781963348388672, 'margin_dpo/margin_mean': 13.781963348388672, 'margin_dpo/margin_std': 20.299331665039062, 'logps/chosen': -73.05313110351562, 'logps/rejected': -111.03495788574219, 'logps/ref_chosen': -66.25627136230469, 'logps/ref_rejected': -90.45613098144531, 'KL/chosen_KL_mean': -6.796857833862305, 'KL/rejected_KL_mean': -20.57882308959961, 'KL/mean': -13.687841415405273, 'KL/std': 16.359634399414062, 'logits/chosen': -0.6967453956604004, 'logits/rejected': -0.6562691926956177, 'epoch': 0.11} + 11%|█ | 73/681 [03:07<26:27, 2.61s/it] 11%|█ | 74/681 [03:09<26:12, 2.59s/it] {'loss': 0.9871, 'grad_norm': 37.91171646118164, 'learning_rate': 4.999472998758977e-07, 'fcm_dpo/beta': 0.04516391456127167, 'fcm_dpo/q_t': 0.3618400990962982, 'fcm_dpo/delta': -0.3053804337978363, 'fcm_dpo/margin': 15.132284164428711, 'margin_dpo/margin_mean': 15.132284164428711, 'margin_dpo/margin_std': 22.55142593383789, 'logps/chosen': -60.85551071166992, 'logps/rejected': -118.50984954833984, 'logps/ref_chosen': -53.42488098144531, 'logps/ref_rejected': -95.94693756103516, 'KL/chosen_KL_mean': -7.430627822875977, 'KL/rejected_KL_mean': -22.562911987304688, 'KL/mean': -14.996770858764648, 'KL/std': 18.265613555908203, 'logits/chosen': -0.6205891370773315, 'logits/rejected': -0.6105706691741943, 'epoch': 0.11} + 11%|█ | 74/681 [03:09<26:12, 2.59s/it] 11%|█ | 75/681 [03:12<26:23, 2.61s/it] {'loss': 0.8343, 'grad_norm': 33.1515998840332, 'learning_rate': 4.999176576834721e-07, 'fcm_dpo/beta': 0.041482701897621155, 'fcm_dpo/q_t': 0.3164390027523041, 'fcm_dpo/delta': -0.5359930396080017, 'fcm_dpo/margin': 21.468948364257812, 'margin_dpo/margin_mean': 21.468948364257812, 'margin_dpo/margin_std': 21.272823333740234, 'logps/chosen': -59.16933059692383, 'logps/rejected': -140.0305938720703, 'logps/ref_chosen': -51.861663818359375, 'logps/ref_rejected': -111.25398254394531, 'KL/chosen_KL_mean': -7.307668685913086, 'KL/rejected_KL_mean': -28.776607513427734, 'KL/mean': -18.042144775390625, 'KL/std': 18.98027801513672, 'logits/chosen': -0.6864483952522278, 'logits/rejected': -0.6774381399154663, 'epoch': 0.11} + 11%|█ | 75/681 [03:12<26:23, 2.61s/it] 11%|█ | 76/681 [03:14<26:14, 2.60s/it] {'loss': 1.0119, 'grad_norm': 32.834896087646484, 'learning_rate': 4.998814299283415e-07, 'fcm_dpo/beta': 0.039188824594020844, 'fcm_dpo/q_t': 0.3768173158168793, 'fcm_dpo/delta': -0.14997366070747375, 'fcm_dpo/margin': 13.827640533447266, 'margin_dpo/margin_mean': 13.827640533447266, 'margin_dpo/margin_std': 16.90443229675293, 'logps/chosen': -62.30120086669922, 'logps/rejected': -101.07942199707031, 'logps/ref_chosen': -53.26603698730469, 'logps/ref_rejected': -78.21662902832031, 'KL/chosen_KL_mean': -9.035161972045898, 'KL/rejected_KL_mean': -22.862796783447266, 'KL/mean': -15.948980331420898, 'KL/std': 15.473119735717773, 'logits/chosen': -0.714850664138794, 'logits/rejected': -0.6740258932113647, 'epoch': 0.11} + 11%|█ | 76/681 [03:14<26:14, 2.60s/it] 11%|█▏ | 77/681 [03:17<25:12, 2.50s/it] {'loss': 0.882, 'grad_norm': 34.206050872802734, 'learning_rate': 4.998386175651409e-07, 'fcm_dpo/beta': 0.036142949014902115, 'fcm_dpo/q_t': 0.3258803188800812, 'fcm_dpo/delta': -0.4468532204627991, 'fcm_dpo/margin': 22.32929039001465, 'margin_dpo/margin_mean': 22.329288482666016, 'margin_dpo/margin_std': 23.451766967773438, 'logps/chosen': -66.2728042602539, 'logps/rejected': -124.2790298461914, 'logps/ref_chosen': -58.0966796875, 'logps/ref_rejected': -93.77361297607422, 'KL/chosen_KL_mean': -8.176126480102539, 'KL/rejected_KL_mean': -30.505416870117188, 'KL/mean': -19.34077262878418, 'KL/std': 21.328655242919922, 'logits/chosen': -0.6671018004417419, 'logits/rejected': -0.6263935565948486, 'epoch': 0.11} + 11%|█▏ | 77/681 [03:17<25:12, 2.50s/it] 11%|█▏ | 78/681 [03:19<25:36, 2.55s/it] {'loss': 0.9801, 'grad_norm': 30.835861206054688, 'learning_rate': 4.997892217220159e-07, 'fcm_dpo/beta': 0.03458146005868912, 'fcm_dpo/q_t': 0.3691740036010742, 'fcm_dpo/delta': -0.2059612274169922, 'fcm_dpo/margin': 17.180259704589844, 'margin_dpo/margin_mean': 17.180259704589844, 'margin_dpo/margin_std': 19.697816848754883, 'logps/chosen': -63.882240295410156, 'logps/rejected': -110.38308715820312, 'logps/ref_chosen': -55.61378479003906, 'logps/ref_rejected': -84.93436431884766, 'KL/chosen_KL_mean': -8.268457412719727, 'KL/rejected_KL_mean': -25.448719024658203, 'KL/mean': -16.85858917236328, 'KL/std': 17.313419342041016, 'logits/chosen': -0.6544767618179321, 'logits/rejected': -0.6291429996490479, 'epoch': 0.11} + 11%|█▏ | 78/681 [03:19<25:36, 2.55s/it] 12%|█▏ | 79/681 [03:22<25:51, 2.58s/it] {'loss': 0.9939, 'grad_norm': 27.264328002929688, 'learning_rate': 4.997332437005931e-07, 'fcm_dpo/beta': 0.032929353415966034, 'fcm_dpo/q_t': 0.3699612617492676, 'fcm_dpo/delta': -0.2343355119228363, 'fcm_dpo/margin': 18.817596435546875, 'margin_dpo/margin_mean': 18.81760025024414, 'margin_dpo/margin_std': 24.226604461669922, 'logps/chosen': -64.0604248046875, 'logps/rejected': -115.07510375976562, 'logps/ref_chosen': -55.45048522949219, 'logps/ref_rejected': -87.64756774902344, 'KL/chosen_KL_mean': -8.609933853149414, 'KL/rejected_KL_mean': -27.427539825439453, 'KL/mean': -18.018735885620117, 'KL/std': 19.773313522338867, 'logits/chosen': -0.6396697163581848, 'logits/rejected': -0.6080412268638611, 'epoch': 0.12} + 12%|█▏ | 79/681 [03:22<25:51, 2.58s/it] 12%|█▏ | 80/681 [03:25<25:44, 2.57s/it] {'loss': 1.039, 'grad_norm': 29.68206214904785, 'learning_rate': 4.996706849759452e-07, 'fcm_dpo/beta': 0.03151794895529747, 'fcm_dpo/q_t': 0.38502687215805054, 'fcm_dpo/delta': -0.16158056259155273, 'fcm_dpo/margin': 17.452417373657227, 'margin_dpo/margin_mean': 17.45241928100586, 'margin_dpo/margin_std': 24.047962188720703, 'logps/chosen': -70.11990356445312, 'logps/rejected': -116.60054016113281, 'logps/ref_chosen': -58.519290924072266, 'logps/ref_rejected': -87.54750061035156, 'KL/chosen_KL_mean': -11.600616455078125, 'KL/rejected_KL_mean': -29.053043365478516, 'KL/mean': -20.326831817626953, 'KL/std': 20.81574058532715, 'logits/chosen': -0.723793625831604, 'logits/rejected': -0.681006669998169, 'epoch': 0.12} + 12%|█▏ | 80/681 [03:25<25:44, 2.57s/it] 12%|█▏ | 81/681 [03:27<26:01, 2.60s/it] {'loss': 0.9381, 'grad_norm': 30.155729293823242, 'learning_rate': 4.996015471965529e-07, 'fcm_dpo/beta': 0.029724348336458206, 'fcm_dpo/q_t': 0.34924542903900146, 'fcm_dpo/delta': -0.3522689640522003, 'fcm_dpo/margin': 24.351259231567383, 'margin_dpo/margin_mean': 24.35125732421875, 'margin_dpo/margin_std': 29.631593704223633, 'logps/chosen': -77.142822265625, 'logps/rejected': -164.70791625976562, 'logps/ref_chosen': -66.44886779785156, 'logps/ref_rejected': -129.66270446777344, 'KL/chosen_KL_mean': -10.693958282470703, 'KL/rejected_KL_mean': -35.04521942138672, 'KL/mean': -22.869586944580078, 'KL/std': 24.97785186767578, 'logits/chosen': -0.6988470554351807, 'logits/rejected': -0.6684309244155884, 'epoch': 0.12} + 12%|█▏ | 81/681 [03:27<26:01, 2.60s/it] 12%|█▏ | 82/681 [03:30<25:30, 2.55s/it] {'loss': 1.0786, 'grad_norm': 32.10773849487305, 'learning_rate': 4.995258321842611e-07, 'fcm_dpo/beta': 0.028949948027729988, 'fcm_dpo/q_t': 0.3837572932243347, 'fcm_dpo/delta': -0.16753321886062622, 'fcm_dpo/margin': 19.267545700073242, 'margin_dpo/margin_mean': 19.267547607421875, 'margin_dpo/margin_std': 31.091327667236328, 'logps/chosen': -65.29285430908203, 'logps/rejected': -123.0712661743164, 'logps/ref_chosen': -52.232383728027344, 'logps/ref_rejected': -90.74325561523438, 'KL/chosen_KL_mean': -13.060468673706055, 'KL/rejected_KL_mean': -32.3280143737793, 'KL/mean': -22.69424057006836, 'KL/std': 22.624094009399414, 'logits/chosen': -0.6523622274398804, 'logits/rejected': -0.6406021118164062, 'epoch': 0.12} + 12%|█▏ | 82/681 [03:30<25:30, 2.55s/it] 12%|█▏ | 83/681 [03:32<25:07, 2.52s/it] {'loss': 0.9834, 'grad_norm': 31.6568660736084, 'learning_rate': 4.994435419342304e-07, 'fcm_dpo/beta': 0.027322106063365936, 'fcm_dpo/q_t': 0.3655874729156494, 'fcm_dpo/delta': -0.2400093972682953, 'fcm_dpo/margin': 22.84069061279297, 'margin_dpo/margin_mean': 22.84069061279297, 'margin_dpo/margin_std': 28.148263931274414, 'logps/chosen': -69.33623504638672, 'logps/rejected': -140.06544494628906, 'logps/ref_chosen': -55.82738494873047, 'logps/ref_rejected': -103.71589660644531, 'KL/chosen_KL_mean': -13.508848190307617, 'KL/rejected_KL_mean': -36.349544525146484, 'KL/mean': -24.929195404052734, 'KL/std': 23.749820709228516, 'logits/chosen': -0.6658318042755127, 'logits/rejected': -0.6298344135284424, 'epoch': 0.12} + 12%|█▏ | 83/681 [03:32<25:07, 2.52s/it] 12%|█▏ | 84/681 [03:35<25:44, 2.59s/it] {'loss': 1.0067, 'grad_norm': 26.903989791870117, 'learning_rate': 4.993546786148857e-07, 'fcm_dpo/beta': 0.026361385360360146, 'fcm_dpo/q_t': 0.38021910190582275, 'fcm_dpo/delta': -0.12693113088607788, 'fcm_dpo/margin': 19.657663345336914, 'margin_dpo/margin_mean': 19.657665252685547, 'margin_dpo/margin_std': 20.888431549072266, 'logps/chosen': -79.76535034179688, 'logps/rejected': -119.54544830322266, 'logps/ref_chosen': -67.1761703491211, 'logps/ref_rejected': -87.29859924316406, 'KL/chosen_KL_mean': -12.589178085327148, 'KL/rejected_KL_mean': -32.24684524536133, 'KL/mean': -22.418010711669922, 'KL/std': 20.389741897583008, 'logits/chosen': -0.6667696237564087, 'logits/rejected': -0.6284007430076599, 'epoch': 0.12} + 12%|█▏ | 84/681 [03:35<25:44, 2.59s/it] 12%|█▏ | 85/681 [03:38<26:07, 2.63s/it] {'loss': 1.0268, 'grad_norm': 27.330554962158203, 'learning_rate': 4.992592445678582e-07, 'fcm_dpo/beta': 0.02606545016169548, 'fcm_dpo/q_t': 0.3828505277633667, 'fcm_dpo/delta': -0.13678425550460815, 'fcm_dpo/margin': 20.310611724853516, 'margin_dpo/margin_mean': 20.310611724853516, 'margin_dpo/margin_std': 25.04244613647461, 'logps/chosen': -71.40888977050781, 'logps/rejected': -111.9516830444336, 'logps/ref_chosen': -58.4066162109375, 'logps/ref_rejected': -78.63880157470703, 'KL/chosen_KL_mean': -13.00227165222168, 'KL/rejected_KL_mean': -33.31288146972656, 'KL/mean': -23.157577514648438, 'KL/std': 20.965476989746094, 'logits/chosen': -0.6378351449966431, 'logits/rejected': -0.6047541499137878, 'epoch': 0.12} + 12%|█▏ | 85/681 [03:38<26:07, 2.63s/it] 13%|█▎ | 86/681 [03:40<26:15, 2.65s/it] {'loss': 1.11, 'grad_norm': 30.875211715698242, 'learning_rate': 4.991572423079235e-07, 'fcm_dpo/beta': 0.025230124592781067, 'fcm_dpo/q_t': 0.3947563171386719, 'fcm_dpo/delta': -0.14920490980148315, 'fcm_dpo/margin': 21.457521438598633, 'margin_dpo/margin_mean': 21.457523345947266, 'margin_dpo/margin_std': 39.128604888916016, 'logps/chosen': -73.14411926269531, 'logps/rejected': -126.58583068847656, 'logps/ref_chosen': -56.13746643066406, 'logps/ref_rejected': -88.12165069580078, 'KL/chosen_KL_mean': -17.00665855407715, 'KL/rejected_KL_mean': -38.46418380737305, 'KL/mean': -27.735424041748047, 'KL/std': 28.39218521118164, 'logits/chosen': -0.6909030675888062, 'logits/rejected': -0.6786030530929565, 'epoch': 0.13} + 13%|█▎ | 86/681 [03:40<26:15, 2.65s/it] 13%|█▎ | 87/681 [03:43<26:08, 2.64s/it] {'loss': 1.0182, 'grad_norm': 26.347061157226562, 'learning_rate': 4.990486745229364e-07, 'fcm_dpo/beta': 0.023992381989955902, 'fcm_dpo/q_t': 0.36907005310058594, 'fcm_dpo/delta': -0.21979403495788574, 'fcm_dpo/margin': 25.20366859436035, 'margin_dpo/margin_mean': 25.20366859436035, 'margin_dpo/margin_std': 34.2125244140625, 'logps/chosen': -72.28180694580078, 'logps/rejected': -137.31695556640625, 'logps/ref_chosen': -55.63609313964844, 'logps/ref_rejected': -95.46757507324219, 'KL/chosen_KL_mean': -16.64571189880371, 'KL/rejected_KL_mean': -41.84938430786133, 'KL/mean': -29.247547149658203, 'KL/std': 28.30339813232422, 'logits/chosen': -0.719502329826355, 'logits/rejected': -0.6944303512573242, 'epoch': 0.13} + 13%|█▎ | 87/681 [03:43<26:08, 2.64s/it] 13%|█▎ | 88/681 [03:46<26:09, 2.65s/it] {'loss': 1.1201, 'grad_norm': 27.308347702026367, 'learning_rate': 4.989335440737586e-07, 'fcm_dpo/beta': 0.023543458431959152, 'fcm_dpo/q_t': 0.4041179418563843, 'fcm_dpo/delta': -0.051779814064502716, 'fcm_dpo/margin': 19.053417205810547, 'margin_dpo/margin_mean': 19.053417205810547, 'margin_dpo/margin_std': 30.958572387695312, 'logps/chosen': -94.71744537353516, 'logps/rejected': -146.80821228027344, 'logps/ref_chosen': -73.67115020751953, 'logps/ref_rejected': -106.70849609375, 'KL/chosen_KL_mean': -21.046295166015625, 'KL/rejected_KL_mean': -40.09971618652344, 'KL/mean': -30.57300567626953, 'KL/std': 29.50307846069336, 'logits/chosen': -0.6762702465057373, 'logits/rejected': -0.6694661378860474, 'epoch': 0.13} + 13%|█▎ | 88/681 [03:46<26:09, 2.65s/it] 13%|█▎ | 89/681 [03:48<25:36, 2.60s/it] {'loss': 1.042, 'grad_norm': 24.864566802978516, 'learning_rate': 4.988118539941847e-07, 'fcm_dpo/beta': 0.02335914969444275, 'fcm_dpo/q_t': 0.38910990953445435, 'fcm_dpo/delta': -0.10445674508810043, 'fcm_dpo/margin': 21.37381362915039, 'margin_dpo/margin_mean': 21.37381362915039, 'margin_dpo/margin_std': 28.21473503112793, 'logps/chosen': -73.40365600585938, 'logps/rejected': -116.23609924316406, 'logps/ref_chosen': -60.624916076660156, 'logps/ref_rejected': -82.08354949951172, 'KL/chosen_KL_mean': -12.778741836547852, 'KL/rejected_KL_mean': -34.15255355834961, 'KL/mean': -23.465646743774414, 'KL/std': 24.849987030029297, 'logits/chosen': -0.7264094352722168, 'logits/rejected': -0.6940090656280518, 'epoch': 0.13} + 13%|█▎ | 89/681 [03:48<25:36, 2.60s/it] 13%|█▎ | 90/681 [03:51<25:07, 2.55s/it] {'loss': 1.0204, 'grad_norm': 27.197731018066406, 'learning_rate': 4.986836074908615e-07, 'fcm_dpo/beta': 0.02243289351463318, 'fcm_dpo/q_t': 0.3703291416168213, 'fcm_dpo/delta': -0.2720962464809418, 'fcm_dpo/margin': 29.194570541381836, 'margin_dpo/margin_mean': 29.194570541381836, 'margin_dpo/margin_std': 42.260841369628906, 'logps/chosen': -69.82144927978516, 'logps/rejected': -157.27542114257812, 'logps/ref_chosen': -53.285308837890625, 'logps/ref_rejected': -111.54470825195312, 'KL/chosen_KL_mean': -16.536136627197266, 'KL/rejected_KL_mean': -45.730709075927734, 'KL/mean': -31.133424758911133, 'KL/std': 34.05305480957031, 'logits/chosen': -0.6625027656555176, 'logits/rejected': -0.6746160984039307, 'epoch': 0.13} + 13%|█▎ | 90/681 [03:51<25:07, 2.55s/it] 13%|█▎ | 91/681 [03:53<25:18, 2.57s/it] {'loss': 1.061, 'grad_norm': 24.908628463745117, 'learning_rate': 4.985488079432037e-07, 'fcm_dpo/beta': 0.02157766930758953, 'fcm_dpo/q_t': 0.3894466459751129, 'fcm_dpo/delta': -0.12366719543933868, 'fcm_dpo/margin': 23.98028564453125, 'margin_dpo/margin_mean': 23.98028564453125, 'margin_dpo/margin_std': 35.235958099365234, 'logps/chosen': -79.01390075683594, 'logps/rejected': -129.065185546875, 'logps/ref_chosen': -61.802955627441406, 'logps/ref_rejected': -87.87395477294922, 'KL/chosen_KL_mean': -17.2109432220459, 'KL/rejected_KL_mean': -41.191226959228516, 'KL/mean': -29.20108413696289, 'KL/std': 27.21971321105957, 'logits/chosen': -0.6959347724914551, 'logits/rejected': -0.6632735729217529, 'epoch': 0.13} + 13%|█▎ | 91/681 [03:53<25:18, 2.57s/it] 14%|█▎ | 92/681 [03:56<24:58, 2.54s/it] {'loss': 1.0536, 'grad_norm': 23.367460250854492, 'learning_rate': 4.984074589033043e-07, 'fcm_dpo/beta': 0.021103451028466225, 'fcm_dpo/q_t': 0.3890799880027771, 'fcm_dpo/delta': -0.11899492889642715, 'fcm_dpo/margin': 24.30576515197754, 'margin_dpo/margin_mean': 24.305763244628906, 'margin_dpo/margin_std': 34.140499114990234, 'logps/chosen': -67.28801727294922, 'logps/rejected': -117.83419799804688, 'logps/ref_chosen': -51.640769958496094, 'logps/ref_rejected': -77.88117980957031, 'KL/chosen_KL_mean': -15.647247314453125, 'KL/rejected_KL_mean': -39.95301055908203, 'KL/mean': -27.800127029418945, 'KL/std': 28.11497688293457, 'logits/chosen': -0.7003687620162964, 'logits/rejected': -0.676365852355957, 'epoch': 0.14} + 14%|█▎ | 92/681 [03:56<24:58, 2.54s/it] 14%|█▎ | 93/681 [03:58<23:45, 2.42s/it] {'loss': 1.0292, 'grad_norm': 23.9678897857666, 'learning_rate': 4.982595640958425e-07, 'fcm_dpo/beta': 0.020545653998851776, 'fcm_dpo/q_t': 0.3881436884403229, 'fcm_dpo/delta': -0.1087617427110672, 'fcm_dpo/margin': 24.499828338623047, 'margin_dpo/margin_mean': 24.499828338623047, 'margin_dpo/margin_std': 30.640230178833008, 'logps/chosen': -70.07649993896484, 'logps/rejected': -119.20783233642578, 'logps/ref_chosen': -52.529239654541016, 'logps/ref_rejected': -77.16075134277344, 'KL/chosen_KL_mean': -17.547260284423828, 'KL/rejected_KL_mean': -42.04708480834961, 'KL/mean': -29.79717254638672, 'KL/std': 26.565616607666016, 'logits/chosen': -0.751114547252655, 'logits/rejected': -0.7044565081596375, 'epoch': 0.14} + 14%|█▎ | 93/681 [03:58<23:45, 2.42s/it] 14%|█▍ | 94/681 [04:01<24:40, 2.52s/it] {'loss': 0.9929, 'grad_norm': 23.683237075805664, 'learning_rate': 4.98105127417984e-07, 'fcm_dpo/beta': 0.019795160740613937, 'fcm_dpo/q_t': 0.3750844895839691, 'fcm_dpo/delta': -0.16931986808776855, 'fcm_dpo/margin': 28.20786476135254, 'margin_dpo/margin_mean': 28.207866668701172, 'margin_dpo/margin_std': 31.745136260986328, 'logps/chosen': -80.17037200927734, 'logps/rejected': -146.75466918945312, 'logps/ref_chosen': -61.22261047363281, 'logps/ref_rejected': -99.59902954101562, 'KL/chosen_KL_mean': -18.9477596282959, 'KL/rejected_KL_mean': -47.1556282043457, 'KL/mean': -33.051692962646484, 'KL/std': 30.412057876586914, 'logits/chosen': -0.6706228256225586, 'logits/rejected': -0.6556359529495239, 'epoch': 0.14} + 14%|█▍ | 94/681 [04:01<24:40, 2.52s/it] 14%|█▍ | 95/681 [04:03<24:25, 2.50s/it] {'loss': 1.072, 'grad_norm': 22.271825790405273, 'learning_rate': 4.979441529392784e-07, 'fcm_dpo/beta': 0.019524898380041122, 'fcm_dpo/q_t': 0.39812785387039185, 'fcm_dpo/delta': -0.04741118103265762, 'fcm_dpo/margin': 22.786224365234375, 'margin_dpo/margin_mean': 22.786224365234375, 'margin_dpo/margin_std': 30.41301727294922, 'logps/chosen': -70.50542449951172, 'logps/rejected': -116.64836120605469, 'logps/ref_chosen': -52.523643493652344, 'logps/ref_rejected': -75.8803482055664, 'KL/chosen_KL_mean': -17.981779098510742, 'KL/rejected_KL_mean': -40.76800537109375, 'KL/mean': -29.374893188476562, 'KL/std': 29.04880142211914, 'logits/chosen': -0.7048947811126709, 'logits/rejected': -0.6773319244384766, 'epoch': 0.14} + 14%|█▍ | 95/681 [04:03<24:25, 2.50s/it] 14%|█▍ | 96/681 [04:06<24:29, 2.51s/it] {'loss': 0.986, 'grad_norm': 22.89360237121582, 'learning_rate': 4.977766449015534e-07, 'fcm_dpo/beta': 0.01885131560266018, 'fcm_dpo/q_t': 0.3727257251739502, 'fcm_dpo/delta': -0.19593745470046997, 'fcm_dpo/margin': 30.885547637939453, 'margin_dpo/margin_mean': 30.88555145263672, 'margin_dpo/margin_std': 36.464759826660156, 'logps/chosen': -79.71180725097656, 'logps/rejected': -145.03640747070312, 'logps/ref_chosen': -62.15697479248047, 'logps/ref_rejected': -96.59601593017578, 'KL/chosen_KL_mean': -17.554834365844727, 'KL/rejected_KL_mean': -48.44038391113281, 'KL/mean': -32.99760437011719, 'KL/std': 33.0831298828125, 'logits/chosen': -0.7279924154281616, 'logits/rejected': -0.7011754512786865, 'epoch': 0.14} + 14%|█▍ | 96/681 [04:06<24:29, 2.51s/it] 14%|█▍ | 97/681 [04:08<24:38, 2.53s/it] {'loss': 1.0555, 'grad_norm': 23.5416202545166, 'learning_rate': 4.976026077188012e-07, 'fcm_dpo/beta': 0.01880602166056633, 'fcm_dpo/q_t': 0.39575350284576416, 'fcm_dpo/delta': -0.0534333810210228, 'fcm_dpo/margin': 23.93172836303711, 'margin_dpo/margin_mean': 23.93172836303711, 'margin_dpo/margin_std': 27.58646011352539, 'logps/chosen': -73.47427368164062, 'logps/rejected': -119.72438049316406, 'logps/ref_chosen': -54.646366119384766, 'logps/ref_rejected': -76.96475219726562, 'KL/chosen_KL_mean': -18.827903747558594, 'KL/rejected_KL_mean': -42.75962829589844, 'KL/mean': -30.793764114379883, 'KL/std': 27.565874099731445, 'logits/chosen': -0.6459416151046753, 'logits/rejected': -0.6031548976898193, 'epoch': 0.14} + 14%|█▍ | 97/681 [04:08<24:38, 2.53s/it] 14%|█▍ | 98/681 [04:11<24:26, 2.52s/it] {'loss': 1.0592, 'grad_norm': 24.430879592895508, 'learning_rate': 4.974220459770639e-07, 'fcm_dpo/beta': 0.0182771235704422, 'fcm_dpo/q_t': 0.3863416314125061, 'fcm_dpo/delta': -0.1036653220653534, 'fcm_dpo/margin': 27.26085662841797, 'margin_dpo/margin_mean': 27.260852813720703, 'margin_dpo/margin_std': 37.74567794799805, 'logps/chosen': -88.60673522949219, 'logps/rejected': -147.13644409179688, 'logps/ref_chosen': -65.25862884521484, 'logps/ref_rejected': -96.5274887084961, 'KL/chosen_KL_mean': -23.348102569580078, 'KL/rejected_KL_mean': -50.60894775390625, 'KL/mean': -36.9785270690918, 'KL/std': 31.48232650756836, 'logits/chosen': -0.6633949875831604, 'logits/rejected': -0.6469439268112183, 'epoch': 0.14} + 14%|█▍ | 98/681 [04:11<24:26, 2.52s/it] 15%|█▍ | 99/681 [04:13<23:30, 2.42s/it] {'loss': 0.9921, 'grad_norm': 21.330432891845703, 'learning_rate': 4.972349644343108e-07, 'fcm_dpo/beta': 0.017622604966163635, 'fcm_dpo/q_t': 0.37370553612709045, 'fcm_dpo/delta': -0.20359688997268677, 'fcm_dpo/margin': 33.529537200927734, 'margin_dpo/margin_mean': 33.529541015625, 'margin_dpo/margin_std': 42.3082389831543, 'logps/chosen': -63.947776794433594, 'logps/rejected': -138.27676391601562, 'logps/ref_chosen': -45.638484954833984, 'logps/ref_rejected': -86.43793487548828, 'KL/chosen_KL_mean': -18.30929183959961, 'KL/rejected_KL_mean': -51.838829040527344, 'KL/mean': -35.074058532714844, 'KL/std': 34.02536392211914, 'logits/chosen': -0.6809293627738953, 'logits/rejected': -0.6816772222518921, 'epoch': 0.15} + 15%|█▍ | 99/681 [04:13<23:30, 2.42s/it] 15%|█▍ | 100/681 [04:15<24:04, 2.49s/it] {'loss': 1.1659, 'grad_norm': 23.772842407226562, 'learning_rate': 4.970413680203148e-07, 'fcm_dpo/beta': 0.01770273968577385, 'fcm_dpo/q_t': 0.4218849539756775, 'fcm_dpo/delta': 0.05536198988556862, 'fcm_dpo/margin': 19.546470642089844, 'margin_dpo/margin_mean': 19.546470642089844, 'margin_dpo/margin_std': 35.299861907958984, 'logps/chosen': -78.28994750976562, 'logps/rejected': -114.30264282226562, 'logps/ref_chosen': -57.59397888183594, 'logps/ref_rejected': -74.06021118164062, 'KL/chosen_KL_mean': -20.69596290588379, 'KL/rejected_KL_mean': -40.242435455322266, 'KL/mean': -30.469200134277344, 'KL/std': 27.422863006591797, 'logits/chosen': -0.620718240737915, 'logits/rejected': -0.5739752650260925, 'epoch': 0.15} + 15%|█▍ | 100/681 [04:15<24:04, 2.49s/it] 15%|█▍ | 101/681 [04:18<23:45, 2.46s/it] {'loss': 1.1286, 'grad_norm': 23.58587646484375, 'learning_rate': 4.968412618365215e-07, 'fcm_dpo/beta': 0.01753612421452999, 'fcm_dpo/q_t': 0.41101598739624023, 'fcm_dpo/delta': -0.01752624288201332, 'fcm_dpo/margin': 23.726482391357422, 'margin_dpo/margin_mean': 23.726482391357422, 'margin_dpo/margin_std': 40.702640533447266, 'logps/chosen': -87.10765838623047, 'logps/rejected': -132.37498474121094, 'logps/ref_chosen': -61.64885330200195, 'logps/ref_rejected': -83.18968200683594, 'KL/chosen_KL_mean': -25.458805084228516, 'KL/rejected_KL_mean': -49.18529510498047, 'KL/mean': -37.322052001953125, 'KL/std': 33.75147247314453, 'logits/chosen': -0.6659466028213501, 'logits/rejected': -0.6346107721328735, 'epoch': 0.15} + 15%|█▍ | 101/681 [04:18<23:45, 2.46s/it] 15%|█▍ | 102/681 [04:20<23:32, 2.44s/it] {'loss': 1.2103, 'grad_norm': 26.59412384033203, 'learning_rate': 4.966346511559149e-07, 'fcm_dpo/beta': 0.01757633686065674, 'fcm_dpo/q_t': 0.4317839741706848, 'fcm_dpo/delta': -0.024831483140587807, 'fcm_dpo/margin': 17.465709686279297, 'margin_dpo/margin_mean': 17.46571159362793, 'margin_dpo/margin_std': 37.897613525390625, 'logps/chosen': -91.72523498535156, 'logps/rejected': -113.29914093017578, 'logps/ref_chosen': -64.0788803100586, 'logps/ref_rejected': -68.18707275390625, 'KL/chosen_KL_mean': -27.646360397338867, 'KL/rejected_KL_mean': -45.112064361572266, 'KL/mean': -36.379215240478516, 'KL/std': 31.945594787597656, 'logits/chosen': -0.6938978433609009, 'logits/rejected': -0.6497205495834351, 'epoch': 0.15} + 15%|█▍ | 102/681 [04:20<23:32, 2.44s/it] 15%|█▌ | 103/681 [04:23<23:16, 2.42s/it] {'loss': 0.9857, 'grad_norm': 22.75851058959961, 'learning_rate': 4.964215414228785e-07, 'fcm_dpo/beta': 0.016969915479421616, 'fcm_dpo/q_t': 0.3709907531738281, 'fcm_dpo/delta': -0.20710483193397522, 'fcm_dpo/margin': 35.02384948730469, 'margin_dpo/margin_mean': 35.02384948730469, 'margin_dpo/margin_std': 42.5711669921875, 'logps/chosen': -83.11199951171875, 'logps/rejected': -150.40928649902344, 'logps/ref_chosen': -61.299278259277344, 'logps/ref_rejected': -93.57270812988281, 'KL/chosen_KL_mean': -21.812721252441406, 'KL/rejected_KL_mean': -56.83657455444336, 'KL/mean': -39.324649810791016, 'KL/std': 35.332679748535156, 'logits/chosen': -0.6656967997550964, 'logits/rejected': -0.6312940120697021, 'epoch': 0.15} + 15%|█▌ | 103/681 [04:23<23:16, 2.42s/it] 15%|█▌ | 104/681 [04:25<22:42, 2.36s/it] {'loss': 1.0453, 'grad_norm': 22.30910301208496, 'learning_rate': 4.96201938253052e-07, 'fcm_dpo/beta': 0.01646982505917549, 'fcm_dpo/q_t': 0.38579535484313965, 'fcm_dpo/delta': -0.15515577793121338, 'fcm_dpo/margin': 33.20488357543945, 'margin_dpo/margin_mean': 33.20488357543945, 'margin_dpo/margin_std': 47.9078369140625, 'logps/chosen': -78.29458618164062, 'logps/rejected': -146.69140625, 'logps/ref_chosen': -54.372772216796875, 'logps/ref_rejected': -89.5647201538086, 'KL/chosen_KL_mean': -23.92180824279785, 'KL/rejected_KL_mean': -57.12669372558594, 'KL/mean': -40.524253845214844, 'KL/std': 39.19900131225586, 'logits/chosen': -0.668786883354187, 'logits/rejected': -0.6362247467041016, 'epoch': 0.15} + 15%|█▌ | 104/681 [04:25<22:42, 2.36s/it] 15%|█▌ | 105/681 [04:27<23:21, 2.43s/it] {'loss': 0.8737, 'grad_norm': 22.623994827270508, 'learning_rate': 4.959758474331832e-07, 'fcm_dpo/beta': 0.015482816845178604, 'fcm_dpo/q_t': 0.3350944519042969, 'fcm_dpo/delta': -0.3573678731918335, 'fcm_dpo/margin': 47.245811462402344, 'margin_dpo/margin_mean': 47.245811462402344, 'margin_dpo/margin_std': 42.06477355957031, 'logps/chosen': -77.0775146484375, 'logps/rejected': -167.6579132080078, 'logps/ref_chosen': -54.638946533203125, 'logps/ref_rejected': -97.97351837158203, 'KL/chosen_KL_mean': -22.438573837280273, 'KL/rejected_KL_mean': -69.68439483642578, 'KL/mean': -46.06147766113281, 'KL/std': 38.165252685546875, 'logits/chosen': -0.6759747862815857, 'logits/rejected': -0.653762936592102, 'epoch': 0.15} + 15%|█▌ | 105/681 [04:27<23:21, 2.43s/it] 16%|█▌ | 106/681 [04:30<23:23, 2.44s/it] {'loss': 1.0594, 'grad_norm': 21.77722930908203, 'learning_rate': 4.957432749209755e-07, 'fcm_dpo/beta': 0.015045535750687122, 'fcm_dpo/q_t': 0.39749810099601746, 'fcm_dpo/delta': -0.04824310541152954, 'fcm_dpo/margin': 29.63909912109375, 'margin_dpo/margin_mean': 29.63909912109375, 'margin_dpo/margin_std': 36.35613250732422, 'logps/chosen': -79.99164581298828, 'logps/rejected': -140.02247619628906, 'logps/ref_chosen': -54.83289337158203, 'logps/ref_rejected': -85.22461700439453, 'KL/chosen_KL_mean': -25.158750534057617, 'KL/rejected_KL_mean': -54.79785919189453, 'KL/mean': -39.978302001953125, 'KL/std': 33.347076416015625, 'logits/chosen': -0.6268042325973511, 'logits/rejected': -0.5954272747039795, 'epoch': 0.16} + 16%|█▌ | 106/681 [04:30<23:23, 2.44s/it] 16%|█▌ | 107/681 [04:32<23:52, 2.50s/it] {'loss': 1.0453, 'grad_norm': 21.028383255004883, 'learning_rate': 4.955042268449307e-07, 'fcm_dpo/beta': 0.014755118638277054, 'fcm_dpo/q_t': 0.3882708251476288, 'fcm_dpo/delta': -0.09840479493141174, 'fcm_dpo/margin': 33.398658752441406, 'margin_dpo/margin_mean': 33.398658752441406, 'margin_dpo/margin_std': 42.718177795410156, 'logps/chosen': -99.63356018066406, 'logps/rejected': -158.06390380859375, 'logps/ref_chosen': -69.70780944824219, 'logps/ref_rejected': -94.73950958251953, 'KL/chosen_KL_mean': -29.925743103027344, 'KL/rejected_KL_mean': -63.32440185546875, 'KL/mean': -46.62507629394531, 'KL/std': 40.514007568359375, 'logits/chosen': -0.670194149017334, 'logits/rejected': -0.6239144802093506, 'epoch': 0.16} + 16%|█▌ | 107/681 [04:33<23:52, 2.50s/it] 16%|█▌ | 108/681 [04:35<23:30, 2.46s/it] {'loss': 1.0383, 'grad_norm': 21.29493522644043, 'learning_rate': 4.952587095041881e-07, 'fcm_dpo/beta': 0.014345895498991013, 'fcm_dpo/q_t': 0.38066431879997253, 'fcm_dpo/delta': -0.18586613237857819, 'fcm_dpo/margin': 40.095855712890625, 'margin_dpo/margin_mean': 40.095855712890625, 'margin_dpo/margin_std': 57.993988037109375, 'logps/chosen': -83.0417709350586, 'logps/rejected': -162.92376708984375, 'logps/ref_chosen': -56.0098876953125, 'logps/ref_rejected': -95.79601287841797, 'KL/chosen_KL_mean': -27.031885147094727, 'KL/rejected_KL_mean': -67.12774658203125, 'KL/mean': -47.079811096191406, 'KL/std': 44.77525329589844, 'logits/chosen': -0.669190526008606, 'logits/rejected': -0.6483861207962036, 'epoch': 0.16} + 16%|█▌ | 108/681 [04:35<23:30, 2.46s/it] 16%|█▌ | 109/681 [04:38<24:40, 2.59s/it] {'loss': 0.9884, 'grad_norm': 21.884559631347656, 'learning_rate': 4.95006729368358e-07, 'fcm_dpo/beta': 0.013737066648900509, 'fcm_dpo/q_t': 0.3687817454338074, 'fcm_dpo/delta': -0.20238548517227173, 'fcm_dpo/margin': 42.91639709472656, 'margin_dpo/margin_mean': 42.91639709472656, 'margin_dpo/margin_std': 50.632591247558594, 'logps/chosen': -88.09640502929688, 'logps/rejected': -166.81304931640625, 'logps/ref_chosen': -62.88549041748047, 'logps/ref_rejected': -98.68573760986328, 'KL/chosen_KL_mean': -25.21091651916504, 'KL/rejected_KL_mean': -68.12731170654297, 'KL/mean': -46.66911315917969, 'KL/std': 43.04130554199219, 'logits/chosen': -0.6145904660224915, 'logits/rejected': -0.59392911195755, 'epoch': 0.16} + 16%|█▌ | 109/681 [04:38<24:40, 2.59s/it] 16%|█▌ | 110/681 [04:40<24:53, 2.62s/it] {'loss': 1.0537, 'grad_norm': 19.124555587768555, 'learning_rate': 4.947482930773511e-07, 'fcm_dpo/beta': 0.013237670063972473, 'fcm_dpo/q_t': 0.3875874876976013, 'fcm_dpo/delta': -0.12065520882606506, 'fcm_dpo/margin': 38.542945861816406, 'margin_dpo/margin_mean': 38.542945861816406, 'margin_dpo/margin_std': 51.59848403930664, 'logps/chosen': -84.84319305419922, 'logps/rejected': -144.38247680664062, 'logps/ref_chosen': -58.753684997558594, 'logps/ref_rejected': -79.75001525878906, 'KL/chosen_KL_mean': -26.089508056640625, 'KL/rejected_KL_mean': -64.63245391845703, 'KL/mean': -45.36097717285156, 'KL/std': 42.92705154418945, 'logits/chosen': -0.5867836475372314, 'logits/rejected': -0.5484417676925659, 'epoch': 0.16} + 16%|█▌ | 110/681 [04:40<24:53, 2.62s/it] 16%|█▋ | 111/681 [04:43<24:38, 2.59s/it] {'loss': 1.0324, 'grad_norm': 21.609487533569336, 'learning_rate': 4.944834074412042e-07, 'fcm_dpo/beta': 0.01289959717541933, 'fcm_dpo/q_t': 0.3767807185649872, 'fcm_dpo/delta': -0.17664864659309387, 'fcm_dpo/margin': 43.809391021728516, 'margin_dpo/margin_mean': 43.80938720703125, 'margin_dpo/margin_std': 59.64892578125, 'logps/chosen': -98.25743865966797, 'logps/rejected': -171.87158203125, 'logps/ref_chosen': -68.62410736083984, 'logps/ref_rejected': -98.42886352539062, 'KL/chosen_KL_mean': -29.633333206176758, 'KL/rejected_KL_mean': -73.4427261352539, 'KL/mean': -51.53802490234375, 'KL/std': 48.50222396850586, 'logits/chosen': -0.669287919998169, 'logits/rejected': -0.6483087539672852, 'epoch': 0.16} + 16%|█▋ | 111/681 [04:43<24:38, 2.59s/it] 16%|█▋ | 112/681 [04:45<23:37, 2.49s/it] {'loss': 1.1257, 'grad_norm': 19.551979064941406, 'learning_rate': 4.942120794399002e-07, 'fcm_dpo/beta': 0.01288105733692646, 'fcm_dpo/q_t': 0.4176030158996582, 'fcm_dpo/delta': 0.04204365238547325, 'fcm_dpo/margin': 27.89263916015625, 'margin_dpo/margin_mean': 27.892641067504883, 'margin_dpo/margin_std': 40.78398895263672, 'logps/chosen': -77.62303924560547, 'logps/rejected': -120.04046630859375, 'logps/ref_chosen': -50.24964141845703, 'logps/ref_rejected': -64.77442932128906, 'KL/chosen_KL_mean': -27.373397827148438, 'KL/rejected_KL_mean': -55.26603698730469, 'KL/mean': -41.31971740722656, 'KL/std': 33.80635070800781, 'logits/chosen': -0.6326720118522644, 'logits/rejected': -0.5948728322982788, 'epoch': 0.16} + 16%|█▋ | 112/681 [04:45<23:37, 2.49s/it] 17%|█▋ | 113/681 [04:48<23:55, 2.53s/it] {'loss': 1.0938, 'grad_norm': 20.03278160095215, 'learning_rate': 4.939343162231841e-07, 'fcm_dpo/beta': 0.012986140325665474, 'fcm_dpo/q_t': 0.4100501537322998, 'fcm_dpo/delta': 0.01037517748773098, 'fcm_dpo/margin': 30.034034729003906, 'margin_dpo/margin_mean': 30.034034729003906, 'margin_dpo/margin_std': 38.99578094482422, 'logps/chosen': -101.01571655273438, 'logps/rejected': -142.30551147460938, 'logps/ref_chosen': -66.71295166015625, 'logps/ref_rejected': -77.96870422363281, 'KL/chosen_KL_mean': -34.30276870727539, 'KL/rejected_KL_mean': -64.33680725097656, 'KL/mean': -49.319786071777344, 'KL/std': 35.23823928833008, 'logits/chosen': -0.5970338582992554, 'logits/rejected': -0.5535135269165039, 'epoch': 0.17} + 17%|█▋ | 113/681 [04:48<23:55, 2.53s/it] 17%|█▋ | 114/681 [04:50<23:44, 2.51s/it] {'loss': 1.0003, 'grad_norm': 21.371883392333984, 'learning_rate': 4.936501251103751e-07, 'fcm_dpo/beta': 0.01250369194895029, 'fcm_dpo/q_t': 0.3746366500854492, 'fcm_dpo/delta': -0.21323440968990326, 'fcm_dpo/margin': 47.85365295410156, 'margin_dpo/margin_mean': 47.85365295410156, 'margin_dpo/margin_std': 64.31402587890625, 'logps/chosen': -89.44036102294922, 'logps/rejected': -166.61859130859375, 'logps/ref_chosen': -57.78507995605469, 'logps/ref_rejected': -87.10966491699219, 'KL/chosen_KL_mean': -31.6552791595459, 'KL/rejected_KL_mean': -79.50894165039062, 'KL/mean': -55.58210754394531, 'KL/std': 53.05522918701172, 'logits/chosen': -0.594833493232727, 'logits/rejected': -0.5611605048179626, 'epoch': 0.17} + 17%|█▋ | 114/681 [04:50<23:44, 2.51s/it] 17%|█▋ | 115/681 [04:53<24:06, 2.56s/it] {'loss': 1.1668, 'grad_norm': 26.58994483947754, 'learning_rate': 4.933595135901732e-07, 'fcm_dpo/beta': 0.012465628795325756, 'fcm_dpo/q_t': 0.414449542760849, 'fcm_dpo/delta': -0.009462913498282433, 'fcm_dpo/margin': 32.80569076538086, 'margin_dpo/margin_mean': 32.80569076538086, 'margin_dpo/margin_std': 66.59864807128906, 'logps/chosen': -106.83149719238281, 'logps/rejected': -172.6200714111328, 'logps/ref_chosen': -65.5826416015625, 'logps/ref_rejected': -98.56552124023438, 'KL/chosen_KL_mean': -41.24885559082031, 'KL/rejected_KL_mean': -74.05455017089844, 'KL/mean': -57.65170669555664, 'KL/std': 50.606632232666016, 'logits/chosen': -0.6483290195465088, 'logits/rejected': -0.6295895576477051, 'epoch': 0.17} + 17%|█▋ | 115/681 [04:53<24:06, 2.56s/it] 17%|█▋ | 116/681 [04:55<23:25, 2.49s/it] {'loss': 1.0582, 'grad_norm': 21.639448165893555, 'learning_rate': 4.930624893204624e-07, 'fcm_dpo/beta': 0.012374404817819595, 'fcm_dpo/q_t': 0.39768484234809875, 'fcm_dpo/delta': -0.05652306228876114, 'fcm_dpo/margin': 36.67411804199219, 'margin_dpo/margin_mean': 36.67411804199219, 'margin_dpo/margin_std': 46.316162109375, 'logps/chosen': -82.32562255859375, 'logps/rejected': -148.1212921142578, 'logps/ref_chosen': -51.40031433105469, 'logps/ref_rejected': -80.5218505859375, 'KL/chosen_KL_mean': -30.92531394958496, 'KL/rejected_KL_mean': -67.59944152832031, 'KL/mean': -49.26237487792969, 'KL/std': 42.78678894042969, 'logits/chosen': -0.6038175821304321, 'logits/rejected': -0.5935859680175781, 'epoch': 0.17} + 17%|█▋ | 116/681 [04:55<23:25, 2.49s/it] 17%|█▋ | 117/681 [04:58<23:18, 2.48s/it] {'loss': 1.1431, 'grad_norm': 28.117990493774414, 'learning_rate': 4.927590601281083e-07, 'fcm_dpo/beta': 0.012346116825938225, 'fcm_dpo/q_t': 0.41722893714904785, 'fcm_dpo/delta': 0.031927622854709625, 'fcm_dpo/margin': 29.907602310180664, 'margin_dpo/margin_mean': 29.907604217529297, 'margin_dpo/margin_std': 51.51899719238281, 'logps/chosen': -108.87709045410156, 'logps/rejected': -136.0702667236328, 'logps/ref_chosen': -69.29840850830078, 'logps/ref_rejected': -66.583984375, 'KL/chosen_KL_mean': -39.578678131103516, 'KL/rejected_KL_mean': -69.48628234863281, 'KL/mean': -54.53247833251953, 'KL/std': 45.36625671386719, 'logits/chosen': -0.6056150197982788, 'logits/rejected': -0.5710107088088989, 'epoch': 0.17} + 17%|█▋ | 117/681 [04:58<23:18, 2.48s/it] 17%|█▋ | 118/681 [05:00<23:18, 2.48s/it] {'loss': 1.0642, 'grad_norm': 20.659421920776367, 'learning_rate': 4.924492340087524e-07, 'fcm_dpo/beta': 0.01227930560708046, 'fcm_dpo/q_t': 0.3994569778442383, 'fcm_dpo/delta': -0.04938432201743126, 'fcm_dpo/margin': 36.41863250732422, 'margin_dpo/margin_mean': 36.41863250732422, 'margin_dpo/margin_std': 47.30088424682617, 'logps/chosen': -87.1544189453125, 'logps/rejected': -143.6011199951172, 'logps/ref_chosen': -55.6409797668457, 'logps/ref_rejected': -75.66905975341797, 'KL/chosen_KL_mean': -31.513439178466797, 'KL/rejected_KL_mean': -67.93206024169922, 'KL/mean': -49.722755432128906, 'KL/std': 41.247047424316406, 'logits/chosen': -0.6473113298416138, 'logits/rejected': -0.6298643350601196, 'epoch': 0.17} + 17%|█▋ | 118/681 [05:00<23:18, 2.48s/it] 17%|█▋ | 119/681 [05:03<23:53, 2.55s/it] {'loss': 1.1036, 'grad_norm': 23.585227966308594, 'learning_rate': 4.92133019126601e-07, 'fcm_dpo/beta': 0.012094875797629356, 'fcm_dpo/q_t': 0.40563011169433594, 'fcm_dpo/delta': -0.036839861422777176, 'fcm_dpo/margin': 35.89598846435547, 'margin_dpo/margin_mean': 35.895992279052734, 'margin_dpo/margin_std': 56.17529296875, 'logps/chosen': -116.60855102539062, 'logps/rejected': -181.97161865234375, 'logps/ref_chosen': -73.51019287109375, 'logps/ref_rejected': -102.977294921875, 'KL/chosen_KL_mean': -43.098350524902344, 'KL/rejected_KL_mean': -78.99433898925781, 'KL/mean': -61.04634475708008, 'KL/std': 46.14408874511719, 'logits/chosen': -0.6196680068969727, 'logits/rejected': -0.6074246168136597, 'epoch': 0.17} + 17%|█▋ | 119/681 [05:03<23:53, 2.55s/it] 18%|█▊ | 120/681 [05:06<24:17, 2.60s/it] {'loss': 0.9934, 'grad_norm': 21.931350708007812, 'learning_rate': 4.918104238142103e-07, 'fcm_dpo/beta': 0.011788450181484222, 'fcm_dpo/q_t': 0.37107378244400024, 'fcm_dpo/delta': -0.2001763880252838, 'fcm_dpo/margin': 49.901649475097656, 'margin_dpo/margin_mean': 49.901649475097656, 'margin_dpo/margin_std': 60.89421081542969, 'logps/chosen': -121.25485229492188, 'logps/rejected': -202.3994140625, 'logps/ref_chosen': -76.78083801269531, 'logps/ref_rejected': -108.02374267578125, 'KL/chosen_KL_mean': -44.474021911621094, 'KL/rejected_KL_mean': -94.37568664550781, 'KL/mean': -69.42485046386719, 'KL/std': 58.080665588378906, 'logits/chosen': -0.6279151439666748, 'logits/rejected': -0.5975610017776489, 'epoch': 0.18} + 18%|█▊ | 120/681 [05:06<24:17, 2.60s/it] 18%|█▊ | 121/681 [05:08<24:00, 2.57s/it] {'loss': 0.9991, 'grad_norm': 24.322509765625, 'learning_rate': 4.91481456572267e-07, 'fcm_dpo/beta': 0.011228121817111969, 'fcm_dpo/q_t': 0.3683249354362488, 'fcm_dpo/delta': -0.22773230075836182, 'fcm_dpo/margin': 54.51830291748047, 'margin_dpo/margin_mean': 54.51830291748047, 'margin_dpo/margin_std': 70.47315216064453, 'logps/chosen': -104.72904968261719, 'logps/rejected': -207.4520263671875, 'logps/ref_chosen': -61.789894104003906, 'logps/ref_rejected': -109.99456787109375, 'KL/chosen_KL_mean': -42.93914794921875, 'KL/rejected_KL_mean': -97.45744323730469, 'KL/mean': -70.19830322265625, 'KL/std': 56.19927978515625, 'logits/chosen': -0.5546694993972778, 'logits/rejected': -0.5499193072319031, 'epoch': 0.18} + 18%|█▊ | 121/681 [05:08<24:00, 2.57s/it] 18%|█▊ | 122/681 [05:10<23:14, 2.49s/it] {'loss': 0.8959, 'grad_norm': 23.660940170288086, 'learning_rate': 4.911461260693638e-07, 'fcm_dpo/beta': 0.010600419715046883, 'fcm_dpo/q_t': 0.34132951498031616, 'fcm_dpo/delta': -0.34783935546875, 'fcm_dpo/margin': 68.18690490722656, 'margin_dpo/margin_mean': 68.18690490722656, 'margin_dpo/margin_std': 67.38003540039062, 'logps/chosen': -85.92497253417969, 'logps/rejected': -213.92385864257812, 'logps/ref_chosen': -46.9022102355957, 'logps/ref_rejected': -106.71418762207031, 'KL/chosen_KL_mean': -39.022762298583984, 'KL/rejected_KL_mean': -107.20967102050781, 'KL/mean': -73.1162109375, 'KL/std': 64.33946990966797, 'logits/chosen': -0.5362130403518677, 'logits/rejected': -0.5523936748504639, 'epoch': 0.18} + 18%|█▊ | 122/681 [05:11<23:14, 2.49s/it] 18%|█▊ | 123/681 [05:13<23:52, 2.57s/it] {'loss': 1.1084, 'grad_norm': 20.85264778137207, 'learning_rate': 4.908044411417711e-07, 'fcm_dpo/beta': 0.010254621505737305, 'fcm_dpo/q_t': 0.4016492962837219, 'fcm_dpo/delta': -0.06130140274763107, 'fcm_dpo/margin': 44.62263870239258, 'margin_dpo/margin_mean': 44.62263488769531, 'margin_dpo/margin_std': 73.6215591430664, 'logps/chosen': -104.25570678710938, 'logps/rejected': -175.3151092529297, 'logps/ref_chosen': -61.33863830566406, 'logps/ref_rejected': -87.775390625, 'KL/chosen_KL_mean': -42.91706848144531, 'KL/rejected_KL_mean': -87.53971862792969, 'KL/mean': -65.22838592529297, 'KL/std': 54.896278381347656, 'logits/chosen': -0.5527976751327515, 'logits/rejected': -0.535463273525238, 'epoch': 0.18} + 18%|█▊ | 123/681 [05:13<23:52, 2.57s/it] 18%|█▊ | 124/681 [05:16<23:48, 2.56s/it] {'loss': 1.0146, 'grad_norm': 22.487119674682617, 'learning_rate': 4.904564107932048e-07, 'fcm_dpo/beta': 0.00984976440668106, 'fcm_dpo/q_t': 0.36925771832466125, 'fcm_dpo/delta': -0.26814186573028564, 'fcm_dpo/margin': 65.97357177734375, 'margin_dpo/margin_mean': 65.97357177734375, 'margin_dpo/margin_std': 94.4095458984375, 'logps/chosen': -120.3160400390625, 'logps/rejected': -232.42184448242188, 'logps/ref_chosen': -71.44833374023438, 'logps/ref_rejected': -117.58056640625, 'KL/chosen_KL_mean': -48.867706298828125, 'KL/rejected_KL_mean': -114.84127044677734, 'KL/mean': -81.8544921875, 'KL/std': 75.15878295898438, 'logits/chosen': -0.5741191506385803, 'logits/rejected': -0.5771223306655884, 'epoch': 0.18} + 18%|█▊ | 124/681 [05:16<23:48, 2.56s/it] 18%|█▊ | 125/681 [05:18<23:32, 2.54s/it] {'loss': 1.0267, 'grad_norm': 18.873462677001953, 'learning_rate': 4.90102044194588e-07, 'fcm_dpo/beta': 0.009521868079900742, 'fcm_dpo/q_t': 0.3802080452442169, 'fcm_dpo/delta': -0.16708803176879883, 'fcm_dpo/margin': 58.601497650146484, 'margin_dpo/margin_mean': 58.60149383544922, 'margin_dpo/margin_std': 78.49264526367188, 'logps/chosen': -90.46898651123047, 'logps/rejected': -182.92214965820312, 'logps/ref_chosen': -50.136940002441406, 'logps/ref_rejected': -83.98861694335938, 'KL/chosen_KL_mean': -40.33204650878906, 'KL/rejected_KL_mean': -98.93354797363281, 'KL/mean': -69.6327896118164, 'KL/std': 63.41231918334961, 'logits/chosen': -0.4996240735054016, 'logits/rejected': -0.5013130903244019, 'epoch': 0.18} + 18%|█▊ | 125/681 [05:18<23:32, 2.54s/it] 19%|█▊ | 126/681 [05:21<23:46, 2.57s/it] {'loss': 1.0441, 'grad_norm': 20.310638427734375, 'learning_rate': 4.897413506838102e-07, 'fcm_dpo/beta': 0.009252631105482578, 'fcm_dpo/q_t': 0.38932526111602783, 'fcm_dpo/delta': -0.10898162424564362, 'fcm_dpo/margin': 54.387474060058594, 'margin_dpo/margin_mean': 54.387474060058594, 'margin_dpo/margin_std': 72.31570434570312, 'logps/chosen': -99.36490631103516, 'logps/rejected': -196.21507263183594, 'logps/ref_chosen': -55.66706848144531, 'logps/ref_rejected': -98.1297607421875, 'KL/chosen_KL_mean': -43.697837829589844, 'KL/rejected_KL_mean': -98.08531188964844, 'KL/mean': -70.89157104492188, 'KL/std': 56.95924377441406, 'logits/chosen': -0.543870210647583, 'logits/rejected': -0.5395331382751465, 'epoch': 0.19} + 19%|█▊ | 126/681 [05:21<23:46, 2.57s/it] 19%|█▊ | 127/681 [05:24<23:58, 2.60s/it] {'loss': 1.1319, 'grad_norm': 20.918685913085938, 'learning_rate': 4.89374339765481e-07, 'fcm_dpo/beta': 0.009267613291740417, 'fcm_dpo/q_t': 0.414898157119751, 'fcm_dpo/delta': 0.03231769800186157, 'fcm_dpo/margin': 39.8004264831543, 'margin_dpo/margin_mean': 39.80043029785156, 'margin_dpo/margin_std': 63.22393798828125, 'logps/chosen': -99.185791015625, 'logps/rejected': -159.22732543945312, 'logps/ref_chosen': -56.55467987060547, 'logps/ref_rejected': -76.7957763671875, 'KL/chosen_KL_mean': -42.63111114501953, 'KL/rejected_KL_mean': -82.43153381347656, 'KL/mean': -62.53132629394531, 'KL/std': 50.20075225830078, 'logits/chosen': -0.5204076170921326, 'logits/rejected': -0.5004839897155762, 'epoch': 0.19} + 19%|█▊ | 127/681 [05:24<23:58, 2.60s/it] 19%|█▉ | 128/681 [05:26<24:14, 2.63s/it] {'loss': 1.1428, 'grad_norm': 29.327892303466797, 'learning_rate': 4.890010211106795e-07, 'fcm_dpo/beta': 0.009327895939350128, 'fcm_dpo/q_t': 0.4129607379436493, 'fcm_dpo/delta': 0.007488146424293518, 'fcm_dpo/margin': 42.07066345214844, 'margin_dpo/margin_mean': 42.07066345214844, 'margin_dpo/margin_std': 74.08981323242188, 'logps/chosen': -103.84451293945312, 'logps/rejected': -164.23316955566406, 'logps/ref_chosen': -58.12095642089844, 'logps/ref_rejected': -76.43896484375, 'KL/chosen_KL_mean': -45.72355270385742, 'KL/rejected_KL_mean': -87.79420471191406, 'KL/mean': -66.75888061523438, 'KL/std': 59.13935852050781, 'logits/chosen': -0.5080424547195435, 'logits/rejected': -0.4870242476463318, 'epoch': 0.19} + 19%|█▉ | 128/681 [05:26<24:14, 2.63s/it] 19%|█▉ | 129/681 [05:29<24:02, 2.61s/it] {'loss': 1.1488, 'grad_norm': 20.87249183654785, 'learning_rate': 4.88621404556699e-07, 'fcm_dpo/beta': 0.009284512139856815, 'fcm_dpo/q_t': 0.4140230417251587, 'fcm_dpo/delta': -0.01128113642334938, 'fcm_dpo/margin': 44.24713134765625, 'margin_dpo/margin_mean': 44.24713134765625, 'margin_dpo/margin_std': 84.46028900146484, 'logps/chosen': -122.29839324951172, 'logps/rejected': -196.27137756347656, 'logps/ref_chosen': -66.91637420654297, 'logps/ref_rejected': -96.6422119140625, 'KL/chosen_KL_mean': -55.38201904296875, 'KL/rejected_KL_mean': -99.62916564941406, 'KL/mean': -77.5055923461914, 'KL/std': 65.07495880126953, 'logits/chosen': -0.5257991552352905, 'logits/rejected': -0.5148609280586243, 'epoch': 0.19} + 19%|█▉ | 129/681 [05:29<24:02, 2.61s/it] 19%|█▉ | 130/681 [05:31<23:21, 2.54s/it] {'loss': 0.9976, 'grad_norm': 21.265871047973633, 'learning_rate': 4.882355001067891e-07, 'fcm_dpo/beta': 0.00907239317893982, 'fcm_dpo/q_t': 0.37017908692359924, 'fcm_dpo/delta': -0.2178019881248474, 'fcm_dpo/margin': 66.64888763427734, 'margin_dpo/margin_mean': 66.64889526367188, 'margin_dpo/margin_std': 81.45700073242188, 'logps/chosen': -85.82325744628906, 'logps/rejected': -190.58694458007812, 'logps/ref_chosen': -44.66685104370117, 'logps/ref_rejected': -82.78165435791016, 'KL/chosen_KL_mean': -41.156402587890625, 'KL/rejected_KL_mean': -107.8052978515625, 'KL/mean': -74.48085021972656, 'KL/std': 66.90983581542969, 'logits/chosen': -0.5009858012199402, 'logits/rejected': -0.4956481158733368, 'epoch': 0.19} + 19%|█▉ | 130/681 [05:31<23:21, 2.54s/it] 19%|█▉ | 131/681 [05:34<23:10, 2.53s/it] {'loss': 0.9805, 'grad_norm': 28.770198822021484, 'learning_rate': 4.878433179298909e-07, 'fcm_dpo/beta': 0.008619595319032669, 'fcm_dpo/q_t': 0.3690122663974762, 'fcm_dpo/delta': -0.18765899538993835, 'fcm_dpo/margin': 66.88614654541016, 'margin_dpo/margin_mean': 66.88614654541016, 'margin_dpo/margin_std': 73.10858154296875, 'logps/chosen': -81.73116302490234, 'logps/rejected': -192.13671875, 'logps/ref_chosen': -44.924591064453125, 'logps/ref_rejected': -88.44401550292969, 'KL/chosen_KL_mean': -36.80657196044922, 'KL/rejected_KL_mean': -103.69271850585938, 'KL/mean': -70.24964904785156, 'KL/std': 66.65603637695312, 'logits/chosen': -0.498441219329834, 'logits/rejected': -0.5051707625389099, 'epoch': 0.19} + 19%|█▉ | 131/681 [05:34<23:10, 2.53s/it] 19%|█▉ | 132/681 [05:36<23:17, 2.55s/it] {'loss': 1.0886, 'grad_norm': 19.942279815673828, 'learning_rate': 4.874448683603694e-07, 'fcm_dpo/beta': 0.00844726525247097, 'fcm_dpo/q_t': 0.40113556385040283, 'fcm_dpo/delta': -0.06705770641565323, 'fcm_dpo/margin': 54.898109436035156, 'margin_dpo/margin_mean': 54.89811325073242, 'margin_dpo/margin_std': 86.42204284667969, 'logps/chosen': -107.89549255371094, 'logps/rejected': -191.68466186523438, 'logps/ref_chosen': -59.00108337402344, 'logps/ref_rejected': -87.89215087890625, 'KL/chosen_KL_mean': -48.89440155029297, 'KL/rejected_KL_mean': -103.79251098632812, 'KL/mean': -76.34346008300781, 'KL/std': 66.48584747314453, 'logits/chosen': -0.48032820224761963, 'logits/rejected': -0.47630518674850464, 'epoch': 0.19} + 19%|█▉ | 132/681 [05:36<23:17, 2.55s/it] 20%|█▉ | 133/681 [05:39<23:31, 2.58s/it] {'loss': 1.11, 'grad_norm': 27.46077537536621, 'learning_rate': 4.870401618977415e-07, 'fcm_dpo/beta': 0.008408504538238049, 'fcm_dpo/q_t': 0.41032248735427856, 'fcm_dpo/delta': -0.00830613262951374, 'fcm_dpo/margin': 48.5077018737793, 'margin_dpo/margin_mean': 48.50770568847656, 'margin_dpo/margin_std': 75.81759643554688, 'logps/chosen': -124.08689880371094, 'logps/rejected': -202.32366943359375, 'logps/ref_chosen': -66.60449981689453, 'logps/ref_rejected': -96.33355712890625, 'KL/chosen_KL_mean': -57.482398986816406, 'KL/rejected_KL_mean': -105.99009704589844, 'KL/mean': -81.73624420166016, 'KL/std': 59.815887451171875, 'logits/chosen': -0.5145904421806335, 'logits/rejected': -0.5010430812835693, 'epoch': 0.2} + 20%|█▉ | 133/681 [05:39<23:31, 2.58s/it] 20%|█▉ | 134/681 [05:41<22:54, 2.51s/it] {'loss': 1.0693, 'grad_norm': 18.916580200195312, 'learning_rate': 4.866292092063986e-07, 'fcm_dpo/beta': 0.008402526378631592, 'fcm_dpo/q_t': 0.4022940993309021, 'fcm_dpo/delta': -0.037258490920066833, 'fcm_dpo/margin': 51.83448028564453, 'margin_dpo/margin_mean': 51.83448028564453, 'margin_dpo/margin_std': 67.91160583496094, 'logps/chosen': -97.95437622070312, 'logps/rejected': -185.37411499023438, 'logps/ref_chosen': -52.06925582885742, 'logps/ref_rejected': -87.6545181274414, 'KL/chosen_KL_mean': -45.88512420654297, 'KL/rejected_KL_mean': -97.7196044921875, 'KL/mean': -71.80236053466797, 'KL/std': 58.62601852416992, 'logits/chosen': -0.4667087197303772, 'logits/rejected': -0.4526156187057495, 'epoch': 0.2} + 20%|█▉ | 134/681 [05:41<22:54, 2.51s/it] 20%|█▉ | 135/681 [05:44<22:44, 2.50s/it] {'loss': 0.9939, 'grad_norm': 22.08445167541504, 'learning_rate': 4.862120211153265e-07, 'fcm_dpo/beta': 0.008090103045105934, 'fcm_dpo/q_t': 0.3705596625804901, 'fcm_dpo/delta': -0.21815121173858643, 'fcm_dpo/margin': 74.76806640625, 'margin_dpo/margin_mean': 74.76806640625, 'margin_dpo/margin_std': 93.64501953125, 'logps/chosen': -100.58100128173828, 'logps/rejected': -240.9749755859375, 'logps/ref_chosen': -50.353858947753906, 'logps/ref_rejected': -115.97975158691406, 'KL/chosen_KL_mean': -50.227142333984375, 'KL/rejected_KL_mean': -124.99522399902344, 'KL/mean': -87.61117553710938, 'KL/std': 77.6145248413086, 'logits/chosen': -0.48219579458236694, 'logits/rejected': -0.5165150165557861, 'epoch': 0.2} + 20%|█▉ | 135/681 [05:44<22:44, 2.50s/it] 20%|█▉ | 136/681 [05:46<23:18, 2.57s/it] {'loss': 1.1443, 'grad_norm': 20.334075927734375, 'learning_rate': 4.857886086178193e-07, 'fcm_dpo/beta': 0.007961141876876354, 'fcm_dpo/q_t': 0.41935813426971436, 'fcm_dpo/delta': 0.005593650043010712, 'fcm_dpo/margin': 49.48542404174805, 'margin_dpo/margin_mean': 49.48542022705078, 'margin_dpo/margin_std': 90.12128448486328, 'logps/chosen': -125.0014877319336, 'logps/rejected': -205.73562622070312, 'logps/ref_chosen': -65.072509765625, 'logps/ref_rejected': -96.32122802734375, 'KL/chosen_KL_mean': -59.928977966308594, 'KL/rejected_KL_mean': -109.41439819335938, 'KL/mean': -84.67167663574219, 'KL/std': 70.4333724975586, 'logits/chosen': -0.481515109539032, 'logits/rejected': -0.4732978343963623, 'epoch': 0.2} + 20%|█▉ | 136/681 [05:47<23:18, 2.57s/it] 20%|██ | 137/681 [05:49<23:12, 2.56s/it] {'loss': 1.0251, 'grad_norm': 19.739362716674805, 'learning_rate': 4.853589828711902e-07, 'fcm_dpo/beta': 0.00774747971445322, 'fcm_dpo/q_t': 0.3767836093902588, 'fcm_dpo/delta': -0.21738505363464355, 'fcm_dpo/margin': 77.95802307128906, 'margin_dpo/margin_mean': 77.95802307128906, 'margin_dpo/margin_std': 111.74757385253906, 'logps/chosen': -106.31678771972656, 'logps/rejected': -249.37945556640625, 'logps/ref_chosen': -48.759117126464844, 'logps/ref_rejected': -113.86376953125, 'KL/chosen_KL_mean': -57.55767822265625, 'KL/rejected_KL_mean': -135.51568603515625, 'KL/mean': -96.53668212890625, 'KL/std': 93.2874984741211, 'logits/chosen': -0.42566192150115967, 'logits/rejected': -0.4528757333755493, 'epoch': 0.2} + 20%|██ | 137/681 [05:49<23:12, 2.56s/it] 20%|██ | 138/681 [05:51<22:29, 2.48s/it] {'loss': 1.0552, 'grad_norm': 21.295473098754883, 'learning_rate': 4.849231551964771e-07, 'fcm_dpo/beta': 0.007621297147125006, 'fcm_dpo/q_t': 0.3964976966381073, 'fcm_dpo/delta': -0.054680272936820984, 'fcm_dpo/margin': 59.33884811401367, 'margin_dpo/margin_mean': 59.33884811401367, 'margin_dpo/margin_std': 72.82606506347656, 'logps/chosen': -120.47132873535156, 'logps/rejected': -212.4874725341797, 'logps/ref_chosen': -60.519649505615234, 'logps/ref_rejected': -93.19694519042969, 'KL/chosen_KL_mean': -59.95167922973633, 'KL/rejected_KL_mean': -119.29052734375, 'KL/mean': -89.62110137939453, 'KL/std': 71.20696258544922, 'logits/chosen': -0.4297477602958679, 'logits/rejected': -0.41819727420806885, 'epoch': 0.2} + 20%|██ | 138/681 [05:51<22:29, 2.48s/it] 20%|██ | 139/681 [05:54<22:11, 2.46s/it] {'loss': 1.0198, 'grad_norm': 18.57466697692871, 'learning_rate': 4.844811370781446e-07, 'fcm_dpo/beta': 0.007455192506313324, 'fcm_dpo/q_t': 0.38338446617126465, 'fcm_dpo/delta': -0.13268427550792694, 'fcm_dpo/margin': 70.53996276855469, 'margin_dpo/margin_mean': 70.53996276855469, 'margin_dpo/margin_std': 87.36215209960938, 'logps/chosen': -97.11381530761719, 'logps/rejected': -200.49038696289062, 'logps/ref_chosen': -46.89138412475586, 'logps/ref_rejected': -79.72798156738281, 'KL/chosen_KL_mean': -50.222434997558594, 'KL/rejected_KL_mean': -120.76240539550781, 'KL/mean': -85.49241638183594, 'KL/std': 67.446044921875, 'logits/chosen': -0.44361281394958496, 'logits/rejected': -0.4341086149215698, 'epoch': 0.2} + 20%|██ | 139/681 [05:54<22:11, 2.46s/it] 21%|██ | 140/681 [05:56<21:53, 2.43s/it] {'loss': 1.0689, 'grad_norm': 21.425811767578125, 'learning_rate': 4.840329401637809e-07, 'fcm_dpo/beta': 0.007304832339286804, 'fcm_dpo/q_t': 0.39591526985168457, 'fcm_dpo/delta': -0.07196947187185287, 'fcm_dpo/margin': 64.14215850830078, 'margin_dpo/margin_mean': 64.14215850830078, 'margin_dpo/margin_std': 90.35142517089844, 'logps/chosen': -119.77159118652344, 'logps/rejected': -208.22314453125, 'logps/ref_chosen': -58.97471618652344, 'logps/ref_rejected': -83.28410339355469, 'KL/chosen_KL_mean': -60.796875, 'KL/rejected_KL_mean': -124.93904113769531, 'KL/mean': -92.86795806884766, 'KL/std': 75.12921905517578, 'logits/chosen': -0.4486401081085205, 'logits/rejected': -0.4356744587421417, 'epoch': 0.21} + 21%|██ | 140/681 [05:56<21:53, 2.43s/it] 21%|██ | 141/681 [05:59<22:20, 2.48s/it] {'loss': 1.1024, 'grad_norm': 27.03215980529785, 'learning_rate': 4.83578576263792e-07, 'fcm_dpo/beta': 0.007242698222398758, 'fcm_dpo/q_t': 0.4011520743370056, 'fcm_dpo/delta': -0.04263737052679062, 'fcm_dpo/margin': 60.85835266113281, 'margin_dpo/margin_mean': 60.85835266113281, 'margin_dpo/margin_std': 95.99069213867188, 'logps/chosen': -144.0777587890625, 'logps/rejected': -228.05270385742188, 'logps/ref_chosen': -75.07566833496094, 'logps/ref_rejected': -98.1922607421875, 'KL/chosen_KL_mean': -69.00209045410156, 'KL/rejected_KL_mean': -129.86044311523438, 'KL/mean': -99.4312744140625, 'KL/std': 83.01104736328125, 'logits/chosen': -0.4251963496208191, 'logits/rejected': -0.41217079758644104, 'epoch': 0.21} + 21%|██ | 141/681 [05:59<22:20, 2.48s/it] 21%|██ | 142/681 [06:01<22:58, 2.56s/it] {'loss': 1.0826, 'grad_norm': 26.979690551757812, 'learning_rate': 4.83118057351089e-07, 'fcm_dpo/beta': 0.007142849266529083, 'fcm_dpo/q_t': 0.3925698399543762, 'fcm_dpo/delta': -0.10244297236204147, 'fcm_dpo/margin': 69.63619995117188, 'margin_dpo/margin_mean': 69.63619995117188, 'margin_dpo/margin_std': 106.22422790527344, 'logps/chosen': -127.8824462890625, 'logps/rejected': -234.07293701171875, 'logps/ref_chosen': -58.027931213378906, 'logps/ref_rejected': -94.58222961425781, 'KL/chosen_KL_mean': -69.8545150756836, 'KL/rejected_KL_mean': -139.49070739746094, 'KL/mean': -104.672607421875, 'KL/std': 90.1983642578125, 'logits/chosen': -0.40249842405319214, 'logits/rejected': -0.40109604597091675, 'epoch': 0.21} + 21%|██ | 142/681 [06:01<22:58, 2.56s/it] 21%|██ | 143/681 [06:04<23:26, 2.61s/it] {'loss': 1.1961, 'grad_norm': 23.613080978393555, 'learning_rate': 4.826513955607734e-07, 'fcm_dpo/beta': 0.007136983796954155, 'fcm_dpo/q_t': 0.4322276711463928, 'fcm_dpo/delta': 0.08635500073432922, 'fcm_dpo/margin': 44.3311882019043, 'margin_dpo/margin_mean': 44.3311882019043, 'margin_dpo/margin_std': 92.59246826171875, 'logps/chosen': -131.70953369140625, 'logps/rejected': -197.44384765625, 'logps/ref_chosen': -57.59645080566406, 'logps/ref_rejected': -78.99957275390625, 'KL/chosen_KL_mean': -74.11309051513672, 'KL/rejected_KL_mean': -118.44427490234375, 'KL/mean': -96.2786865234375, 'KL/std': 79.39483642578125, 'logits/chosen': -0.38653671741485596, 'logits/rejected': -0.3791394829750061, 'epoch': 0.21} + 21%|██ | 143/681 [06:04<23:26, 2.61s/it] 21%|██ | 144/681 [06:07<23:38, 2.64s/it] {'loss': 1.1073, 'grad_norm': 21.00301170349121, 'learning_rate': 4.821786031898176e-07, 'fcm_dpo/beta': 0.007198760285973549, 'fcm_dpo/q_t': 0.4110908508300781, 'fcm_dpo/delta': 0.007602264638990164, 'fcm_dpo/margin': 54.54907989501953, 'margin_dpo/margin_mean': 54.54907989501953, 'margin_dpo/margin_std': 79.00935363769531, 'logps/chosen': -125.74029541015625, 'logps/rejected': -202.38327026367188, 'logps/ref_chosen': -59.90636444091797, 'logps/ref_rejected': -82.00025939941406, 'KL/chosen_KL_mean': -65.83393859863281, 'KL/rejected_KL_mean': -120.38301086425781, 'KL/mean': -93.10847473144531, 'KL/std': 67.2380142211914, 'logits/chosen': -0.38446202874183655, 'logits/rejected': -0.368974506855011, 'epoch': 0.21} + 21%|██ | 144/681 [06:07<23:38, 2.64s/it] 21%|██▏ | 145/681 [06:09<23:18, 2.61s/it] {'loss': 1.0876, 'grad_norm': 23.93907356262207, 'learning_rate': 4.816996926967401e-07, 'fcm_dpo/beta': 0.007178094238042831, 'fcm_dpo/q_t': 0.40436333417892456, 'fcm_dpo/delta': -0.02286495827138424, 'fcm_dpo/margin': 58.771915435791016, 'margin_dpo/margin_mean': 58.771915435791016, 'margin_dpo/margin_std': 81.85527038574219, 'logps/chosen': -119.10614013671875, 'logps/rejected': -199.14370727539062, 'logps/ref_chosen': -56.60066604614258, 'logps/ref_rejected': -77.86631774902344, 'KL/chosen_KL_mean': -62.50547409057617, 'KL/rejected_KL_mean': -121.27738952636719, 'KL/mean': -91.89143371582031, 'KL/std': 67.18325805664062, 'logits/chosen': -0.43855080008506775, 'logits/rejected': -0.42247945070266724, 'epoch': 0.21} + 21%|██▏ | 145/681 [06:09<23:18, 2.61s/it] 21%|██▏ | 146/681 [06:12<23:05, 2.59s/it] {'loss': 1.183, 'grad_norm': 26.226016998291016, 'learning_rate': 4.812146767012779e-07, 'fcm_dpo/beta': 0.007236181758344173, 'fcm_dpo/q_t': 0.4256265461444855, 'fcm_dpo/delta': 0.07508739829063416, 'fcm_dpo/margin': 45.2476806640625, 'margin_dpo/margin_mean': 45.2476806640625, 'margin_dpo/margin_std': 87.81689453125, 'logps/chosen': -151.4716796875, 'logps/rejected': -212.42169189453125, 'logps/ref_chosen': -66.00045013427734, 'logps/ref_rejected': -81.70278930664062, 'KL/chosen_KL_mean': -85.47122192382812, 'KL/rejected_KL_mean': -130.71890258789062, 'KL/mean': -108.09506225585938, 'KL/std': 72.80694580078125, 'logits/chosen': -0.3779621720314026, 'logits/rejected': -0.34679633378982544, 'epoch': 0.21} + 21%|██▏ | 146/681 [06:12<23:05, 2.59s/it] 22%|██▏ | 147/681 [06:15<23:03, 2.59s/it] {'loss': 1.0886, 'grad_norm': 19.40831184387207, 'learning_rate': 4.807235679840536e-07, 'fcm_dpo/beta': 0.007220801897346973, 'fcm_dpo/q_t': 0.4018627405166626, 'fcm_dpo/delta': -0.045540180057287216, 'fcm_dpo/margin': 61.40085983276367, 'margin_dpo/margin_mean': 61.40085983276367, 'margin_dpo/margin_std': 90.92599487304688, 'logps/chosen': -115.91423034667969, 'logps/rejected': -195.3002166748047, 'logps/ref_chosen': -53.405487060546875, 'logps/ref_rejected': -71.39060974121094, 'KL/chosen_KL_mean': -62.50874328613281, 'KL/rejected_KL_mean': -123.90959930419922, 'KL/mean': -93.20917510986328, 'KL/std': 73.37457275390625, 'logits/chosen': -0.44671040773391724, 'logits/rejected': -0.42650818824768066, 'epoch': 0.22} + 22%|██▏ | 147/681 [06:15<23:03, 2.59s/it] 22%|██▏ | 148/681 [06:17<22:58, 2.59s/it] {'loss': 1.1233, 'grad_norm': 19.22397804260254, 'learning_rate': 4.802263794862384e-07, 'fcm_dpo/beta': 0.007157785817980766, 'fcm_dpo/q_t': 0.4160994589328766, 'fcm_dpo/delta': -0.0840882733464241, 'fcm_dpo/margin': 52.805450439453125, 'margin_dpo/margin_mean': 52.80545425415039, 'margin_dpo/margin_std': 77.12681579589844, 'logps/chosen': -126.39234924316406, 'logps/rejected': -217.35455322265625, 'logps/ref_chosen': -64.93708038330078, 'logps/ref_rejected': -103.09384155273438, 'KL/chosen_KL_mean': -61.455265045166016, 'KL/rejected_KL_mean': -114.26071166992188, 'KL/mean': -87.85798645019531, 'KL/std': 73.19024658203125, 'logits/chosen': -0.47541412711143494, 'logits/rejected': -0.46777063608169556, 'epoch': 0.22} + 22%|██▏ | 148/681 [06:17<22:58, 2.59s/it] 22%|██▏ | 149/681 [06:20<23:12, 2.62s/it] {'loss': 1.0551, 'grad_norm': 18.177705764770508, 'learning_rate': 4.797231243092118e-07, 'fcm_dpo/beta': 0.00697628129273653, 'fcm_dpo/q_t': 0.3954726457595825, 'fcm_dpo/delta': -0.06137773394584656, 'fcm_dpo/margin': 65.48056030273438, 'margin_dpo/margin_mean': 65.48056030273438, 'margin_dpo/margin_std': 78.40389251708984, 'logps/chosen': -117.54315185546875, 'logps/rejected': -223.86468505859375, 'logps/ref_chosen': -58.47376251220703, 'logps/ref_rejected': -99.31474304199219, 'KL/chosen_KL_mean': -59.069393157958984, 'KL/rejected_KL_mean': -124.54994201660156, 'KL/mean': -91.8096694946289, 'KL/std': 66.26725006103516, 'logits/chosen': -0.4999982714653015, 'logits/rejected': -0.48564597964286804, 'epoch': 0.22} + 22%|██▏ | 149/681 [06:20<23:12, 2.62s/it] 22%|██▏ | 150/681 [06:22<23:06, 2.61s/it] {'loss': 1.0812, 'grad_norm': 18.062509536743164, 'learning_rate': 4.792138157142157e-07, 'fcm_dpo/beta': 0.006911845877766609, 'fcm_dpo/q_t': 0.40420806407928467, 'fcm_dpo/delta': -0.04800789803266525, 'fcm_dpo/margin': 64.3185043334961, 'margin_dpo/margin_mean': 64.3185043334961, 'margin_dpo/margin_std': 93.56321716308594, 'logps/chosen': -97.96762084960938, 'logps/rejected': -199.9279022216797, 'logps/ref_chosen': -45.705810546875, 'logps/ref_rejected': -83.34759521484375, 'KL/chosen_KL_mean': -52.26180648803711, 'KL/rejected_KL_mean': -116.58030700683594, 'KL/mean': -84.42105865478516, 'KL/std': 78.1861343383789, 'logits/chosen': -0.46106863021850586, 'logits/rejected': -0.4648742079734802, 'epoch': 0.22} + 22%|██▏ | 150/681 [06:22<23:06, 2.61s/it] 22%|██▏ | 151/681 [06:25<22:32, 2.55s/it] {'loss': 1.061, 'grad_norm': 20.905559539794922, 'learning_rate': 4.786984671220053e-07, 'fcm_dpo/beta': 0.006893502548336983, 'fcm_dpo/q_t': 0.3980643153190613, 'fcm_dpo/delta': -0.047122225165367126, 'fcm_dpo/margin': 64.55509185791016, 'margin_dpo/margin_mean': 64.55509185791016, 'margin_dpo/margin_std': 80.68389892578125, 'logps/chosen': -135.7144317626953, 'logps/rejected': -230.16250610351562, 'logps/ref_chosen': -70.57083129882812, 'logps/ref_rejected': -100.46382141113281, 'KL/chosen_KL_mean': -65.14360046386719, 'KL/rejected_KL_mean': -129.69869995117188, 'KL/mean': -97.421142578125, 'KL/std': 73.59419250488281, 'logits/chosen': -0.541815996170044, 'logits/rejected': -0.5158591866493225, 'epoch': 0.22} + 22%|██▏ | 151/681 [06:25<22:32, 2.55s/it] 22%|██▏ | 152/681 [06:27<22:47, 2.58s/it] {'loss': 1.0171, 'grad_norm': 19.958600997924805, 'learning_rate': 4.78177092112495e-07, 'fcm_dpo/beta': 0.006756227929145098, 'fcm_dpo/q_t': 0.38215482234954834, 'fcm_dpo/delta': -0.13709712028503418, 'fcm_dpo/margin': 78.45030212402344, 'margin_dpo/margin_mean': 78.45030212402344, 'margin_dpo/margin_std': 93.28910827636719, 'logps/chosen': -117.49854278564453, 'logps/rejected': -241.92491149902344, 'logps/ref_chosen': -60.16438674926758, 'logps/ref_rejected': -106.14045715332031, 'KL/chosen_KL_mean': -57.33415603637695, 'KL/rejected_KL_mean': -135.78445434570312, 'KL/mean': -96.5593032836914, 'KL/std': 76.12808990478516, 'logits/chosen': -0.48123008012771606, 'logits/rejected': -0.47946709394454956, 'epoch': 0.22} + 22%|██▏ | 152/681 [06:28<22:47, 2.58s/it] 22%|██▏ | 153/681 [06:30<22:44, 2.59s/it] {'loss': 1.0887, 'grad_norm': 15.512747764587402, 'learning_rate': 4.776497044244016e-07, 'fcm_dpo/beta': 0.006657836027443409, 'fcm_dpo/q_t': 0.403054416179657, 'fcm_dpo/delta': -0.04847495257854462, 'fcm_dpo/margin': 67.03783416748047, 'margin_dpo/margin_mean': 67.037841796875, 'margin_dpo/margin_std': 101.445068359375, 'logps/chosen': -114.07719421386719, 'logps/rejected': -210.45559692382812, 'logps/ref_chosen': -56.315277099609375, 'logps/ref_rejected': -85.65583801269531, 'KL/chosen_KL_mean': -57.76191711425781, 'KL/rejected_KL_mean': -124.79976654052734, 'KL/mean': -91.28083801269531, 'KL/std': 82.61441040039062, 'logits/chosen': -0.48859214782714844, 'logits/rejected': -0.48414355516433716, 'epoch': 0.22} + 22%|██▏ | 153/681 [06:30<22:44, 2.59s/it] 23%|██▎ | 154/681 [06:33<22:58, 2.62s/it] {'loss': 1.1201, 'grad_norm': 19.13957977294922, 'learning_rate': 4.771163179548808e-07, 'fcm_dpo/beta': 0.006629183888435364, 'fcm_dpo/q_t': 0.4066181182861328, 'fcm_dpo/delta': -0.026146577671170235, 'fcm_dpo/margin': 64.08331298828125, 'margin_dpo/margin_mean': 64.08331298828125, 'margin_dpo/margin_std': 105.86033630371094, 'logps/chosen': -132.29168701171875, 'logps/rejected': -237.87664794921875, 'logps/ref_chosen': -62.74256896972656, 'logps/ref_rejected': -104.24420166015625, 'KL/chosen_KL_mean': -69.54912567138672, 'KL/rejected_KL_mean': -133.6324462890625, 'KL/mean': -101.59078979492188, 'KL/std': 83.2306137084961, 'logits/chosen': -0.4480747580528259, 'logits/rejected': -0.4504152834415436, 'epoch': 0.23} + 23%|██▎ | 154/681 [06:33<22:58, 2.62s/it] 23%|██▎ | 155/681 [06:35<22:56, 2.62s/it] {'loss': 1.0942, 'grad_norm': 19.22228240966797, 'learning_rate': 4.7657694675916247e-07, 'fcm_dpo/beta': 0.006584943272173405, 'fcm_dpo/q_t': 0.4042484164237976, 'fcm_dpo/delta': -0.026932524517178535, 'fcm_dpo/margin': 64.64971923828125, 'margin_dpo/margin_mean': 64.64971923828125, 'margin_dpo/margin_std': 95.12773132324219, 'logps/chosen': -124.31242370605469, 'logps/rejected': -205.80116271972656, 'logps/ref_chosen': -60.65318298339844, 'logps/ref_rejected': -77.49220275878906, 'KL/chosen_KL_mean': -63.659236907958984, 'KL/rejected_KL_mean': -128.3089599609375, 'KL/mean': -95.98409271240234, 'KL/std': 76.96090698242188, 'logits/chosen': -0.49922215938568115, 'logits/rejected': -0.4822632670402527, 'epoch': 0.23} + 23%|██▎ | 155/681 [06:35<22:56, 2.62s/it] 23%|██▎ | 156/681 [06:38<22:57, 2.62s/it] {'loss': 1.2754, 'grad_norm': 29.14635467529297, 'learning_rate': 4.7603160505017893e-07, 'fcm_dpo/beta': 0.006626888178288937, 'fcm_dpo/q_t': 0.44431304931640625, 'fcm_dpo/delta': 0.05205275118350983, 'fcm_dpo/margin': 37.15288543701172, 'margin_dpo/margin_mean': 37.15288543701172, 'margin_dpo/margin_std': 108.07014465332031, 'logps/chosen': -158.67697143554688, 'logps/rejected': -203.50726318359375, 'logps/ref_chosen': -69.49188232421875, 'logps/ref_rejected': -77.16929626464844, 'KL/chosen_KL_mean': -89.18508911132812, 'KL/rejected_KL_mean': -126.33798217773438, 'KL/mean': -107.76153564453125, 'KL/std': 82.31591796875, 'logits/chosen': -0.4069097638130188, 'logits/rejected': -0.3994802236557007, 'epoch': 0.23} + 23%|██▎ | 156/681 [06:38<22:57, 2.62s/it] 23%|██▎ | 157/681 [06:40<22:18, 2.55s/it] {'loss': 1.0292, 'grad_norm': 23.95264434814453, 'learning_rate': 4.7548030719819154e-07, 'fcm_dpo/beta': 0.006456049624830484, 'fcm_dpo/q_t': 0.37898433208465576, 'fcm_dpo/delta': -0.13866297900676727, 'fcm_dpo/margin': 82.09356689453125, 'margin_dpo/margin_mean': 82.09357452392578, 'margin_dpo/margin_std': 101.68392944335938, 'logps/chosen': -141.8121795654297, 'logps/rejected': -270.1837158203125, 'logps/ref_chosen': -61.368438720703125, 'logps/ref_rejected': -107.64636993408203, 'KL/chosen_KL_mean': -80.44374084472656, 'KL/rejected_KL_mean': -162.53732299804688, 'KL/mean': -121.49053192138672, 'KL/std': 89.35894775390625, 'logits/chosen': -0.3959600329399109, 'logits/rejected': -0.4036720395088196, 'epoch': 0.23} + 23%|██▎ | 157/681 [06:40<22:18, 2.55s/it] 23%|██▎ | 158/681 [06:43<22:23, 2.57s/it] {'loss': 1.0552, 'grad_norm': 19.55266761779785, 'learning_rate': 4.7492306773041136e-07, 'fcm_dpo/beta': 0.006296713836491108, 'fcm_dpo/q_t': 0.3867127597332001, 'fcm_dpo/delta': -0.15908576548099518, 'fcm_dpo/margin': 87.40489959716797, 'margin_dpo/margin_mean': 87.4049072265625, 'margin_dpo/margin_std': 133.116943359375, 'logps/chosen': -138.39312744140625, 'logps/rejected': -281.8797912597656, 'logps/ref_chosen': -57.612918853759766, 'logps/ref_rejected': -113.6946792602539, 'KL/chosen_KL_mean': -80.78021240234375, 'KL/rejected_KL_mean': -168.18511962890625, 'KL/mean': -124.482666015625, 'KL/std': 110.5858154296875, 'logits/chosen': -0.33809971809387207, 'logits/rejected': -0.35502055287361145, 'epoch': 0.23} + 23%|██▎ | 158/681 [06:43<22:23, 2.57s/it] 23%|██▎ | 159/681 [06:46<22:34, 2.60s/it] {'loss': 1.1473, 'grad_norm': 22.071809768676758, 'learning_rate': 4.743599013306165e-07, 'fcm_dpo/beta': 0.006294050253927708, 'fcm_dpo/q_t': 0.41609764099121094, 'fcm_dpo/delta': 0.02400752529501915, 'fcm_dpo/margin': 59.80369567871094, 'margin_dpo/margin_mean': 59.80369567871094, 'margin_dpo/margin_std': 104.75639343261719, 'logps/chosen': -172.4268341064453, 'logps/rejected': -239.5688934326172, 'logps/ref_chosen': -81.56034851074219, 'logps/ref_rejected': -88.89871215820312, 'KL/chosen_KL_mean': -90.86648559570312, 'KL/rejected_KL_mean': -150.67018127441406, 'KL/mean': -120.76834106445312, 'KL/std': 97.11602783203125, 'logits/chosen': -0.4063527286052704, 'logits/rejected': -0.37675607204437256, 'epoch': 0.23} + 23%|██▎ | 159/681 [06:46<22:34, 2.60s/it] 23%|██▎ | 160/681 [06:48<22:17, 2.57s/it] {'loss': 1.0912, 'grad_norm': 23.225406646728516, 'learning_rate': 4.737908228387656e-07, 'fcm_dpo/beta': 0.006151704117655754, 'fcm_dpo/q_t': 0.3962337076663971, 'fcm_dpo/delta': -0.10115846991539001, 'fcm_dpo/margin': 80.51261901855469, 'margin_dpo/margin_mean': 80.51261901855469, 'margin_dpo/margin_std': 130.29788208007812, 'logps/chosen': -158.8489227294922, 'logps/rejected': -270.8484802246094, 'logps/ref_chosen': -65.73088073730469, 'logps/ref_rejected': -97.21781921386719, 'KL/chosen_KL_mean': -93.1180419921875, 'KL/rejected_KL_mean': -173.6306610107422, 'KL/mean': -133.37435913085938, 'KL/std': 104.18497467041016, 'logits/chosen': -0.3705775737762451, 'logits/rejected': -0.362305611371994, 'epoch': 0.23} + 23%|██▎ | 160/681 [06:48<22:17, 2.57s/it] 24%|██▎ | 161/681 [06:50<21:26, 2.47s/it] {'loss': 1.0935, 'grad_norm': 21.588083267211914, 'learning_rate': 4.7321584725060594e-07, 'fcm_dpo/beta': 0.00611657090485096, 'fcm_dpo/q_t': 0.4046275019645691, 'fcm_dpo/delta': -0.03301185369491577, 'fcm_dpo/margin': 70.55889129638672, 'margin_dpo/margin_mean': 70.55888366699219, 'margin_dpo/margin_std': 104.3523941040039, 'logps/chosen': -131.81849670410156, 'logps/rejected': -233.37185668945312, 'logps/ref_chosen': -52.43647003173828, 'logps/ref_rejected': -83.43095397949219, 'KL/chosen_KL_mean': -79.38202667236328, 'KL/rejected_KL_mean': -149.94090270996094, 'KL/mean': -114.66146850585938, 'KL/std': 82.19270324707031, 'logits/chosen': -0.3816624879837036, 'logits/rejected': -0.3820039629936218, 'epoch': 0.24} + 24%|██▎ | 161/681 [06:50<21:26, 2.47s/it] 24%|██▍ | 162/681 [06:53<22:06, 2.56s/it] {'loss': 1.109, 'grad_norm': 21.74049186706543, 'learning_rate': 4.7263498971727905e-07, 'fcm_dpo/beta': 0.0060338219627738, 'fcm_dpo/q_t': 0.407100111246109, 'fcm_dpo/delta': -0.02483561635017395, 'fcm_dpo/margin': 69.96736145019531, 'margin_dpo/margin_mean': 69.96736145019531, 'margin_dpo/margin_std': 108.229248046875, 'logps/chosen': -138.9389190673828, 'logps/rejected': -235.686279296875, 'logps/ref_chosen': -62.6105842590332, 'logps/ref_rejected': -89.39057922363281, 'KL/chosen_KL_mean': -76.32833862304688, 'KL/rejected_KL_mean': -146.29568481445312, 'KL/mean': -111.31201171875, 'KL/std': 91.16246032714844, 'logits/chosen': -0.4195418953895569, 'logits/rejected': -0.4026295840740204, 'epoch': 0.24} + 24%|██▍ | 162/681 [06:53<22:06, 2.56s/it] 24%|██▍ | 163/681 [06:56<22:00, 2.55s/it] {'loss': 1.1152, 'grad_norm': 21.437828063964844, 'learning_rate': 4.720482655449212e-07, 'fcm_dpo/beta': 0.006065480876713991, 'fcm_dpo/q_t': 0.4095137119293213, 'fcm_dpo/delta': -0.015123652294278145, 'fcm_dpo/margin': 68.33741760253906, 'margin_dpo/margin_mean': 68.33741760253906, 'margin_dpo/margin_std': 110.58999633789062, 'logps/chosen': -140.94549560546875, 'logps/rejected': -229.67950439453125, 'logps/ref_chosen': -55.021629333496094, 'logps/ref_rejected': -75.418212890625, 'KL/chosen_KL_mean': -85.92386627197266, 'KL/rejected_KL_mean': -154.26129150390625, 'KL/mean': -120.09257507324219, 'KL/std': 91.9381103515625, 'logits/chosen': -0.3672639727592468, 'logits/rejected': -0.3495738208293915, 'epoch': 0.24} + 24%|██▍ | 163/681 [06:56<22:00, 2.55s/it] 24%|██▍ | 164/681 [06:58<21:53, 2.54s/it] {'loss': 1.035, 'grad_norm': 21.113449096679688, 'learning_rate': 4.714556901942599e-07, 'fcm_dpo/beta': 0.0059239305555820465, 'fcm_dpo/q_t': 0.3878824710845947, 'fcm_dpo/delta': -0.10423934459686279, 'fcm_dpo/margin': 83.9617919921875, 'margin_dpo/margin_mean': 83.9617919921875, 'margin_dpo/margin_std': 102.90313720703125, 'logps/chosen': -133.02481079101562, 'logps/rejected': -241.01055908203125, 'logps/ref_chosen': -55.64066696166992, 'logps/ref_rejected': -79.66463470458984, 'KL/chosen_KL_mean': -77.3841323852539, 'KL/rejected_KL_mean': -161.34591674804688, 'KL/mean': -119.36503601074219, 'KL/std': 89.17874908447266, 'logits/chosen': -0.3516240119934082, 'logits/rejected': -0.33663517236709595, 'epoch': 0.24} + 24%|██▍ | 164/681 [06:58<21:53, 2.54s/it] 24%|██▍ | 165/681 [07:01<21:50, 2.54s/it] {'loss': 1.1749, 'grad_norm': 23.085264205932617, 'learning_rate': 4.708572792802069e-07, 'fcm_dpo/beta': 0.005989417899399996, 'fcm_dpo/q_t': 0.42767125368118286, 'fcm_dpo/delta': 0.08000632375478745, 'fcm_dpo/margin': 53.86854553222656, 'margin_dpo/margin_mean': 53.86854553222656, 'margin_dpo/margin_std': 100.26142883300781, 'logps/chosen': -145.2901611328125, 'logps/rejected': -211.51861572265625, 'logps/ref_chosen': -61.310691833496094, 'logps/ref_rejected': -73.67060852050781, 'KL/chosen_KL_mean': -83.97947692871094, 'KL/rejected_KL_mean': -137.8480224609375, 'KL/mean': -110.91374969482422, 'KL/std': 75.39066314697266, 'logits/chosen': -0.3876940608024597, 'logits/rejected': -0.36072492599487305, 'epoch': 0.24} + 24%|██▍ | 165/681 [07:01<21:50, 2.54s/it] 24%|██▍ | 166/681 [07:03<20:57, 2.44s/it] {'loss': 1.0185, 'grad_norm': 17.283048629760742, 'learning_rate': 4.702530485714461e-07, 'fcm_dpo/beta': 0.0058315591886639595, 'fcm_dpo/q_t': 0.3807521462440491, 'fcm_dpo/delta': -0.19435712695121765, 'fcm_dpo/margin': 99.76808166503906, 'margin_dpo/margin_mean': 99.76808166503906, 'margin_dpo/margin_std': 138.61410522460938, 'logps/chosen': -124.79261779785156, 'logps/rejected': -271.6722106933594, 'logps/ref_chosen': -50.98360061645508, 'logps/ref_rejected': -98.09512329101562, 'KL/chosen_KL_mean': -73.80902099609375, 'KL/rejected_KL_mean': -173.57708740234375, 'KL/mean': -123.69305419921875, 'KL/std': 109.76763916015625, 'logits/chosen': -0.36310431361198425, 'logits/rejected': -0.37374886870384216, 'epoch': 0.24} + 24%|██▍ | 166/681 [07:03<20:57, 2.44s/it] 25%|██▍ | 167/681 [07:06<21:20, 2.49s/it] {'loss': 0.9744, 'grad_norm': 21.618406295776367, 'learning_rate': 4.6964301399001877e-07, 'fcm_dpo/beta': 0.005625586491078138, 'fcm_dpo/q_t': 0.36813193559646606, 'fcm_dpo/delta': -0.20304620265960693, 'fcm_dpo/margin': 105.04924011230469, 'margin_dpo/margin_mean': 105.04924011230469, 'margin_dpo/margin_std': 115.94286346435547, 'logps/chosen': -125.45037841796875, 'logps/rejected': -276.10595703125, 'logps/ref_chosen': -50.424095153808594, 'logps/ref_rejected': -96.03042602539062, 'KL/chosen_KL_mean': -75.02628326416016, 'KL/rejected_KL_mean': -180.07553100585938, 'KL/mean': -127.5509033203125, 'KL/std': 100.14985656738281, 'logits/chosen': -0.3545036017894745, 'logits/rejected': -0.35761505365371704, 'epoch': 0.25} + 25%|██▍ | 167/681 [07:06<21:20, 2.49s/it] 25%|██▍ | 168/681 [07:08<21:33, 2.52s/it] {'loss': 1.079, 'grad_norm': 19.52683448791504, 'learning_rate': 4.690271916109034e-07, 'fcm_dpo/beta': 0.0055332607589662075, 'fcm_dpo/q_t': 0.40392887592315674, 'fcm_dpo/delta': -0.029504312202334404, 'fcm_dpo/margin': 77.36842346191406, 'margin_dpo/margin_mean': 77.36842346191406, 'margin_dpo/margin_std': 104.69574737548828, 'logps/chosen': -130.7799072265625, 'logps/rejected': -233.99404907226562, 'logps/ref_chosen': -49.462825775146484, 'logps/ref_rejected': -75.30855560302734, 'KL/chosen_KL_mean': -81.31709289550781, 'KL/rejected_KL_mean': -158.6855010986328, 'KL/mean': -120.00129699707031, 'KL/std': 93.61595153808594, 'logits/chosen': -0.349258691072464, 'logits/rejected': -0.339669793844223, 'epoch': 0.25} + 25%|██▍ | 168/681 [07:08<21:33, 2.52s/it] 25%|██▍ | 169/681 [07:11<22:04, 2.59s/it] {'loss': 1.162, 'grad_norm': 20.000539779663086, 'learning_rate': 4.6840559766159235e-07, 'fcm_dpo/beta': 0.005457356106489897, 'fcm_dpo/q_t': 0.420589804649353, 'fcm_dpo/delta': -0.07184266299009323, 'fcm_dpo/margin': 67.25149536132812, 'margin_dpo/margin_mean': 67.25149536132812, 'margin_dpo/margin_std': 126.57770538330078, 'logps/chosen': -143.72865295410156, 'logps/rejected': -234.5224609375, 'logps/ref_chosen': -59.803443908691406, 'logps/ref_rejected': -83.34574890136719, 'KL/chosen_KL_mean': -83.92520904541016, 'KL/rejected_KL_mean': -151.1767120361328, 'KL/mean': -117.55094909667969, 'KL/std': 92.2286605834961, 'logits/chosen': -0.3642885386943817, 'logits/rejected': -0.34793075919151306, 'epoch': 0.25} + 25%|██▍ | 169/681 [07:11<22:04, 2.59s/it] 25%|██▍ | 170/681 [07:14<22:31, 2.64s/it] {'loss': 1.0798, 'grad_norm': 17.664331436157227, 'learning_rate': 4.6777824852166437e-07, 'fcm_dpo/beta': 0.005398896988481283, 'fcm_dpo/q_t': 0.4014880359172821, 'fcm_dpo/delta': -0.03206340968608856, 'fcm_dpo/margin': 79.6080093383789, 'margin_dpo/margin_mean': 79.6080093383789, 'margin_dpo/margin_std': 105.32583618164062, 'logps/chosen': -124.42909240722656, 'logps/rejected': -230.482666015625, 'logps/ref_chosen': -49.471771240234375, 'logps/ref_rejected': -75.91734313964844, 'KL/chosen_KL_mean': -74.95732116699219, 'KL/rejected_KL_mean': -154.56533813476562, 'KL/mean': -114.7613296508789, 'KL/std': 88.03938293457031, 'logits/chosen': -0.298395574092865, 'logits/rejected': -0.2869154214859009, 'epoch': 0.25} + 25%|██▍ | 170/681 [07:14<22:31, 2.64s/it] 25%|██▌ | 171/681 [07:16<21:39, 2.55s/it] {'loss': 1.1816, 'grad_norm': 28.27412223815918, 'learning_rate': 4.6714516072235273e-07, 'fcm_dpo/beta': 0.005459581036120653, 'fcm_dpo/q_t': 0.4263428747653961, 'fcm_dpo/delta': 0.051255661994218826, 'fcm_dpo/margin': 64.2116470336914, 'margin_dpo/margin_mean': 64.2116470336914, 'margin_dpo/margin_std': 133.7387237548828, 'logps/chosen': -194.8042755126953, 'logps/rejected': -283.89874267578125, 'logps/ref_chosen': -84.49931335449219, 'logps/ref_rejected': -109.38209533691406, 'KL/chosen_KL_mean': -110.30496215820312, 'KL/rejected_KL_mean': -174.51663208007812, 'KL/mean': -142.41079711914062, 'KL/std': 103.6309585571289, 'logits/chosen': -0.3548741340637207, 'logits/rejected': -0.3387761116027832, 'epoch': 0.25} + 25%|██▌ | 171/681 [07:16<21:39, 2.55s/it] 25%|██▌ | 172/681 [07:18<21:23, 2.52s/it] {'loss': 1.132, 'grad_norm': 18.535226821899414, 'learning_rate': 4.6650635094610966e-07, 'fcm_dpo/beta': 0.005491352174431086, 'fcm_dpo/q_t': 0.41575637459754944, 'fcm_dpo/delta': 0.02349797450006008, 'fcm_dpo/margin': 68.72428131103516, 'margin_dpo/margin_mean': 68.72427368164062, 'margin_dpo/margin_std': 113.38480377197266, 'logps/chosen': -164.5863037109375, 'logps/rejected': -250.0933380126953, 'logps/ref_chosen': -68.65391540527344, 'logps/ref_rejected': -85.43667602539062, 'KL/chosen_KL_mean': -95.93238830566406, 'KL/rejected_KL_mean': -164.65667724609375, 'KL/mean': -130.29452514648438, 'KL/std': 100.20172882080078, 'logits/chosen': -0.3598722219467163, 'logits/rejected': -0.3389941453933716, 'epoch': 0.25} + 25%|██▌ | 172/681 [07:19<21:23, 2.52s/it] 25%|██▌ | 173/681 [07:21<21:16, 2.51s/it] {'loss': 1.1112, 'grad_norm': 20.111751556396484, 'learning_rate': 4.6586183602616687e-07, 'fcm_dpo/beta': 0.005545733496546745, 'fcm_dpo/q_t': 0.4147086441516876, 'fcm_dpo/delta': 0.025776570662856102, 'fcm_dpo/margin': 67.58930969238281, 'margin_dpo/margin_mean': 67.58930969238281, 'margin_dpo/margin_std': 95.4912109375, 'logps/chosen': -151.28118896484375, 'logps/rejected': -234.5035400390625, 'logps/ref_chosen': -63.050880432128906, 'logps/ref_rejected': -78.68392181396484, 'KL/chosen_KL_mean': -88.23031616210938, 'KL/rejected_KL_mean': -155.81961059570312, 'KL/mean': -122.02497100830078, 'KL/std': 92.91819763183594, 'logits/chosen': -0.3795207440853119, 'logits/rejected': -0.3491283059120178, 'epoch': 0.25} + 25%|██▌ | 173/681 [07:21<21:16, 2.51s/it] 26%|██▌ | 174/681 [07:24<21:18, 2.52s/it] {'loss': 1.0882, 'grad_norm': 28.604568481445312, 'learning_rate': 4.652116329460919e-07, 'fcm_dpo/beta': 0.005529084708541632, 'fcm_dpo/q_t': 0.402817964553833, 'fcm_dpo/delta': -0.04399598762392998, 'fcm_dpo/margin': 79.87288665771484, 'margin_dpo/margin_mean': 79.87288665771484, 'margin_dpo/margin_std': 115.7405014038086, 'logps/chosen': -136.24945068359375, 'logps/rejected': -264.67059326171875, 'logps/ref_chosen': -53.36296844482422, 'logps/ref_rejected': -101.91120910644531, 'KL/chosen_KL_mean': -82.88648223876953, 'KL/rejected_KL_mean': -162.75936889648438, 'KL/mean': -122.82292175292969, 'KL/std': 97.02500915527344, 'logits/chosen': -0.30759066343307495, 'logits/rejected': -0.3249150216579437, 'epoch': 0.26} + 26%|██▌ | 174/681 [07:24<21:18, 2.52s/it] 26%|██▌ | 175/681 [07:26<21:32, 2.55s/it] {'loss': 0.9593, 'grad_norm': 29.169300079345703, 'learning_rate': 4.645557588393406e-07, 'fcm_dpo/beta': 0.005318961106240749, 'fcm_dpo/q_t': 0.36687812209129333, 'fcm_dpo/delta': -0.1964312642812729, 'fcm_dpo/margin': 109.91595458984375, 'margin_dpo/margin_mean': 109.91596221923828, 'margin_dpo/margin_std': 109.20188903808594, 'logps/chosen': -121.88074493408203, 'logps/rejected': -275.884765625, 'logps/ref_chosen': -45.417762756347656, 'logps/ref_rejected': -89.50579833984375, 'KL/chosen_KL_mean': -76.46298217773438, 'KL/rejected_KL_mean': -186.37893676757812, 'KL/mean': -131.42095947265625, 'KL/std': 104.40403747558594, 'logits/chosen': -0.32927554845809937, 'logits/rejected': -0.31611427664756775, 'epoch': 0.26} + 26%|██▌ | 175/681 [07:26<21:32, 2.55s/it] 26%|██▌ | 176/681 [07:28<20:49, 2.47s/it] {'loss': 1.0499, 'grad_norm': 20.01445770263672, 'learning_rate': 4.638942309888058e-07, 'fcm_dpo/beta': 0.0052184974774718285, 'fcm_dpo/q_t': 0.394927978515625, 'fcm_dpo/delta': -0.08138823509216309, 'fcm_dpo/margin': 91.51618957519531, 'margin_dpo/margin_mean': 91.51618957519531, 'margin_dpo/margin_std': 118.59428405761719, 'logps/chosen': -132.0542755126953, 'logps/rejected': -268.67657470703125, 'logps/ref_chosen': -50.452842712402344, 'logps/ref_rejected': -95.5589599609375, 'KL/chosen_KL_mean': -81.6014404296875, 'KL/rejected_KL_mean': -173.11761474609375, 'KL/mean': -127.35952758789062, 'KL/std': 102.09504699707031, 'logits/chosen': -0.28535836935043335, 'logits/rejected': -0.3025384843349457, 'epoch': 0.26} + 26%|██▌ | 176/681 [07:28<20:49, 2.47s/it] 26%|██▌ | 177/681 [07:31<21:09, 2.52s/it] {'loss': 1.0495, 'grad_norm': 27.786762237548828, 'learning_rate': 4.6322706682636137e-07, 'fcm_dpo/beta': 0.005144456867128611, 'fcm_dpo/q_t': 0.3949311375617981, 'fcm_dpo/delta': -0.07641495764255524, 'fcm_dpo/margin': 91.91362762451172, 'margin_dpo/margin_mean': 91.91362762451172, 'margin_dpo/margin_std': 118.17066955566406, 'logps/chosen': -156.07052612304688, 'logps/rejected': -282.6614685058594, 'logps/ref_chosen': -61.216468811035156, 'logps/ref_rejected': -95.89378356933594, 'KL/chosen_KL_mean': -94.85406494140625, 'KL/rejected_KL_mean': -186.76768493652344, 'KL/mean': -140.81088256835938, 'KL/std': 111.390869140625, 'logits/chosen': -0.3724118173122406, 'logits/rejected': -0.364002525806427, 'epoch': 0.26} + 26%|██▌ | 177/681 [07:31<21:09, 2.52s/it] 26%|██▌ | 178/681 [07:34<21:01, 2.51s/it] {'loss': 1.0002, 'grad_norm': 27.795106887817383, 'learning_rate': 4.6255428393240354e-07, 'fcm_dpo/beta': 0.004952050745487213, 'fcm_dpo/q_t': 0.37578919529914856, 'fcm_dpo/delta': -0.18411573767662048, 'fcm_dpo/margin': 115.63043975830078, 'margin_dpo/margin_mean': 115.63044738769531, 'margin_dpo/margin_std': 143.41700744628906, 'logps/chosen': -162.78466796875, 'logps/rejected': -325.5156555175781, 'logps/ref_chosen': -58.26478958129883, 'logps/ref_rejected': -105.3653335571289, 'KL/chosen_KL_mean': -104.51988220214844, 'KL/rejected_KL_mean': -220.1503143310547, 'KL/mean': -162.33509826660156, 'KL/std': 131.26687622070312, 'logits/chosen': -0.2641046941280365, 'logits/rejected': -0.2551937997341156, 'epoch': 0.26} + 26%|██▌ | 178/681 [07:34<21:01, 2.51s/it] 26%|██▋ | 179/681 [07:36<21:30, 2.57s/it] {'loss': 1.1404, 'grad_norm': 34.54417419433594, 'learning_rate': 4.6187590003538724e-07, 'fcm_dpo/beta': 0.004905564710497856, 'fcm_dpo/q_t': 0.41320013999938965, 'fcm_dpo/delta': 0.006664544343948364, 'fcm_dpo/margin': 80.14815521240234, 'margin_dpo/margin_mean': 80.14815521240234, 'margin_dpo/margin_std': 139.17221069335938, 'logps/chosen': -170.57879638671875, 'logps/rejected': -280.19647216796875, 'logps/ref_chosen': -61.05832290649414, 'logps/ref_rejected': -90.52782440185547, 'KL/chosen_KL_mean': -109.52047729492188, 'KL/rejected_KL_mean': -189.66867065429688, 'KL/mean': -149.59457397460938, 'KL/std': 112.11015319824219, 'logits/chosen': -0.30266761779785156, 'logits/rejected': -0.3117542266845703, 'epoch': 0.26} + 26%|██▋ | 179/681 [07:36<21:30, 2.57s/it] 26%|██▋ | 180/681 [07:39<21:03, 2.52s/it] {'loss': 1.0305, 'grad_norm': 19.25888442993164, 'learning_rate': 4.611919330113591e-07, 'fcm_dpo/beta': 0.004845252260565758, 'fcm_dpo/q_t': 0.3857002854347229, 'fcm_dpo/delta': -0.10879069566726685, 'fcm_dpo/margin': 103.84219360351562, 'margin_dpo/margin_mean': 103.84219360351562, 'margin_dpo/margin_std': 126.80170440673828, 'logps/chosen': -149.2576904296875, 'logps/rejected': -296.968994140625, 'logps/ref_chosen': -54.34272003173828, 'logps/ref_rejected': -98.21183776855469, 'KL/chosen_KL_mean': -94.91496276855469, 'KL/rejected_KL_mean': -198.7571563720703, 'KL/mean': -146.8360595703125, 'KL/std': 101.62055969238281, 'logits/chosen': -0.29693859815597534, 'logits/rejected': -0.29173195362091064, 'epoch': 0.26} + 26%|██▋ | 180/681 [07:39<21:03, 2.52s/it] 27%|██▋ | 181/681 [07:41<21:14, 2.55s/it] {'loss': 1.1713, 'grad_norm': 20.40754508972168, 'learning_rate': 4.605024008834863e-07, 'fcm_dpo/beta': 0.004894108511507511, 'fcm_dpo/q_t': 0.4280344247817993, 'fcm_dpo/delta': 0.08573634922504425, 'fcm_dpo/margin': 64.73393249511719, 'margin_dpo/margin_mean': 64.73393249511719, 'margin_dpo/margin_std': 117.01361083984375, 'logps/chosen': -138.34690856933594, 'logps/rejected': -209.73655700683594, 'logps/ref_chosen': -55.000457763671875, 'logps/ref_rejected': -61.656166076660156, 'KL/chosen_KL_mean': -83.34645080566406, 'KL/rejected_KL_mean': -148.08038330078125, 'KL/mean': -115.71342468261719, 'KL/std': 93.42445373535156, 'logits/chosen': -0.3203880190849304, 'logits/rejected': -0.2962578535079956, 'epoch': 0.27} + 27%|██▋ | 181/681 [07:41<21:14, 2.55s/it] 27%|██▋ | 182/681 [07:44<21:31, 2.59s/it] {'loss': 1.0117, 'grad_norm': 18.048755645751953, 'learning_rate': 4.598073218215817e-07, 'fcm_dpo/beta': 0.004775552079081535, 'fcm_dpo/q_t': 0.37630826234817505, 'fcm_dpo/delta': -0.15708649158477783, 'fcm_dpo/margin': 114.658935546875, 'margin_dpo/margin_mean': 114.658935546875, 'margin_dpo/margin_std': 138.28912353515625, 'logps/chosen': -120.32955932617188, 'logps/rejected': -283.4022216796875, 'logps/ref_chosen': -41.107852935791016, 'logps/ref_rejected': -89.5215835571289, 'KL/chosen_KL_mean': -79.22171020507812, 'KL/rejected_KL_mean': -193.88064575195312, 'KL/mean': -136.55117797851562, 'KL/std': 114.58843994140625, 'logits/chosen': -0.2832631766796112, 'logits/rejected': -0.29323720932006836, 'epoch': 0.27} + 27%|██▋ | 182/681 [07:44<21:31, 2.59s/it] 27%|██▋ | 183/681 [07:46<20:52, 2.51s/it] {'loss': 1.1828, 'grad_norm': 21.687788009643555, 'learning_rate': 4.5910671414162484e-07, 'fcm_dpo/beta': 0.004714460577815771, 'fcm_dpo/q_t': 0.4325304627418518, 'fcm_dpo/delta': -0.04429354518651962, 'fcm_dpo/margin': 61.56635665893555, 'margin_dpo/margin_mean': 61.56635665893555, 'margin_dpo/margin_std': 104.21000671386719, 'logps/chosen': -174.328369140625, 'logps/rejected': -254.34588623046875, 'logps/ref_chosen': -57.52456283569336, 'logps/ref_rejected': -75.97572326660156, 'KL/chosen_KL_mean': -116.80380249023438, 'KL/rejected_KL_mean': -178.3701629638672, 'KL/mean': -147.58697509765625, 'KL/std': 93.99075317382812, 'logits/chosen': -0.29588770866394043, 'logits/rejected': -0.28640466928482056, 'epoch': 0.27} + 27%|██▋ | 183/681 [07:46<20:52, 2.51s/it] 27%|██▋ | 184/681 [07:49<21:22, 2.58s/it] {'loss': 1.1687, 'grad_norm': 18.115541458129883, 'learning_rate': 4.5840059630527985e-07, 'fcm_dpo/beta': 0.0047124335542321205, 'fcm_dpo/q_t': 0.4299464225769043, 'fcm_dpo/delta': -0.004301935900002718, 'fcm_dpo/margin': 65.50109100341797, 'margin_dpo/margin_mean': 65.50109100341797, 'margin_dpo/margin_std': 111.95549011230469, 'logps/chosen': -154.51901245117188, 'logps/rejected': -238.1092071533203, 'logps/ref_chosen': -58.544952392578125, 'logps/ref_rejected': -76.63406372070312, 'KL/chosen_KL_mean': -95.97406005859375, 'KL/rejected_KL_mean': -161.4751434326172, 'KL/mean': -128.724609375, 'KL/std': 88.82809448242188, 'logits/chosen': -0.3457328975200653, 'logits/rejected': -0.33615928888320923, 'epoch': 0.27} + 27%|██▋ | 184/681 [07:49<21:22, 2.58s/it] 27%|██▋ | 185/681 [07:51<20:58, 2.54s/it] {'loss': 1.2341, 'grad_norm': 20.880599975585938, 'learning_rate': 4.5768898691940836e-07, 'fcm_dpo/beta': 0.0048194690607488155, 'fcm_dpo/q_t': 0.44680285453796387, 'fcm_dpo/delta': 0.15635941922664642, 'fcm_dpo/margin': 51.37147521972656, 'margin_dpo/margin_mean': 51.37147521972656, 'margin_dpo/margin_std': 122.99656677246094, 'logps/chosen': -166.65792846679688, 'logps/rejected': -229.76614379882812, 'logps/ref_chosen': -62.025848388671875, 'logps/ref_rejected': -73.7625961303711, 'KL/chosen_KL_mean': -104.63207244873047, 'KL/rejected_KL_mean': -156.0035400390625, 'KL/mean': -130.31781005859375, 'KL/std': 102.3460693359375, 'logits/chosen': -0.3066332936286926, 'logits/rejected': -0.2832027077674866, 'epoch': 0.27} + 27%|██▋ | 185/681 [07:52<20:58, 2.54s/it] 27%|██▋ | 186/681 [07:54<20:53, 2.53s/it] {'loss': 1.045, 'grad_norm': 26.266706466674805, 'learning_rate': 4.5697190473557947e-07, 'fcm_dpo/beta': 0.004802432842552662, 'fcm_dpo/q_t': 0.393841028213501, 'fcm_dpo/delta': -0.07216604053974152, 'fcm_dpo/margin': 97.591064453125, 'margin_dpo/margin_mean': 97.591064453125, 'margin_dpo/margin_std': 118.66375732421875, 'logps/chosen': -165.03244018554688, 'logps/rejected': -281.34246826171875, 'logps/ref_chosen': -69.35346984863281, 'logps/ref_rejected': -88.07244873046875, 'KL/chosen_KL_mean': -95.6789779663086, 'KL/rejected_KL_mean': -193.27001953125, 'KL/mean': -144.47451782226562, 'KL/std': 103.38729858398438, 'logits/chosen': -0.3480488061904907, 'logits/rejected': -0.32328087091445923, 'epoch': 0.27} + 27%|██▋ | 186/681 [07:54<20:53, 2.53s/it] 27%|██▋ | 187/681 [07:56<20:16, 2.46s/it] {'loss': 1.0959, 'grad_norm': 22.043073654174805, 'learning_rate': 4.5624936864957555e-07, 'fcm_dpo/beta': 0.004818159155547619, 'fcm_dpo/q_t': 0.41053086519241333, 'fcm_dpo/delta': 0.010491464287042618, 'fcm_dpo/margin': 80.8456802368164, 'margin_dpo/margin_mean': 80.8456802368164, 'margin_dpo/margin_std': 105.46454620361328, 'logps/chosen': -141.10153198242188, 'logps/rejected': -251.15985107421875, 'logps/ref_chosen': -52.7564582824707, 'logps/ref_rejected': -81.96910095214844, 'KL/chosen_KL_mean': -88.34507751464844, 'KL/rejected_KL_mean': -169.19076538085938, 'KL/mean': -128.76791381835938, 'KL/std': 97.03087615966797, 'logits/chosen': -0.3333667516708374, 'logits/rejected': -0.3270256221294403, 'epoch': 0.27} + 27%|██▋ | 187/681 [07:56<20:16, 2.46s/it] 28%|██▊ | 188/681 [07:59<20:29, 2.49s/it] {'loss': 1.0479, 'grad_norm': 28.16905975341797, 'learning_rate': 4.5552139770089454e-07, 'fcm_dpo/beta': 0.004757707007229328, 'fcm_dpo/q_t': 0.3954910933971405, 'fcm_dpo/delta': -0.06513302028179169, 'fcm_dpo/margin': 97.13882446289062, 'margin_dpo/margin_mean': 97.13883209228516, 'margin_dpo/margin_std': 117.88801574707031, 'logps/chosen': -132.64480590820312, 'logps/rejected': -269.9085693359375, 'logps/ref_chosen': -49.415489196777344, 'logps/ref_rejected': -89.54043579101562, 'KL/chosen_KL_mean': -83.22930908203125, 'KL/rejected_KL_mean': -180.36813354492188, 'KL/mean': -131.79873657226562, 'KL/std': 107.7387466430664, 'logits/chosen': -0.3342798352241516, 'logits/rejected': -0.3404528498649597, 'epoch': 0.28} + 28%|██▊ | 188/681 [07:59<20:29, 2.49s/it] 28%|██▊ | 189/681 [08:01<20:14, 2.47s/it] {'loss': 1.1258, 'grad_norm': 23.41521644592285, 'learning_rate': 4.5478801107224794e-07, 'fcm_dpo/beta': 0.004754100926220417, 'fcm_dpo/q_t': 0.41550976037979126, 'fcm_dpo/delta': 0.015977924689650536, 'fcm_dpo/margin': 80.89956665039062, 'margin_dpo/margin_mean': 80.89956665039062, 'margin_dpo/margin_std': 133.14503479003906, 'logps/chosen': -148.7410125732422, 'logps/rejected': -249.40896606445312, 'logps/ref_chosen': -52.39896011352539, 'logps/ref_rejected': -72.16735076904297, 'KL/chosen_KL_mean': -96.34205627441406, 'KL/rejected_KL_mean': -177.24160766601562, 'KL/mean': -136.79183959960938, 'KL/std': 108.5394287109375, 'logits/chosen': -0.3520697355270386, 'logits/rejected': -0.3348464369773865, 'epoch': 0.28} + 28%|██▊ | 189/681 [08:01<20:14, 2.47s/it] 28%|██▊ | 190/681 [08:04<19:41, 2.41s/it] {'loss': 1.0822, 'grad_norm': 18.363422393798828, 'learning_rate': 4.5404922808905543e-07, 'fcm_dpo/beta': 0.004754353780299425, 'fcm_dpo/q_t': 0.39939507842063904, 'fcm_dpo/delta': -0.05636203661561012, 'fcm_dpo/margin': 95.25030517578125, 'margin_dpo/margin_mean': 95.25030517578125, 'margin_dpo/margin_std': 133.5958251953125, 'logps/chosen': -167.74429321289062, 'logps/rejected': -300.862060546875, 'logps/ref_chosen': -64.68305969238281, 'logps/ref_rejected': -102.55052185058594, 'KL/chosen_KL_mean': -103.06121826171875, 'KL/rejected_KL_mean': -198.3115234375, 'KL/mean': -150.68637084960938, 'KL/std': 115.74911499023438, 'logits/chosen': -0.38547688722610474, 'logits/rejected': -0.375651478767395, 'epoch': 0.28} + 28%|██▊ | 190/681 [08:04<19:41, 2.41s/it] 28%|██▊ | 191/681 [08:06<20:27, 2.51s/it] {'loss': 0.9565, 'grad_norm': 20.231264114379883, 'learning_rate': 4.5330506821893565e-07, 'fcm_dpo/beta': 0.004521770402789116, 'fcm_dpo/q_t': 0.3637212812900543, 'fcm_dpo/delta': -0.23212674260139465, 'fcm_dpo/margin': 136.34649658203125, 'margin_dpo/margin_mean': 136.34649658203125, 'margin_dpo/margin_std': 147.54470825195312, 'logps/chosen': -164.015869140625, 'logps/rejected': -341.84320068359375, 'logps/ref_chosen': -68.65887451171875, 'logps/ref_rejected': -110.1396713256836, 'KL/chosen_KL_mean': -95.35700988769531, 'KL/rejected_KL_mean': -231.70352172851562, 'KL/mean': -163.53025817871094, 'KL/std': 133.92214965820312, 'logits/chosen': -0.3467414379119873, 'logits/rejected': -0.3258952498435974, 'epoch': 0.28} + 28%|██▊ | 191/681 [08:06<20:27, 2.51s/it] 28%|██▊ | 192/681 [08:09<20:21, 2.50s/it] {'loss': 1.1135, 'grad_norm': 25.52708625793457, 'learning_rate': 4.5255555107119336e-07, 'fcm_dpo/beta': 0.0044925631955266, 'fcm_dpo/q_t': 0.4096482992172241, 'fcm_dpo/delta': -0.010320080444216728, 'fcm_dpo/margin': 91.21162414550781, 'margin_dpo/margin_mean': 91.21162414550781, 'margin_dpo/margin_std': 144.74786376953125, 'logps/chosen': -194.16470336914062, 'logps/rejected': -318.97076416015625, 'logps/ref_chosen': -69.72691345214844, 'logps/ref_rejected': -103.32135009765625, 'KL/chosen_KL_mean': -124.43778991699219, 'KL/rejected_KL_mean': -215.6494140625, 'KL/mean': -170.04360961914062, 'KL/std': 117.56196594238281, 'logits/chosen': -0.32894307374954224, 'logits/rejected': -0.328900545835495, 'epoch': 0.28} + 28%|██▊ | 192/681 [08:09<20:21, 2.50s/it] 28%|██▊ | 193/681 [08:11<20:02, 2.46s/it] {'loss': 1.2529, 'grad_norm': 26.372344970703125, 'learning_rate': 4.5180069639630236e-07, 'fcm_dpo/beta': 0.004495399538427591, 'fcm_dpo/q_t': 0.44323813915252686, 'fcm_dpo/delta': 0.0392833836376667, 'fcm_dpo/margin': 53.221702575683594, 'margin_dpo/margin_mean': 53.221702575683594, 'margin_dpo/margin_std': 137.95343017578125, 'logps/chosen': -185.02273559570312, 'logps/rejected': -254.4615020751953, 'logps/ref_chosen': -60.19049835205078, 'logps/ref_rejected': -76.40755462646484, 'KL/chosen_KL_mean': -124.83224487304688, 'KL/rejected_KL_mean': -178.053955078125, 'KL/mean': -151.44308471679688, 'KL/std': 107.97267150878906, 'logits/chosen': -0.3519429564476013, 'logits/rejected': -0.34495627880096436, 'epoch': 0.28} + 28%|██▊ | 193/681 [08:11<20:02, 2.46s/it] 28%|██▊ | 194/681 [08:13<19:36, 2.42s/it] {'loss': 1.082, 'grad_norm': 18.025230407714844, 'learning_rate': 4.510405240853854e-07, 'fcm_dpo/beta': 0.004506401717662811, 'fcm_dpo/q_t': 0.4085754156112671, 'fcm_dpo/delta': 0.010020148009061813, 'fcm_dpo/margin': 86.59368896484375, 'margin_dpo/margin_mean': 86.59367370605469, 'margin_dpo/margin_std': 99.3104019165039, 'logps/chosen': -116.42521667480469, 'logps/rejected': -225.86331176757812, 'logps/ref_chosen': -37.84037399291992, 'logps/ref_rejected': -60.684783935546875, 'KL/chosen_KL_mean': -78.5848388671875, 'KL/rejected_KL_mean': -165.17852783203125, 'KL/mean': -121.88168334960938, 'KL/std': 90.56858825683594, 'logits/chosen': -0.2157665491104126, 'logits/rejected': -0.1980063021183014, 'epoch': 0.28} + 28%|██▊ | 194/681 [08:13<19:36, 2.42s/it] 29%|██▊ | 195/681 [08:16<20:16, 2.50s/it] {'loss': 1.0714, 'grad_norm': 22.234222412109375, 'learning_rate': 4.5027505416968985e-07, 'fcm_dpo/beta': 0.004506120923906565, 'fcm_dpo/q_t': 0.4031534194946289, 'fcm_dpo/delta': -0.023354141041636467, 'fcm_dpo/margin': 93.73165893554688, 'margin_dpo/margin_mean': 93.73165893554688, 'margin_dpo/margin_std': 116.69031524658203, 'logps/chosen': -179.31309509277344, 'logps/rejected': -314.9241638183594, 'logps/ref_chosen': -54.891571044921875, 'logps/ref_rejected': -96.77095794677734, 'KL/chosen_KL_mean': -124.42152404785156, 'KL/rejected_KL_mean': -218.1531982421875, 'KL/mean': -171.287353515625, 'KL/std': 112.64602661132812, 'logits/chosen': -0.24858853220939636, 'logits/rejected': -0.2673921287059784, 'epoch': 0.29} + 29%|██▊ | 195/681 [08:16<20:16, 2.50s/it] 29%|██▉ | 196/681 [08:19<20:20, 2.52s/it] {'loss': 1.0592, 'grad_norm': 18.23614501953125, 'learning_rate': 4.495043068200599e-07, 'fcm_dpo/beta': 0.004426237195730209, 'fcm_dpo/q_t': 0.3946911692619324, 'fcm_dpo/delta': -0.07563818991184235, 'fcm_dpo/margin': 106.43045043945312, 'margin_dpo/margin_mean': 106.43045043945312, 'margin_dpo/margin_std': 137.8130645751953, 'logps/chosen': -150.75445556640625, 'logps/rejected': -279.99261474609375, 'logps/ref_chosen': -53.245243072509766, 'logps/ref_rejected': -76.05294799804688, 'KL/chosen_KL_mean': -97.50921630859375, 'KL/rejected_KL_mean': -203.93966674804688, 'KL/mean': -150.7244415283203, 'KL/std': 114.79684448242188, 'logits/chosen': -0.30258023738861084, 'logits/rejected': -0.288103848695755, 'epoch': 0.29} + 29%|██▉ | 196/681 [08:19<20:20, 2.52s/it] 29%|██▉ | 197/681 [08:21<20:29, 2.54s/it] {'loss': 1.1138, 'grad_norm': 18.144241333007812, 'learning_rate': 4.4872830234640493e-07, 'fcm_dpo/beta': 0.004469497129321098, 'fcm_dpo/q_t': 0.41608455777168274, 'fcm_dpo/delta': 0.03227302059531212, 'fcm_dpo/margin': 82.47077941894531, 'margin_dpo/margin_mean': 82.47077941894531, 'margin_dpo/margin_std': 115.41438293457031, 'logps/chosen': -162.36917114257812, 'logps/rejected': -261.6285400390625, 'logps/ref_chosen': -60.42033386230469, 'logps/ref_rejected': -77.20890808105469, 'KL/chosen_KL_mean': -101.94883728027344, 'KL/rejected_KL_mean': -184.41961669921875, 'KL/mean': -143.18423461914062, 'KL/std': 101.37451171875, 'logits/chosen': -0.2958967983722687, 'logits/rejected': -0.290219783782959, 'epoch': 0.29} + 29%|██▉ | 197/681 [08:21<20:29, 2.54s/it] 29%|██▉ | 198/681 [08:24<20:32, 2.55s/it] {'loss': 1.0591, 'grad_norm': 22.24930191040039, 'learning_rate': 4.479470611971645e-07, 'fcm_dpo/beta': 0.004416568670421839, 'fcm_dpo/q_t': 0.3967708349227905, 'fcm_dpo/delta': -0.07120651751756668, 'fcm_dpo/margin': 105.94068145751953, 'margin_dpo/margin_mean': 105.94068145751953, 'margin_dpo/margin_std': 143.1464080810547, 'logps/chosen': -169.86660766601562, 'logps/rejected': -318.0143737792969, 'logps/ref_chosen': -55.03618621826172, 'logps/ref_rejected': -97.24325561523438, 'KL/chosen_KL_mean': -114.83041381835938, 'KL/rejected_KL_mean': -220.7711181640625, 'KL/mean': -167.80075073242188, 'KL/std': 126.27465057373047, 'logits/chosen': -0.3203980028629303, 'logits/rejected': -0.3210110068321228, 'epoch': 0.29} + 29%|██▉ | 198/681 [08:24<20:32, 2.55s/it] 29%|██▉ | 199/681 [08:27<20:52, 2.60s/it] {'loss': 1.0675, 'grad_norm': 23.503461837768555, 'learning_rate': 4.471606039587695e-07, 'fcm_dpo/beta': 0.004335303790867329, 'fcm_dpo/q_t': 0.3972257673740387, 'fcm_dpo/delta': -0.058502815663814545, 'fcm_dpo/margin': 104.91361999511719, 'margin_dpo/margin_mean': 104.91361999511719, 'margin_dpo/margin_std': 138.49346923828125, 'logps/chosen': -167.2706756591797, 'logps/rejected': -300.003662109375, 'logps/ref_chosen': -56.828826904296875, 'logps/ref_rejected': -84.64820861816406, 'KL/chosen_KL_mean': -110.44184875488281, 'KL/rejected_KL_mean': -215.35546875, 'KL/mean': -162.89865112304688, 'KL/std': 114.6693115234375, 'logits/chosen': -0.27863985300064087, 'logits/rejected': -0.2598820924758911, 'epoch': 0.29} + 29%|██▉ | 199/681 [08:27<20:52, 2.60s/it] 29%|██▉ | 200/681 [08:29<20:52, 2.60s/it] {'loss': 1.0929, 'grad_norm': 22.9044246673584, 'learning_rate': 4.4636895135509966e-07, 'fcm_dpo/beta': 0.004300840198993683, 'fcm_dpo/q_t': 0.40177974104881287, 'fcm_dpo/delta': -0.04902205243706703, 'fcm_dpo/margin': 103.79698181152344, 'margin_dpo/margin_mean': 103.7969741821289, 'margin_dpo/margin_std': 158.15789794921875, 'logps/chosen': -161.44046020507812, 'logps/rejected': -292.77880859375, 'logps/ref_chosen': -53.06706237792969, 'logps/ref_rejected': -80.60843658447266, 'KL/chosen_KL_mean': -108.3734130859375, 'KL/rejected_KL_mean': -212.17037963867188, 'KL/mean': -160.2718963623047, 'KL/std': 123.57206726074219, 'logits/chosen': -0.26905137300491333, 'logits/rejected': -0.25207480788230896, 'epoch': 0.29} + 29%|██▉ | 200/681 [08:29<20:52, 2.60s/it] 30%|██▉ | 201/681 [08:32<20:47, 2.60s/it] {'loss': 1.0829, 'grad_norm': 20.798912048339844, 'learning_rate': 4.455721242469372e-07, 'fcm_dpo/beta': 0.004290143959224224, 'fcm_dpo/q_t': 0.40112942457199097, 'fcm_dpo/delta': -0.041380785405635834, 'fcm_dpo/margin': 102.45198822021484, 'margin_dpo/margin_mean': 102.45198059082031, 'margin_dpo/margin_std': 144.92611694335938, 'logps/chosen': -189.33114624023438, 'logps/rejected': -331.18914794921875, 'logps/ref_chosen': -75.4022216796875, 'logps/ref_rejected': -114.80821990966797, 'KL/chosen_KL_mean': -113.9289321899414, 'KL/rejected_KL_mean': -216.38092041015625, 'KL/mean': -165.15493774414062, 'KL/std': 129.3989715576172, 'logits/chosen': -0.3590313792228699, 'logits/rejected': -0.3559607267379761, 'epoch': 0.3} + 30%|██▉ | 201/681 [08:32<20:47, 2.60s/it] 30%|██▉ | 202/681 [08:34<20:46, 2.60s/it] {'loss': 1.1841, 'grad_norm': 20.812585830688477, 'learning_rate': 4.4477014363141755e-07, 'fcm_dpo/beta': 0.0043277074582874775, 'fcm_dpo/q_t': 0.42985087633132935, 'fcm_dpo/delta': 0.08649900555610657, 'fcm_dpo/margin': 73.06129455566406, 'margin_dpo/margin_mean': 73.0613021850586, 'margin_dpo/margin_std': 143.23988342285156, 'logps/chosen': -166.3647918701172, 'logps/rejected': -276.309814453125, 'logps/ref_chosen': -50.101318359375, 'logps/ref_rejected': -86.98503112792969, 'KL/chosen_KL_mean': -116.26347351074219, 'KL/rejected_KL_mean': -189.32476806640625, 'KL/mean': -152.79412841796875, 'KL/std': 111.22699737548828, 'logits/chosen': -0.2794426679611206, 'logits/rejected': -0.293861985206604, 'epoch': 0.3} + 30%|██▉ | 202/681 [08:34<20:46, 2.60s/it] 30%|██▉ | 203/681 [08:37<20:59, 2.64s/it] {'loss': 1.0993, 'grad_norm': 21.901674270629883, 'learning_rate': 4.439630306414758e-07, 'fcm_dpo/beta': 0.004343975335359573, 'fcm_dpo/q_t': 0.4100680649280548, 'fcm_dpo/delta': 0.00605600792914629, 'fcm_dpo/margin': 90.73900604248047, 'margin_dpo/margin_mean': 90.73899841308594, 'margin_dpo/margin_std': 125.68807983398438, 'logps/chosen': -175.4761505126953, 'logps/rejected': -291.50140380859375, 'logps/ref_chosen': -60.60969543457031, 'logps/ref_rejected': -85.89596557617188, 'KL/chosen_KL_mean': -114.866455078125, 'KL/rejected_KL_mean': -205.60543823242188, 'KL/mean': -160.2359619140625, 'KL/std': 114.47230529785156, 'logits/chosen': -0.3357563614845276, 'logits/rejected': -0.32634925842285156, 'epoch': 0.3} + 30%|██▉ | 203/681 [08:37<20:59, 2.64s/it] 30%|██▉ | 204/681 [08:40<21:10, 2.66s/it] {'loss': 1.1522, 'grad_norm': 22.241016387939453, 'learning_rate': 4.431508065452897e-07, 'fcm_dpo/beta': 0.00437512993812561, 'fcm_dpo/q_t': 0.42096078395843506, 'fcm_dpo/delta': 0.04220545291900635, 'fcm_dpo/margin': 82.12925720214844, 'margin_dpo/margin_mean': 82.12925720214844, 'margin_dpo/margin_std': 144.77645874023438, 'logps/chosen': -208.19058227539062, 'logps/rejected': -297.8507995605469, 'logps/ref_chosen': -80.16496276855469, 'logps/ref_rejected': -87.69590759277344, 'KL/chosen_KL_mean': -128.02561950683594, 'KL/rejected_KL_mean': -210.15489196777344, 'KL/mean': -169.0902557373047, 'KL/std': 124.49624633789062, 'logits/chosen': -0.4248543977737427, 'logits/rejected': -0.38815587759017944, 'epoch': 0.3} + 30%|██▉ | 204/681 [08:40<21:10, 2.66s/it] 30%|███ | 205/681 [08:42<21:05, 2.66s/it] {'loss': 1.0534, 'grad_norm': 21.1467342376709, 'learning_rate': 4.4233349274571974e-07, 'fcm_dpo/beta': 0.004297832027077675, 'fcm_dpo/q_t': 0.39113306999206543, 'fcm_dpo/delta': -0.08044849336147308, 'fcm_dpo/margin': 110.42106628417969, 'margin_dpo/margin_mean': 110.42106628417969, 'margin_dpo/margin_std': 136.45323181152344, 'logps/chosen': -180.17422485351562, 'logps/rejected': -316.33563232421875, 'logps/ref_chosen': -59.384735107421875, 'logps/ref_rejected': -85.12505340576172, 'KL/chosen_KL_mean': -120.78949737548828, 'KL/rejected_KL_mean': -231.2105712890625, 'KL/mean': -176.00003051757812, 'KL/std': 126.26949310302734, 'logits/chosen': -0.32940664887428284, 'logits/rejected': -0.29995858669281006, 'epoch': 0.3} + 30%|███ | 205/681 [08:43<21:05, 2.66s/it] 30%|███ | 206/681 [08:45<20:10, 2.55s/it] {'loss': 1.0145, 'grad_norm': 25.72849464416504, 'learning_rate': 4.415111107797445e-07, 'fcm_dpo/beta': 0.004232403822243214, 'fcm_dpo/q_t': 0.38320374488830566, 'fcm_dpo/delta': -0.10859975218772888, 'fcm_dpo/margin': 118.73365783691406, 'margin_dpo/margin_mean': 118.73365783691406, 'margin_dpo/margin_std': 128.0810089111328, 'logps/chosen': -157.33773803710938, 'logps/rejected': -328.0604248046875, 'logps/ref_chosen': -46.964500427246094, 'logps/ref_rejected': -98.9534912109375, 'KL/chosen_KL_mean': -110.37324523925781, 'KL/rejected_KL_mean': -229.10691833496094, 'KL/mean': -169.74008178710938, 'KL/std': 117.97074127197266, 'logits/chosen': -0.26661020517349243, 'logits/rejected': -0.2699154019355774, 'epoch': 0.3} + 30%|███ | 206/681 [08:45<20:10, 2.55s/it] 30%|███ | 207/681 [08:47<20:14, 2.56s/it] {'loss': 0.9974, 'grad_norm': 22.681591033935547, 'learning_rate': 4.4068368231789365e-07, 'fcm_dpo/beta': 0.004127143882215023, 'fcm_dpo/q_t': 0.3774099349975586, 'fcm_dpo/delta': -0.1631755232810974, 'fcm_dpo/margin': 134.29855346679688, 'margin_dpo/margin_mean': 134.298583984375, 'margin_dpo/margin_std': 156.59857177734375, 'logps/chosen': -156.96231079101562, 'logps/rejected': -319.65240478515625, 'logps/ref_chosen': -56.05625915527344, 'logps/ref_rejected': -84.44779968261719, 'KL/chosen_KL_mean': -100.90605163574219, 'KL/rejected_KL_mean': -235.20462036132812, 'KL/mean': -168.05532836914062, 'KL/std': 134.08450317382812, 'logits/chosen': -0.35407179594039917, 'logits/rejected': -0.32842785120010376, 'epoch': 0.3} + 30%|███ | 207/681 [08:47<20:14, 2.56s/it] 31%|███ | 208/681 [08:50<20:23, 2.59s/it] {'loss': 1.096, 'grad_norm': 23.56682014465332, 'learning_rate': 4.398512291636768e-07, 'fcm_dpo/beta': 0.004062125459313393, 'fcm_dpo/q_t': 0.40312352776527405, 'fcm_dpo/delta': -0.02891511656343937, 'fcm_dpo/margin': 105.24872589111328, 'margin_dpo/margin_mean': 105.24872589111328, 'margin_dpo/margin_std': 155.59713745117188, 'logps/chosen': -221.88238525390625, 'logps/rejected': -354.35040283203125, 'logps/ref_chosen': -67.06761169433594, 'logps/ref_rejected': -94.28689575195312, 'KL/chosen_KL_mean': -154.8147735595703, 'KL/rejected_KL_mean': -260.06353759765625, 'KL/mean': -207.43914794921875, 'KL/std': 128.05979919433594, 'logits/chosen': -0.38881534337997437, 'logits/rejected': -0.37188804149627686, 'epoch': 0.31} + 31%|███ | 208/681 [08:50<20:23, 2.59s/it] 31%|███ | 209/681 [08:52<19:45, 2.51s/it] {'loss': 1.1292, 'grad_norm': 26.791549682617188, 'learning_rate': 4.3901377325300857e-07, 'fcm_dpo/beta': 0.004076983779668808, 'fcm_dpo/q_t': 0.41346555948257446, 'fcm_dpo/delta': 0.018282007426023483, 'fcm_dpo/margin': 93.79732513427734, 'margin_dpo/margin_mean': 93.79731750488281, 'margin_dpo/margin_std': 149.582763671875, 'logps/chosen': -185.5780029296875, 'logps/rejected': -304.1351623535156, 'logps/ref_chosen': -56.18169403076172, 'logps/ref_rejected': -80.94152069091797, 'KL/chosen_KL_mean': -129.39630126953125, 'KL/rejected_KL_mean': -223.19363403320312, 'KL/mean': -176.29495239257812, 'KL/std': 115.95198059082031, 'logits/chosen': -0.26864010095596313, 'logits/rejected': -0.2571912109851837, 'epoch': 0.31} + 31%|███ | 209/681 [08:52<19:45, 2.51s/it] 31%|███ | 210/681 [08:55<19:34, 2.49s/it] {'loss': 1.0747, 'grad_norm': 23.223583221435547, 'learning_rate': 4.381713366536311e-07, 'fcm_dpo/beta': 0.004069700837135315, 'fcm_dpo/q_t': 0.40055060386657715, 'fcm_dpo/delta': -0.04440900310873985, 'fcm_dpo/margin': 108.67684936523438, 'margin_dpo/margin_mean': 108.67683410644531, 'margin_dpo/margin_std': 146.201904296875, 'logps/chosen': -163.31729125976562, 'logps/rejected': -302.303955078125, 'logps/ref_chosen': -46.371822357177734, 'logps/ref_rejected': -76.68162536621094, 'KL/chosen_KL_mean': -116.94548034667969, 'KL/rejected_KL_mean': -225.622314453125, 'KL/mean': -171.28390502929688, 'KL/std': 119.73749542236328, 'logits/chosen': -0.2933782935142517, 'logits/rejected': -0.2853144705295563, 'epoch': 0.31} + 31%|███ | 210/681 [08:55<19:34, 2.49s/it] 31%|███ | 211/681 [08:57<19:03, 2.43s/it] {'loss': 1.1462, 'grad_norm': 30.78042221069336, 'learning_rate': 4.373239415645323e-07, 'fcm_dpo/beta': 0.004061352461576462, 'fcm_dpo/q_t': 0.41933655738830566, 'fcm_dpo/delta': 0.02644379436969757, 'fcm_dpo/margin': 92.21820831298828, 'margin_dpo/margin_mean': 92.21821594238281, 'margin_dpo/margin_std': 160.97451782226562, 'logps/chosen': -247.54046630859375, 'logps/rejected': -347.6473083496094, 'logps/ref_chosen': -78.93235778808594, 'logps/ref_rejected': -86.82098388671875, 'KL/chosen_KL_mean': -168.60812377929688, 'KL/rejected_KL_mean': -260.8263244628906, 'KL/mean': -214.71722412109375, 'KL/std': 136.29385375976562, 'logits/chosen': -0.3214316964149475, 'logits/rejected': -0.2823808193206787, 'epoch': 0.31} + 31%|███ | 211/681 [08:57<19:03, 2.43s/it] 31%|███ | 212/681 [09:00<19:16, 2.47s/it] {'loss': 1.0319, 'grad_norm': 24.799522399902344, 'learning_rate': 4.3647161031536086e-07, 'fcm_dpo/beta': 0.003954698797315359, 'fcm_dpo/q_t': 0.3826107978820801, 'fcm_dpo/delta': -0.13367314636707306, 'fcm_dpo/margin': 132.64306640625, 'margin_dpo/margin_mean': 132.64306640625, 'margin_dpo/margin_std': 163.62814331054688, 'logps/chosen': -198.0283203125, 'logps/rejected': -375.5322265625, 'logps/ref_chosen': -58.19701385498047, 'logps/ref_rejected': -103.05785369873047, 'KL/chosen_KL_mean': -139.83131408691406, 'KL/rejected_KL_mean': -272.474365234375, 'KL/mean': -206.15283203125, 'KL/std': 148.2513427734375, 'logits/chosen': -0.3196195363998413, 'logits/rejected': -0.31085437536239624, 'epoch': 0.31} + 31%|███ | 212/681 [09:00<19:16, 2.47s/it] 31%|███▏ | 213/681 [09:02<19:33, 2.51s/it] {'loss': 1.0333, 'grad_norm': 29.145305633544922, 'learning_rate': 4.3561436536583774e-07, 'fcm_dpo/beta': 0.0038848065305501223, 'fcm_dpo/q_t': 0.3872129023075104, 'fcm_dpo/delta': -0.10088707506656647, 'fcm_dpo/margin': 127.49536895751953, 'margin_dpo/margin_mean': 127.49537658691406, 'margin_dpo/margin_std': 153.2450408935547, 'logps/chosen': -199.65298461914062, 'logps/rejected': -353.55035400390625, 'logps/ref_chosen': -67.51271057128906, 'logps/ref_rejected': -93.91471862792969, 'KL/chosen_KL_mean': -132.14027404785156, 'KL/rejected_KL_mean': -259.6356506347656, 'KL/mean': -195.88795471191406, 'KL/std': 129.16009521484375, 'logits/chosen': -0.3360249698162079, 'logits/rejected': -0.31101077795028687, 'epoch': 0.31} + 31%|███▏ | 213/681 [09:02<19:33, 2.51s/it] 31%|███▏ | 214/681 [09:04<18:54, 2.43s/it] {'loss': 1.0671, 'grad_norm': 23.445825576782227, 'learning_rate': 4.3475222930516473e-07, 'fcm_dpo/beta': 0.003853208851069212, 'fcm_dpo/q_t': 0.4001784920692444, 'fcm_dpo/delta': -0.043163709342479706, 'fcm_dpo/margin': 114.5154800415039, 'margin_dpo/margin_mean': 114.51548767089844, 'margin_dpo/margin_std': 147.68756103515625, 'logps/chosen': -154.4561004638672, 'logps/rejected': -304.88409423828125, 'logps/ref_chosen': -41.604888916015625, 'logps/ref_rejected': -77.51741027832031, 'KL/chosen_KL_mean': -112.85121154785156, 'KL/rejected_KL_mean': -227.36666870117188, 'KL/mean': -170.10894775390625, 'KL/std': 127.09822082519531, 'logits/chosen': -0.25710099935531616, 'logits/rejected': -0.26210659742355347, 'epoch': 0.31} + 31%|███▏ | 214/681 [09:05<18:54, 2.43s/it] 32%|███▏ | 215/681 [09:07<19:23, 2.50s/it] {'loss': 1.0433, 'grad_norm': 26.497583389282227, 'learning_rate': 4.3388522485142885e-07, 'fcm_dpo/beta': 0.0038004510570317507, 'fcm_dpo/q_t': 0.39448457956314087, 'fcm_dpo/delta': -0.06129283457994461, 'fcm_dpo/margin': 120.56410217285156, 'margin_dpo/margin_mean': 120.56410217285156, 'margin_dpo/margin_std': 137.23513793945312, 'logps/chosen': -187.30520629882812, 'logps/rejected': -344.5546875, 'logps/ref_chosen': -53.279266357421875, 'logps/ref_rejected': -89.96464538574219, 'KL/chosen_KL_mean': -134.02593994140625, 'KL/rejected_KL_mean': -254.5900421142578, 'KL/mean': -194.3079833984375, 'KL/std': 132.08059692382812, 'logits/chosen': -0.2794630229473114, 'logits/rejected': -0.27032387256622314, 'epoch': 0.32} + 32%|███▏ | 215/681 [09:07<19:23, 2.50s/it] 32%|███▏ | 216/681 [09:10<20:07, 2.60s/it] {'loss': 1.0807, 'grad_norm': 24.74566078186035, 'learning_rate': 4.330133748510036e-07, 'fcm_dpo/beta': 0.003782880725339055, 'fcm_dpo/q_t': 0.39900004863739014, 'fcm_dpo/delta': -0.05413120239973068, 'fcm_dpo/margin': 119.37692260742188, 'margin_dpo/margin_mean': 119.37692260742188, 'margin_dpo/margin_std': 169.870849609375, 'logps/chosen': -187.59117126464844, 'logps/rejected': -335.27923583984375, 'logps/ref_chosen': -48.887794494628906, 'logps/ref_rejected': -77.19892883300781, 'KL/chosen_KL_mean': -138.703369140625, 'KL/rejected_KL_mean': -258.0802917480469, 'KL/mean': -198.391845703125, 'KL/std': 138.0404510498047, 'logits/chosen': -0.2884059250354767, 'logits/rejected': -0.27266985177993774, 'epoch': 0.32} + 32%|███▏ | 216/681 [09:10<20:07, 2.60s/it] 32%|███▏ | 217/681 [09:13<20:07, 2.60s/it] {'loss': 1.0116, 'grad_norm': 20.693517684936523, 'learning_rate': 4.3213670227794757e-07, 'fcm_dpo/beta': 0.003682144917547703, 'fcm_dpo/q_t': 0.3833308517932892, 'fcm_dpo/delta': -0.12343692779541016, 'fcm_dpo/margin': 140.3418731689453, 'margin_dpo/margin_mean': 140.34185791015625, 'margin_dpo/margin_std': 159.26388549804688, 'logps/chosen': -191.657958984375, 'logps/rejected': -382.23284912109375, 'logps/ref_chosen': -49.845306396484375, 'logps/ref_rejected': -100.07832336425781, 'KL/chosen_KL_mean': -141.81265258789062, 'KL/rejected_KL_mean': -282.154541015625, 'KL/mean': -211.98358154296875, 'KL/std': 141.36019897460938, 'logits/chosen': -0.26268115639686584, 'logits/rejected': -0.2574685513973236, 'epoch': 0.32} + 32%|███▏ | 217/681 [09:13<20:07, 2.60s/it] 32%|███▏ | 218/681 [09:15<20:08, 2.61s/it] {'loss': 1.1106, 'grad_norm': 21.03861427307129, 'learning_rate': 4.3125523023339815e-07, 'fcm_dpo/beta': 0.003660230664536357, 'fcm_dpo/q_t': 0.410659521818161, 'fcm_dpo/delta': 0.0013791173696517944, 'fcm_dpo/margin': 108.90492248535156, 'margin_dpo/margin_mean': 108.9049301147461, 'margin_dpo/margin_std': 163.36837768554688, 'logps/chosen': -207.3604736328125, 'logps/rejected': -345.5351257324219, 'logps/ref_chosen': -58.576683044433594, 'logps/ref_rejected': -87.84639739990234, 'KL/chosen_KL_mean': -148.78378295898438, 'KL/rejected_KL_mean': -257.688720703125, 'KL/mean': -203.23626708984375, 'KL/std': 139.531494140625, 'logits/chosen': -0.28771138191223145, 'logits/rejected': -0.28279104828834534, 'epoch': 0.32} + 32%|███▏ | 218/681 [09:15<20:08, 2.61s/it] 32%|███▏ | 219/681 [09:18<20:08, 2.61s/it] {'loss': 1.1738, 'grad_norm': 28.875822067260742, 'learning_rate': 4.303689819449636e-07, 'fcm_dpo/beta': 0.0037173782475292683, 'fcm_dpo/q_t': 0.4234854578971863, 'fcm_dpo/delta': 0.05955355241894722, 'fcm_dpo/margin': 91.93537139892578, 'margin_dpo/margin_mean': 91.93536376953125, 'margin_dpo/margin_std': 175.91717529296875, 'logps/chosen': -220.817138671875, 'logps/rejected': -337.49908447265625, 'logps/ref_chosen': -61.083858489990234, 'logps/ref_rejected': -85.83042907714844, 'KL/chosen_KL_mean': -159.7332763671875, 'KL/rejected_KL_mean': -251.6686553955078, 'KL/mean': -205.7009735107422, 'KL/std': 144.26089477539062, 'logits/chosen': -0.3138810992240906, 'logits/rejected': -0.30747318267822266, 'epoch': 0.32} + 32%|███▏ | 219/681 [09:18<20:08, 2.61s/it] 32%|███▏ | 220/681 [09:20<20:05, 2.61s/it] {'loss': 1.1674, 'grad_norm': 28.49346160888672, 'learning_rate': 4.2947798076611047e-07, 'fcm_dpo/beta': 0.0037627811543643475, 'fcm_dpo/q_t': 0.43049296736717224, 'fcm_dpo/delta': 0.1060803085565567, 'fcm_dpo/margin': 78.98377990722656, 'margin_dpo/margin_mean': 78.98377990722656, 'margin_dpo/margin_std': 128.49771118164062, 'logps/chosen': -249.44964599609375, 'logps/rejected': -346.087646484375, 'logps/ref_chosen': -70.03128051757812, 'logps/ref_rejected': -87.68551635742188, 'KL/chosen_KL_mean': -179.41836547851562, 'KL/rejected_KL_mean': -258.40216064453125, 'KL/mean': -218.91024780273438, 'KL/std': 125.53445434570312, 'logits/chosen': -0.2874869704246521, 'logits/rejected': -0.2643676996231079, 'epoch': 0.32} + 32%|███▏ | 220/681 [09:20<20:05, 2.61s/it] 32%|███▏ | 221/681 [09:23<19:45, 2.58s/it] {'loss': 0.9343, 'grad_norm': 25.883392333984375, 'learning_rate': 4.285822501755485e-07, 'fcm_dpo/beta': 0.003666388336569071, 'fcm_dpo/q_t': 0.35655221343040466, 'fcm_dpo/delta': -0.24369555711746216, 'fcm_dpo/margin': 171.518798828125, 'margin_dpo/margin_mean': 171.518798828125, 'margin_dpo/margin_std': 161.14630126953125, 'logps/chosen': -205.25103759765625, 'logps/rejected': -431.08282470703125, 'logps/ref_chosen': -52.15470886230469, 'logps/ref_rejected': -106.46768188476562, 'KL/chosen_KL_mean': -153.0963134765625, 'KL/rejected_KL_mean': -324.6151428222656, 'KL/mean': -238.85574340820312, 'KL/std': 157.85043334960938, 'logits/chosen': -0.28886061906814575, 'logits/rejected': -0.2952112555503845, 'epoch': 0.32} + 32%|███▏ | 221/681 [09:23<19:45, 2.58s/it] 33%|███▎ | 222/681 [09:26<19:40, 2.57s/it] {'loss': 1.0554, 'grad_norm': 20.063804626464844, 'learning_rate': 4.276818137766118e-07, 'fcm_dpo/beta': 0.003590481821447611, 'fcm_dpo/q_t': 0.39516395330429077, 'fcm_dpo/delta': -0.06407497823238373, 'fcm_dpo/margin': 128.40843200683594, 'margin_dpo/margin_mean': 128.40843200683594, 'margin_dpo/margin_std': 161.19532775878906, 'logps/chosen': -218.37655639648438, 'logps/rejected': -385.8150634765625, 'logps/ref_chosen': -60.971099853515625, 'logps/ref_rejected': -100.00115203857422, 'KL/chosen_KL_mean': -157.40545654296875, 'KL/rejected_KL_mean': -285.81390380859375, 'KL/mean': -221.60968017578125, 'KL/std': 144.6243133544922, 'logits/chosen': -0.32411831617355347, 'logits/rejected': -0.32718104124069214, 'epoch': 0.33} + 33%|███▎ | 222/681 [09:26<19:40, 2.57s/it] 33%|███▎ | 223/681 [09:28<18:44, 2.45s/it] {'loss': 1.1337, 'grad_norm': 23.098182678222656, 'learning_rate': 4.2677669529663686e-07, 'fcm_dpo/beta': 0.0035675265826284885, 'fcm_dpo/q_t': 0.41345182061195374, 'fcm_dpo/delta': 0.015362029895186424, 'fcm_dpo/margin': 107.97785186767578, 'margin_dpo/margin_mean': 107.97784423828125, 'margin_dpo/margin_std': 181.26332092285156, 'logps/chosen': -218.69198608398438, 'logps/rejected': -356.854248046875, 'logps/ref_chosen': -52.64057540893555, 'logps/ref_rejected': -82.82502746582031, 'KL/chosen_KL_mean': -166.05140686035156, 'KL/rejected_KL_mean': -274.02923583984375, 'KL/mean': -220.04031372070312, 'KL/std': 142.65538024902344, 'logits/chosen': -0.2400050163269043, 'logits/rejected': -0.2347499132156372, 'epoch': 0.33} + 33%|███▎ | 223/681 [09:28<18:44, 2.45s/it] 33%|███▎ | 224/681 [09:30<17:54, 2.35s/it] {'loss': 1.088, 'grad_norm': 24.251049041748047, 'learning_rate': 4.2586691858633747e-07, 'fcm_dpo/beta': 0.0035286881029605865, 'fcm_dpo/q_t': 0.40351927280426025, 'fcm_dpo/delta': -0.03949831798672676, 'fcm_dpo/margin': 123.69601440429688, 'margin_dpo/margin_mean': 123.69600677490234, 'margin_dpo/margin_std': 177.5772247314453, 'logps/chosen': -189.56932067871094, 'logps/rejected': -341.7864074707031, 'logps/ref_chosen': -48.59541320800781, 'logps/ref_rejected': -77.11648559570312, 'KL/chosen_KL_mean': -140.97390747070312, 'KL/rejected_KL_mean': -264.669921875, 'KL/mean': -202.82191467285156, 'KL/std': 155.19711303710938, 'logits/chosen': -0.3118600845336914, 'logits/rejected': -0.2952437102794647, 'epoch': 0.33} + 33%|███▎ | 224/681 [09:30<17:54, 2.35s/it] 33%|███▎ | 225/681 [09:32<17:41, 2.33s/it] {'loss': 1.0326, 'grad_norm': 21.628904342651367, 'learning_rate': 4.249525076191759e-07, 'fcm_dpo/beta': 0.0034855613484978676, 'fcm_dpo/q_t': 0.38641393184661865, 'fcm_dpo/delta': -0.11032609641551971, 'fcm_dpo/margin': 144.7752227783203, 'margin_dpo/margin_mean': 144.7752227783203, 'margin_dpo/margin_std': 180.18701171875, 'logps/chosen': -217.09906005859375, 'logps/rejected': -403.7767333984375, 'logps/ref_chosen': -58.000465393066406, 'logps/ref_rejected': -99.90291595458984, 'KL/chosen_KL_mean': -159.09860229492188, 'KL/rejected_KL_mean': -303.8738098144531, 'KL/mean': -231.4862060546875, 'KL/std': 148.37298583984375, 'logits/chosen': -0.3304445743560791, 'logits/rejected': -0.32175442576408386, 'epoch': 0.33} + 33%|███▎ | 225/681 [09:32<17:41, 2.33s/it] 33%|███▎ | 226/681 [09:35<18:18, 2.41s/it] {'loss': 1.1124, 'grad_norm': 28.347190856933594, 'learning_rate': 4.2403348649073167e-07, 'fcm_dpo/beta': 0.003453510347753763, 'fcm_dpo/q_t': 0.4127262234687805, 'fcm_dpo/delta': 0.008469540625810623, 'fcm_dpo/margin': 113.28192901611328, 'margin_dpo/margin_mean': 113.28193664550781, 'margin_dpo/margin_std': 167.6037139892578, 'logps/chosen': -192.44720458984375, 'logps/rejected': -325.51806640625, 'logps/ref_chosen': -58.898799896240234, 'logps/ref_rejected': -78.68775939941406, 'KL/chosen_KL_mean': -133.54840087890625, 'KL/rejected_KL_mean': -246.830322265625, 'KL/mean': -190.18936157226562, 'KL/std': 143.92837524414062, 'logits/chosen': -0.396121621131897, 'logits/rejected': -0.3598354160785675, 'epoch': 0.33} + 33%|███▎ | 226/681 [09:35<18:18, 2.41s/it] 33%|███▎ | 227/681 [09:37<18:06, 2.39s/it] {'loss': 1.0302, 'grad_norm': 21.415340423583984, 'learning_rate': 4.2310987941806615e-07, 'fcm_dpo/beta': 0.003411718178540468, 'fcm_dpo/q_t': 0.38779282569885254, 'fcm_dpo/delta': -0.09843979775905609, 'fcm_dpo/margin': 144.4818878173828, 'margin_dpo/margin_mean': 144.4818878173828, 'margin_dpo/margin_std': 171.448486328125, 'logps/chosen': -212.69837951660156, 'logps/rejected': -397.52044677734375, 'logps/ref_chosen': -59.072181701660156, 'logps/ref_rejected': -99.41236877441406, 'KL/chosen_KL_mean': -153.62620544433594, 'KL/rejected_KL_mean': -298.1080627441406, 'KL/mean': -225.86712646484375, 'KL/std': 163.63131713867188, 'logits/chosen': -0.3376998007297516, 'logits/rejected': -0.3247716724872589, 'epoch': 0.33} + 33%|███▎ | 227/681 [09:37<18:06, 2.39s/it] 33%|███▎ | 228/681 [09:40<18:58, 2.51s/it] {'loss': 1.1416, 'grad_norm': 22.723651885986328, 'learning_rate': 4.2218171073908463e-07, 'fcm_dpo/beta': 0.00343983992934227, 'fcm_dpo/q_t': 0.4198562502861023, 'fcm_dpo/delta': 0.05273807793855667, 'fcm_dpo/margin': 101.45245361328125, 'margin_dpo/margin_mean': 101.45246887207031, 'margin_dpo/margin_std': 162.93731689453125, 'logps/chosen': -223.56256103515625, 'logps/rejected': -350.1724853515625, 'logps/ref_chosen': -65.89128875732422, 'logps/ref_rejected': -91.04875183105469, 'KL/chosen_KL_mean': -157.6712646484375, 'KL/rejected_KL_mean': -259.12371826171875, 'KL/mean': -208.39749145507812, 'KL/std': 133.16519165039062, 'logits/chosen': -0.34845787286758423, 'logits/rejected': -0.3311355710029602, 'epoch': 0.33} + 33%|███▎ | 228/681 [09:40<18:58, 2.51s/it] 34%|███▎ | 229/681 [09:42<18:51, 2.50s/it] {'loss': 1.1173, 'grad_norm': 30.321849822998047, 'learning_rate': 4.212490049118951e-07, 'fcm_dpo/beta': 0.0034589767456054688, 'fcm_dpo/q_t': 0.41331931948661804, 'fcm_dpo/delta': 0.024636760354042053, 'fcm_dpo/margin': 108.77888488769531, 'margin_dpo/margin_mean': 108.77888488769531, 'margin_dpo/margin_std': 161.33079528808594, 'logps/chosen': -232.48545837402344, 'logps/rejected': -355.08538818359375, 'logps/ref_chosen': -70.70637512207031, 'logps/ref_rejected': -84.52741241455078, 'KL/chosen_KL_mean': -161.77908325195312, 'KL/rejected_KL_mean': -270.5579528808594, 'KL/mean': -216.16851806640625, 'KL/std': 153.45242309570312, 'logits/chosen': -0.4106701612472534, 'logits/rejected': -0.3795148730278015, 'epoch': 0.34} + 34%|███▎ | 229/681 [09:42<18:51, 2.50s/it] 34%|███▍ | 230/681 [09:45<18:29, 2.46s/it] {'loss': 0.9724, 'grad_norm': 28.437881469726562, 'learning_rate': 4.203117865141635e-07, 'fcm_dpo/beta': 0.003373272018507123, 'fcm_dpo/q_t': 0.3711177110671997, 'fcm_dpo/delta': -0.16629549860954285, 'fcm_dpo/margin': 165.08203125, 'margin_dpo/margin_mean': 165.08203125, 'margin_dpo/margin_std': 161.2001495361328, 'logps/chosen': -164.85462951660156, 'logps/rejected': -376.27655029296875, 'logps/ref_chosen': -39.282005310058594, 'logps/ref_rejected': -85.62191009521484, 'KL/chosen_KL_mean': -125.57262420654297, 'KL/rejected_KL_mean': -290.6546630859375, 'KL/mean': -208.11363220214844, 'KL/std': 146.30148315429688, 'logits/chosen': -0.31211984157562256, 'logits/rejected': -0.3166271448135376, 'epoch': 0.34} + 34%|███▍ | 230/681 [09:45<18:29, 2.46s/it] 34%|███▍ | 231/681 [09:47<18:48, 2.51s/it] {'loss': 1.0947, 'grad_norm': 23.887901306152344, 'learning_rate': 4.1937008024246625e-07, 'fcm_dpo/beta': 0.0033540253061801195, 'fcm_dpo/q_t': 0.4116860628128052, 'fcm_dpo/delta': 0.011585213243961334, 'fcm_dpo/margin': 115.93913269042969, 'margin_dpo/margin_mean': 115.93913269042969, 'margin_dpo/margin_std': 152.4516143798828, 'logps/chosen': -211.89138793945312, 'logps/rejected': -338.677978515625, 'logps/ref_chosen': -63.27644348144531, 'logps/ref_rejected': -74.1239013671875, 'KL/chosen_KL_mean': -148.6149444580078, 'KL/rejected_KL_mean': -264.5540771484375, 'KL/mean': -206.58450317382812, 'KL/std': 131.30169677734375, 'logits/chosen': -0.3661789894104004, 'logits/rejected': -0.33695119619369507, 'epoch': 0.34} + 34%|███▍ | 231/681 [09:47<18:48, 2.51s/it] 34%|███▍ | 232/681 [09:50<19:13, 2.57s/it] {'loss': 1.1629, 'grad_norm': 25.093761444091797, 'learning_rate': 4.1842391091163933e-07, 'fcm_dpo/beta': 0.0033917182590812445, 'fcm_dpo/q_t': 0.4292943477630615, 'fcm_dpo/delta': 0.08317073434591293, 'fcm_dpo/margin': 94.21968841552734, 'margin_dpo/margin_mean': 94.21968078613281, 'margin_dpo/margin_std': 164.27349853515625, 'logps/chosen': -258.03704833984375, 'logps/rejected': -365.48504638671875, 'logps/ref_chosen': -70.74876403808594, 'logps/ref_rejected': -83.97706604003906, 'KL/chosen_KL_mean': -187.2882843017578, 'KL/rejected_KL_mean': -281.5079650878906, 'KL/mean': -234.39813232421875, 'KL/std': 158.7782745361328, 'logits/chosen': -0.35640761256217957, 'logits/rejected': -0.33361750841140747, 'epoch': 0.34} + 34%|███▍ | 232/681 [09:50<19:13, 2.57s/it] 34%|███▍ | 233/681 [09:53<19:30, 2.61s/it] {'loss': 1.0649, 'grad_norm': 27.809114456176758, 'learning_rate': 4.174733034541245e-07, 'fcm_dpo/beta': 0.003362037241458893, 'fcm_dpo/q_t': 0.3922462463378906, 'fcm_dpo/delta': -0.10381458699703217, 'fcm_dpo/margin': 148.33291625976562, 'margin_dpo/margin_mean': 148.33291625976562, 'margin_dpo/margin_std': 215.14837646484375, 'logps/chosen': -225.41647338867188, 'logps/rejected': -426.3465270996094, 'logps/ref_chosen': -54.8829345703125, 'logps/ref_rejected': -107.4800796508789, 'KL/chosen_KL_mean': -170.53353881835938, 'KL/rejected_KL_mean': -318.866455078125, 'KL/mean': -244.69998168945312, 'KL/std': 167.12754821777344, 'logits/chosen': -0.3662954270839691, 'logits/rejected': -0.37061402201652527, 'epoch': 0.34} + 34%|███▍ | 233/681 [09:53<19:30, 2.61s/it] 34%|███▍ | 234/681 [09:55<19:35, 2.63s/it] {'loss': 1.0153, 'grad_norm': 36.372398376464844, 'learning_rate': 4.165182829193126e-07, 'fcm_dpo/beta': 0.003259950317442417, 'fcm_dpo/q_t': 0.3844042122364044, 'fcm_dpo/delta': -0.11555645614862442, 'fcm_dpo/margin': 155.75323486328125, 'margin_dpo/margin_mean': 155.75323486328125, 'margin_dpo/margin_std': 169.97021484375, 'logps/chosen': -211.44100952148438, 'logps/rejected': -423.1063232421875, 'logps/ref_chosen': -44.094520568847656, 'logps/ref_rejected': -100.00663757324219, 'KL/chosen_KL_mean': -167.3464813232422, 'KL/rejected_KL_mean': -323.0997009277344, 'KL/mean': -245.2230987548828, 'KL/std': 150.866455078125, 'logits/chosen': -0.3135479688644409, 'logits/rejected': -0.34066638350486755, 'epoch': 0.34} + 34%|███▍ | 234/681 [09:55<19:35, 2.63s/it] 35%|███▍ | 235/681 [09:58<19:06, 2.57s/it] {'loss': 1.1651, 'grad_norm': 27.246450424194336, 'learning_rate': 4.1555887447288255e-07, 'fcm_dpo/beta': 0.0033134431578218937, 'fcm_dpo/q_t': 0.42514321208000183, 'fcm_dpo/delta': 0.07656269520521164, 'fcm_dpo/margin': 98.23170471191406, 'margin_dpo/margin_mean': 98.2317123413086, 'margin_dpo/margin_std': 172.09481811523438, 'logps/chosen': -260.32012939453125, 'logps/rejected': -386.708984375, 'logps/ref_chosen': -62.237911224365234, 'logps/ref_rejected': -90.39506530761719, 'KL/chosen_KL_mean': -198.0821990966797, 'KL/rejected_KL_mean': -296.31390380859375, 'KL/mean': -247.19805908203125, 'KL/std': 143.0350341796875, 'logits/chosen': -0.38035351037979126, 'logits/rejected': -0.3624608516693115, 'epoch': 0.35} + 35%|███▍ | 235/681 [09:58<19:06, 2.57s/it] 35%|███▍ | 236/681 [10:00<19:17, 2.60s/it] {'loss': 0.9848, 'grad_norm': 40.99539566040039, 'learning_rate': 4.1459510339613946e-07, 'fcm_dpo/beta': 0.003255967516452074, 'fcm_dpo/q_t': 0.3787830173969269, 'fcm_dpo/delta': -0.12453138083219528, 'fcm_dpo/margin': 159.128173828125, 'margin_dpo/margin_mean': 159.128173828125, 'margin_dpo/margin_std': 143.73922729492188, 'logps/chosen': -190.48707580566406, 'logps/rejected': -403.7855224609375, 'logps/ref_chosen': -49.34136199951172, 'logps/ref_rejected': -103.51162719726562, 'KL/chosen_KL_mean': -141.14572143554688, 'KL/rejected_KL_mean': -300.2738952636719, 'KL/mean': -220.7097930908203, 'KL/std': 150.90029907226562, 'logits/chosen': -0.3324674963951111, 'logits/rejected': -0.33202531933784485, 'epoch': 0.35} + 35%|███▍ | 236/681 [10:01<19:17, 2.60s/it] 35%|███▍ | 237/681 [10:03<19:17, 2.61s/it] {'loss': 1.1085, 'grad_norm': 26.916481018066406, 'learning_rate': 4.136269950853473e-07, 'fcm_dpo/beta': 0.0032444519456475973, 'fcm_dpo/q_t': 0.411772221326828, 'fcm_dpo/delta': 0.012344859540462494, 'fcm_dpo/margin': 119.61395263671875, 'margin_dpo/margin_mean': 119.61394500732422, 'margin_dpo/margin_std': 173.78614807128906, 'logps/chosen': -240.53787231445312, 'logps/rejected': -400.7640686035156, 'logps/ref_chosen': -54.168121337890625, 'logps/ref_rejected': -94.78036499023438, 'KL/chosen_KL_mean': -186.3697509765625, 'KL/rejected_KL_mean': -305.98370361328125, 'KL/mean': -246.17672729492188, 'KL/std': 150.6981964111328, 'logits/chosen': -0.39190009236335754, 'logits/rejected': -0.38742589950561523, 'epoch': 0.35} + 35%|███▍ | 237/681 [10:03<19:17, 2.61s/it] 35%|███▍ | 238/681 [10:06<19:30, 2.64s/it] {'loss': 1.1035, 'grad_norm': 22.910091400146484, 'learning_rate': 4.126545750510605e-07, 'fcm_dpo/beta': 0.003233974566683173, 'fcm_dpo/q_t': 0.41353076696395874, 'fcm_dpo/delta': 0.013382863253355026, 'fcm_dpo/margin': 119.60325622558594, 'margin_dpo/margin_mean': 119.60325622558594, 'margin_dpo/margin_std': 166.632080078125, 'logps/chosen': -218.61337280273438, 'logps/rejected': -373.66143798828125, 'logps/ref_chosen': -53.973121643066406, 'logps/ref_rejected': -89.41795349121094, 'KL/chosen_KL_mean': -164.6402587890625, 'KL/rejected_KL_mean': -284.2435302734375, 'KL/mean': -224.44186401367188, 'KL/std': 150.41282653808594, 'logits/chosen': -0.3664902448654175, 'logits/rejected': -0.38237977027893066, 'epoch': 0.35} + 35%|███▍ | 238/681 [10:06<19:30, 2.64s/it] 35%|███▌ | 239/681 [10:08<18:43, 2.54s/it] {'loss': 1.0658, 'grad_norm': 43.435367584228516, 'learning_rate': 4.116778689174514e-07, 'fcm_dpo/beta': 0.003213751595467329, 'fcm_dpo/q_t': 0.40008848905563354, 'fcm_dpo/delta': -0.03296435624361038, 'fcm_dpo/margin': 133.9548797607422, 'margin_dpo/margin_mean': 133.9548797607422, 'margin_dpo/margin_std': 157.7518310546875, 'logps/chosen': -231.19131469726562, 'logps/rejected': -400.64129638671875, 'logps/ref_chosen': -58.09782409667969, 'logps/ref_rejected': -93.59294128417969, 'KL/chosen_KL_mean': -173.09349060058594, 'KL/rejected_KL_mean': -307.04833984375, 'KL/mean': -240.07090759277344, 'KL/std': 139.98458862304688, 'logits/chosen': -0.357890248298645, 'logits/rejected': -0.34496229887008667, 'epoch': 0.35} + 35%|███▌ | 239/681 [10:08<18:43, 2.54s/it] 35%|███▌ | 240/681 [10:11<19:10, 2.61s/it] {'loss': 1.14, 'grad_norm': 38.87122344970703, 'learning_rate': 4.106969024216348e-07, 'fcm_dpo/beta': 0.0032359175384044647, 'fcm_dpo/q_t': 0.41720300912857056, 'fcm_dpo/delta': 0.03676654398441315, 'fcm_dpo/margin': 112.62922668457031, 'margin_dpo/margin_mean': 112.62922668457031, 'margin_dpo/margin_std': 183.985595703125, 'logps/chosen': -250.30943298339844, 'logps/rejected': -376.44268798828125, 'logps/ref_chosen': -60.6144905090332, 'logps/ref_rejected': -74.1185302734375, 'KL/chosen_KL_mean': -189.6949462890625, 'KL/rejected_KL_mean': -302.32415771484375, 'KL/mean': -246.00955200195312, 'KL/std': 153.5874786376953, 'logits/chosen': -0.3707585334777832, 'logits/rejected': -0.3482241630554199, 'epoch': 0.35} + 35%|███▌ | 240/681 [10:11<19:10, 2.61s/it] 35%|███▌ | 241/681 [10:13<18:47, 2.56s/it] {'loss': 0.992, 'grad_norm': 22.715147018432617, 'learning_rate': 4.097117014129903e-07, 'fcm_dpo/beta': 0.0031772879883646965, 'fcm_dpo/q_t': 0.3749390244483948, 'fcm_dpo/delta': -0.1714785099029541, 'fcm_dpo/margin': 176.90191650390625, 'margin_dpo/margin_mean': 176.90191650390625, 'margin_dpo/margin_std': 199.16400146484375, 'logps/chosen': -228.56027221679688, 'logps/rejected': -427.4320068359375, 'logps/ref_chosen': -66.091064453125, 'logps/ref_rejected': -88.06088256835938, 'KL/chosen_KL_mean': -162.46920776367188, 'KL/rejected_KL_mean': -339.3711242675781, 'KL/mean': -250.920166015625, 'KL/std': 172.28445434570312, 'logits/chosen': -0.41761964559555054, 'logits/rejected': -0.39237093925476074, 'epoch': 0.35} + 35%|███▌ | 241/681 [10:13<18:47, 2.56s/it] 36%|███▌ | 242/681 [10:16<18:30, 2.53s/it] {'loss': 1.1001, 'grad_norm': 31.792686462402344, 'learning_rate': 4.087222918524807e-07, 'fcm_dpo/beta': 0.003157552797347307, 'fcm_dpo/q_t': 0.4103991985321045, 'fcm_dpo/delta': 0.00024249032139778137, 'fcm_dpo/margin': 126.4863510131836, 'margin_dpo/margin_mean': 126.48635864257812, 'margin_dpo/margin_std': 179.0635986328125, 'logps/chosen': -258.4462890625, 'logps/rejected': -400.4290771484375, 'logps/ref_chosen': -67.86392974853516, 'logps/ref_rejected': -83.36033630371094, 'KL/chosen_KL_mean': -190.58236694335938, 'KL/rejected_KL_mean': -317.0687255859375, 'KL/mean': -253.82553100585938, 'KL/std': 150.39723205566406, 'logits/chosen': -0.35047098994255066, 'logits/rejected': -0.32680755853652954, 'epoch': 0.36} + 36%|███▌ | 242/681 [10:16<18:30, 2.53s/it] 36%|███▌ | 243/681 [10:18<18:27, 2.53s/it] {'loss': 1.0381, 'grad_norm': 23.673492431640625, 'learning_rate': 4.07728699811968e-07, 'fcm_dpo/beta': 0.0030988508369773626, 'fcm_dpo/q_t': 0.3906670808792114, 'fcm_dpo/delta': -0.08498271554708481, 'fcm_dpo/margin': 155.12164306640625, 'margin_dpo/margin_mean': 155.12164306640625, 'margin_dpo/margin_std': 187.38595581054688, 'logps/chosen': -250.7888641357422, 'logps/rejected': -419.1618957519531, 'logps/ref_chosen': -63.0842399597168, 'logps/ref_rejected': -76.33563232421875, 'KL/chosen_KL_mean': -187.70462036132812, 'KL/rejected_KL_mean': -342.8262634277344, 'KL/mean': -265.26544189453125, 'KL/std': 165.91262817382812, 'logits/chosen': -0.37325674295425415, 'logits/rejected': -0.3453625440597534, 'epoch': 0.36} + 36%|███▌ | 243/681 [10:18<18:27, 2.53s/it] 36%|███▌ | 244/681 [10:21<18:23, 2.53s/it] {'loss': 1.0162, 'grad_norm': 31.374834060668945, 'learning_rate': 4.067309514735267e-07, 'fcm_dpo/beta': 0.0030439933761954308, 'fcm_dpo/q_t': 0.38761717081069946, 'fcm_dpo/delta': -0.09161465615034103, 'fcm_dpo/margin': 159.97470092773438, 'margin_dpo/margin_mean': 159.97470092773438, 'margin_dpo/margin_std': 164.85641479492188, 'logps/chosen': -227.1039276123047, 'logps/rejected': -420.82989501953125, 'logps/ref_chosen': -61.140689849853516, 'logps/ref_rejected': -94.89193725585938, 'KL/chosen_KL_mean': -165.96322631835938, 'KL/rejected_KL_mean': -325.93792724609375, 'KL/mean': -245.95059204101562, 'KL/std': 159.3692626953125, 'logits/chosen': -0.42891860008239746, 'logits/rejected': -0.42122605443000793, 'epoch': 0.36} + 36%|███▌ | 244/681 [10:21<18:23, 2.53s/it] 36%|███▌ | 245/681 [10:24<18:46, 2.58s/it] {'loss': 1.1139, 'grad_norm': 26.153411865234375, 'learning_rate': 4.057290731287531e-07, 'fcm_dpo/beta': 0.0030218339525163174, 'fcm_dpo/q_t': 0.4137793779373169, 'fcm_dpo/delta': 0.02460547536611557, 'fcm_dpo/margin': 124.18669128417969, 'margin_dpo/margin_mean': 124.18669128417969, 'margin_dpo/margin_std': 170.16293334960938, 'logps/chosen': -253.64962768554688, 'logps/rejected': -398.214111328125, 'logps/ref_chosen': -67.26228332519531, 'logps/ref_rejected': -87.64010620117188, 'KL/chosen_KL_mean': -186.3873291015625, 'KL/rejected_KL_mean': -310.57403564453125, 'KL/mean': -248.48068237304688, 'KL/std': 153.88400268554688, 'logits/chosen': -0.4331769049167633, 'logits/rejected': -0.40836483240127563, 'epoch': 0.36} + 36%|███▌ | 245/681 [10:24<18:46, 2.58s/it] 36%|███▌ | 246/681 [10:26<18:42, 2.58s/it] {'loss': 1.109, 'grad_norm': 24.42864418029785, 'learning_rate': 4.047230911780736e-07, 'fcm_dpo/beta': 0.003048623912036419, 'fcm_dpo/q_t': 0.4134928584098816, 'fcm_dpo/delta': 0.016290059313178062, 'fcm_dpo/margin': 126.03412628173828, 'margin_dpo/margin_mean': 126.03411865234375, 'margin_dpo/margin_std': 182.46896362304688, 'logps/chosen': -248.84593200683594, 'logps/rejected': -392.5294189453125, 'logps/ref_chosen': -66.69696807861328, 'logps/ref_rejected': -84.34634399414062, 'KL/chosen_KL_mean': -182.14895629882812, 'KL/rejected_KL_mean': -308.18310546875, 'KL/mean': -245.166015625, 'KL/std': 170.09425354003906, 'logits/chosen': -0.440762996673584, 'logits/rejected': -0.40071290731430054, 'epoch': 0.36} + 36%|███▌ | 246/681 [10:26<18:42, 2.58s/it] 36%|███▋ | 247/681 [10:29<18:28, 2.55s/it] {'loss': 1.0026, 'grad_norm': 33.18147277832031, 'learning_rate': 4.0371303213004814e-07, 'fcm_dpo/beta': 0.002994304057210684, 'fcm_dpo/q_t': 0.3789059519767761, 'fcm_dpo/delta': -0.13526105880737305, 'fcm_dpo/margin': 176.29876708984375, 'margin_dpo/margin_mean': 176.29876708984375, 'margin_dpo/margin_std': 190.67379760742188, 'logps/chosen': -266.4715881347656, 'logps/rejected': -492.45831298828125, 'logps/ref_chosen': -56.6053466796875, 'logps/ref_rejected': -106.29326629638672, 'KL/chosen_KL_mean': -209.86624145507812, 'KL/rejected_KL_mean': -386.1650390625, 'KL/mean': -298.015625, 'KL/std': 176.03521728515625, 'logits/chosen': -0.3571593761444092, 'logits/rejected': -0.355099618434906, 'epoch': 0.36} + 36%|███▋ | 247/681 [10:29<18:28, 2.55s/it] 36%|███▋ | 248/681 [10:31<18:26, 2.55s/it] {'loss': 1.0231, 'grad_norm': 21.914152145385742, 'learning_rate': 4.0269892260067197e-07, 'fcm_dpo/beta': 0.0029416182078421116, 'fcm_dpo/q_t': 0.3923521637916565, 'fcm_dpo/delta': -0.055744655430316925, 'fcm_dpo/margin': 153.89654541015625, 'margin_dpo/margin_mean': 153.89654541015625, 'margin_dpo/margin_std': 135.817138671875, 'logps/chosen': -224.21139526367188, 'logps/rejected': -425.921630859375, 'logps/ref_chosen': -44.043216705322266, 'logps/ref_rejected': -91.85687255859375, 'KL/chosen_KL_mean': -180.16818237304688, 'KL/rejected_KL_mean': -334.06475830078125, 'KL/mean': -257.116455078125, 'KL/std': 142.95608520507812, 'logits/chosen': -0.3768647313117981, 'logits/rejected': -0.3953893482685089, 'epoch': 0.36} + 36%|███▋ | 248/681 [10:31<18:26, 2.55s/it] 37%|███▋ | 249/681 [10:34<18:00, 2.50s/it] {'loss': 1.2227, 'grad_norm': 31.89614486694336, 'learning_rate': 4.0168078931267426e-07, 'fcm_dpo/beta': 0.003009880194440484, 'fcm_dpo/q_t': 0.4420696496963501, 'fcm_dpo/delta': 0.1481824517250061, 'fcm_dpo/margin': 84.93710327148438, 'margin_dpo/margin_mean': 84.93710327148438, 'margin_dpo/margin_std': 187.25634765625, 'logps/chosen': -298.1483459472656, 'logps/rejected': -401.11114501953125, 'logps/ref_chosen': -62.442352294921875, 'logps/ref_rejected': -80.46806335449219, 'KL/chosen_KL_mean': -235.70599365234375, 'KL/rejected_KL_mean': -320.6430969238281, 'KL/mean': -278.174560546875, 'KL/std': 158.992431640625, 'logits/chosen': -0.4115716814994812, 'logits/rejected': -0.38695603609085083, 'epoch': 0.37} + 37%|███▋ | 249/681 [10:34<18:00, 2.50s/it] 37%|███▋ | 250/681 [10:36<17:41, 2.46s/it] {'loss': 1.0195, 'grad_norm': 30.4766845703125, 'learning_rate': 4.006586590948141e-07, 'fcm_dpo/beta': 0.0030103102326393127, 'fcm_dpo/q_t': 0.3886602520942688, 'fcm_dpo/delta': -0.08075070381164551, 'fcm_dpo/margin': 158.41148376464844, 'margin_dpo/margin_mean': 158.41148376464844, 'margin_dpo/margin_std': 157.6683349609375, 'logps/chosen': -272.652587890625, 'logps/rejected': -439.2991943359375, 'logps/ref_chosen': -65.63668823242188, 'logps/ref_rejected': -73.87184143066406, 'KL/chosen_KL_mean': -207.01589965820312, 'KL/rejected_KL_mean': -365.4273681640625, 'KL/mean': -286.22161865234375, 'KL/std': 158.22332763671875, 'logits/chosen': -0.39069664478302, 'logits/rejected': -0.3340034484863281, 'epoch': 0.37} + 37%|███▋ | 250/681 [10:36<17:41, 2.46s/it] 37%|███▋ | 251/681 [10:38<17:34, 2.45s/it] {'loss': 1.1604, 'grad_norm': 35.921974182128906, 'learning_rate': 3.9963255888117325e-07, 'fcm_dpo/beta': 0.0030130401719361544, 'fcm_dpo/q_t': 0.42667731642723083, 'fcm_dpo/delta': 0.08070008456707001, 'fcm_dpo/margin': 106.85773468017578, 'margin_dpo/margin_mean': 106.85773468017578, 'margin_dpo/margin_std': 176.058837890625, 'logps/chosen': -278.3011474609375, 'logps/rejected': -405.63958740234375, 'logps/ref_chosen': -57.182716369628906, 'logps/ref_rejected': -77.66343688964844, 'KL/chosen_KL_mean': -221.11842346191406, 'KL/rejected_KL_mean': -327.97613525390625, 'KL/mean': -274.54730224609375, 'KL/std': 165.54885864257812, 'logits/chosen': -0.3663170635700226, 'logits/rejected': -0.33492955565452576, 'epoch': 0.37} + 37%|███▋ | 251/681 [10:38<17:34, 2.45s/it] 37%|███▋ | 252/681 [10:41<17:55, 2.51s/it] {'loss': 1.046, 'grad_norm': 25.36005210876465, 'learning_rate': 3.9860251571044666e-07, 'fcm_dpo/beta': 0.0030095637775957584, 'fcm_dpo/q_t': 0.39602023363113403, 'fcm_dpo/delta': -0.036197420209646225, 'fcm_dpo/margin': 144.3450164794922, 'margin_dpo/margin_mean': 144.34500122070312, 'margin_dpo/margin_std': 146.50564575195312, 'logps/chosen': -291.4132080078125, 'logps/rejected': -448.83056640625, 'logps/ref_chosen': -71.68563842773438, 'logps/ref_rejected': -84.75799560546875, 'KL/chosen_KL_mean': -219.7275390625, 'KL/rejected_KL_mean': -364.07257080078125, 'KL/mean': -291.9000549316406, 'KL/std': 145.91665649414062, 'logits/chosen': -0.4368041753768921, 'logits/rejected': -0.3982187509536743, 'epoch': 0.37} + 37%|███▋ | 252/681 [10:41<17:55, 2.51s/it] 37%|███▋ | 253/681 [10:44<18:21, 2.57s/it] {'loss': 1.0717, 'grad_norm': 23.17310333251953, 'learning_rate': 3.9756855672522986e-07, 'fcm_dpo/beta': 0.0030172369442880154, 'fcm_dpo/q_t': 0.40122461318969727, 'fcm_dpo/delta': -0.025195002555847168, 'fcm_dpo/margin': 140.40821838378906, 'margin_dpo/margin_mean': 140.40821838378906, 'margin_dpo/margin_std': 169.84083557128906, 'logps/chosen': -256.5468444824219, 'logps/rejected': -426.523681640625, 'logps/ref_chosen': -69.1339340209961, 'logps/ref_rejected': -98.70252990722656, 'KL/chosen_KL_mean': -187.4129180908203, 'KL/rejected_KL_mean': -327.8211364746094, 'KL/mean': -257.6170349121094, 'KL/std': 155.3035125732422, 'logits/chosen': -0.42870020866394043, 'logits/rejected': -0.42114484310150146, 'epoch': 0.37} + 37%|███▋ | 253/681 [10:44<18:21, 2.57s/it] 37%|███▋ | 254/681 [10:46<18:29, 2.60s/it] {'loss': 1.1406, 'grad_norm': 20.98316192626953, 'learning_rate': 3.965307091713037e-07, 'fcm_dpo/beta': 0.003011333290487528, 'fcm_dpo/q_t': 0.42020976543426514, 'fcm_dpo/delta': 0.041351526975631714, 'fcm_dpo/margin': 119.60062408447266, 'margin_dpo/margin_mean': 119.60063171386719, 'margin_dpo/margin_std': 200.53207397460938, 'logps/chosen': -231.12533569335938, 'logps/rejected': -386.87860107421875, 'logps/ref_chosen': -54.154998779296875, 'logps/ref_rejected': -90.30764770507812, 'KL/chosen_KL_mean': -176.9703369140625, 'KL/rejected_KL_mean': -296.5709533691406, 'KL/mean': -236.77066040039062, 'KL/std': 159.91812133789062, 'logits/chosen': -0.4022292196750641, 'logits/rejected': -0.3870220184326172, 'epoch': 0.37} + 37%|███▋ | 254/681 [10:46<18:29, 2.60s/it] 37%|███▋ | 255/681 [10:49<18:01, 2.54s/it] {'loss': 1.1055, 'grad_norm': 21.49859046936035, 'learning_rate': 3.954890003969163e-07, 'fcm_dpo/beta': 0.003016393631696701, 'fcm_dpo/q_t': 0.4103944003582001, 'fcm_dpo/delta': 0.013489855453372002, 'fcm_dpo/margin': 128.23907470703125, 'margin_dpo/margin_mean': 128.23907470703125, 'margin_dpo/margin_std': 176.99844360351562, 'logps/chosen': -233.45236206054688, 'logps/rejected': -394.7583312988281, 'logps/ref_chosen': -57.14167022705078, 'logps/ref_rejected': -90.2085952758789, 'KL/chosen_KL_mean': -176.31068420410156, 'KL/rejected_KL_mean': -304.54974365234375, 'KL/mean': -240.4302215576172, 'KL/std': 140.88638305664062, 'logits/chosen': -0.3628276288509369, 'logits/rejected': -0.3527315855026245, 'epoch': 0.37} + 37%|███▋ | 255/681 [10:49<18:01, 2.54s/it] 38%|███▊ | 256/681 [10:51<17:59, 2.54s/it] {'loss': 1.0802, 'grad_norm': 39.732818603515625, 'learning_rate': 3.944434578520628e-07, 'fcm_dpo/beta': 0.0030256398022174835, 'fcm_dpo/q_t': 0.4049100875854492, 'fcm_dpo/delta': -0.012017881497740746, 'fcm_dpo/margin': 136.01217651367188, 'margin_dpo/margin_mean': 136.01217651367188, 'margin_dpo/margin_std': 172.76388549804688, 'logps/chosen': -204.697021484375, 'logps/rejected': -378.108642578125, 'logps/ref_chosen': -55.163490295410156, 'logps/ref_rejected': -92.56291961669922, 'KL/chosen_KL_mean': -149.53353881835938, 'KL/rejected_KL_mean': -285.5457458496094, 'KL/mean': -217.53964233398438, 'KL/std': 150.42579650878906, 'logits/chosen': -0.3158034384250641, 'logits/rejected': -0.3230699896812439, 'epoch': 0.38} + 38%|███▊ | 256/681 [10:51<17:59, 2.54s/it] 38%|███▊ | 257/681 [10:54<18:08, 2.57s/it] {'loss': 1.0716, 'grad_norm': 23.517911911010742, 'learning_rate': 3.933941090877615e-07, 'fcm_dpo/beta': 0.0029899184592068195, 'fcm_dpo/q_t': 0.4019385874271393, 'fcm_dpo/delta': -0.030670955777168274, 'fcm_dpo/margin': 143.2915802001953, 'margin_dpo/margin_mean': 143.2915802001953, 'margin_dpo/margin_std': 175.0234375, 'logps/chosen': -195.7149658203125, 'logps/rejected': -369.1207580566406, 'logps/ref_chosen': -49.42369842529297, 'logps/ref_rejected': -79.53791809082031, 'KL/chosen_KL_mean': -146.291259765625, 'KL/rejected_KL_mean': -289.58282470703125, 'KL/mean': -217.93704223632812, 'KL/std': 154.93496704101562, 'logits/chosen': -0.36811453104019165, 'logits/rejected': -0.354714035987854, 'epoch': 0.38} + 38%|███▊ | 257/681 [10:54<18:08, 2.57s/it] 38%|███▊ | 258/681 [10:56<17:28, 2.48s/it] {'loss': 1.0907, 'grad_norm': 28.047407150268555, 'learning_rate': 3.923409817553284e-07, 'fcm_dpo/beta': 0.0030003516003489494, 'fcm_dpo/q_t': 0.4030870795249939, 'fcm_dpo/delta': -0.01784433051943779, 'fcm_dpo/margin': 139.01031494140625, 'margin_dpo/margin_mean': 139.01031494140625, 'margin_dpo/margin_std': 193.4696502685547, 'logps/chosen': -252.7172088623047, 'logps/rejected': -428.33349609375, 'logps/ref_chosen': -59.384124755859375, 'logps/ref_rejected': -95.99010467529297, 'KL/chosen_KL_mean': -193.3330841064453, 'KL/rejected_KL_mean': -332.3433837890625, 'KL/mean': -262.8382568359375, 'KL/std': 162.82940673828125, 'logits/chosen': -0.3141087591648102, 'logits/rejected': -0.3110647201538086, 'epoch': 0.38} + 38%|███▊ | 258/681 [10:56<17:28, 2.48s/it] 38%|███▊ | 259/681 [10:59<17:33, 2.50s/it] {'loss': 1.1286, 'grad_norm': 27.507400512695312, 'learning_rate': 3.9128410360564793e-07, 'fcm_dpo/beta': 0.0030027367174625397, 'fcm_dpo/q_t': 0.417955607175827, 'fcm_dpo/delta': 0.04155043140053749, 'fcm_dpo/margin': 119.85458374023438, 'margin_dpo/margin_mean': 119.85458374023438, 'margin_dpo/margin_std': 180.78173828125, 'logps/chosen': -241.32675170898438, 'logps/rejected': -397.5446472167969, 'logps/ref_chosen': -52.828346252441406, 'logps/ref_rejected': -89.191650390625, 'KL/chosen_KL_mean': -188.4984130859375, 'KL/rejected_KL_mean': -308.3529968261719, 'KL/mean': -248.42568969726562, 'KL/std': 159.6024169921875, 'logits/chosen': -0.3825646936893463, 'logits/rejected': -0.3810487985610962, 'epoch': 0.38} + 38%|███▊ | 259/681 [10:59<17:33, 2.50s/it] 38%|███▊ | 260/681 [11:01<17:51, 2.55s/it] {'loss': 1.0251, 'grad_norm': 26.93035125732422, 'learning_rate': 3.9022350248844246e-07, 'fcm_dpo/beta': 0.00299159437417984, 'fcm_dpo/q_t': 0.390764981508255, 'fcm_dpo/delta': -0.0841422975063324, 'fcm_dpo/margin': 160.49334716796875, 'margin_dpo/margin_mean': 160.49334716796875, 'margin_dpo/margin_std': 176.1913604736328, 'logps/chosen': -240.9576416015625, 'logps/rejected': -449.12310791015625, 'logps/ref_chosen': -47.41767501831055, 'logps/ref_rejected': -95.08978271484375, 'KL/chosen_KL_mean': -193.53997802734375, 'KL/rejected_KL_mean': -354.0333251953125, 'KL/mean': -273.78662109375, 'KL/std': 163.99403381347656, 'logits/chosen': -0.33758312463760376, 'logits/rejected': -0.35471126437187195, 'epoch': 0.38} + 38%|███▊ | 260/681 [11:01<17:51, 2.55s/it] 38%|███▊ | 261/681 [11:04<17:06, 2.44s/it] {'loss': 1.0729, 'grad_norm': 23.40955924987793, 'learning_rate': 3.891592063515376e-07, 'fcm_dpo/beta': 0.0029416559264063835, 'fcm_dpo/q_t': 0.39985257387161255, 'fcm_dpo/delta': -0.04529657959938049, 'fcm_dpo/margin': 150.63624572753906, 'margin_dpo/margin_mean': 150.63623046875, 'margin_dpo/margin_std': 203.27151489257812, 'logps/chosen': -256.5679016113281, 'logps/rejected': -442.68768310546875, 'logps/ref_chosen': -53.03137969970703, 'logps/ref_rejected': -88.51494598388672, 'KL/chosen_KL_mean': -203.53651428222656, 'KL/rejected_KL_mean': -354.1727294921875, 'KL/mean': -278.8546142578125, 'KL/std': 180.8629150390625, 'logits/chosen': -0.30870985984802246, 'logits/rejected': -0.3094845414161682, 'epoch': 0.38} + 38%|███▊ | 261/681 [11:04<17:06, 2.44s/it] 38%|███▊ | 262/681 [11:06<16:53, 2.42s/it] {'loss': 1.108, 'grad_norm': 25.519702911376953, 'learning_rate': 3.880912432401264e-07, 'fcm_dpo/beta': 0.002966498024761677, 'fcm_dpo/q_t': 0.41466158628463745, 'fcm_dpo/delta': 0.03342254459857941, 'fcm_dpo/margin': 123.85542297363281, 'margin_dpo/margin_mean': 123.85542297363281, 'margin_dpo/margin_std': 163.9331512451172, 'logps/chosen': -303.0691223144531, 'logps/rejected': -453.7229309082031, 'logps/ref_chosen': -59.620140075683594, 'logps/ref_rejected': -86.41853332519531, 'KL/chosen_KL_mean': -243.448974609375, 'KL/rejected_KL_mean': -367.30438232421875, 'KL/mean': -305.376708984375, 'KL/std': 165.85348510742188, 'logits/chosen': -0.29800450801849365, 'logits/rejected': -0.2692173719406128, 'epoch': 0.38} + 38%|███▊ | 262/681 [11:06<16:53, 2.42s/it] 39%|███▊ | 263/681 [11:08<16:53, 2.42s/it] {'loss': 1.0198, 'grad_norm': 23.3193302154541, 'learning_rate': 3.870196412960302e-07, 'fcm_dpo/beta': 0.0028949188999831676, 'fcm_dpo/q_t': 0.38297536969184875, 'fcm_dpo/delta': -0.12226266413927078, 'fcm_dpo/margin': 177.93978881835938, 'margin_dpo/margin_mean': 177.93978881835938, 'margin_dpo/margin_std': 208.54910278320312, 'logps/chosen': -288.3870849609375, 'logps/rejected': -503.76312255859375, 'logps/ref_chosen': -59.42094421386719, 'logps/ref_rejected': -96.85720825195312, 'KL/chosen_KL_mean': -228.96612548828125, 'KL/rejected_KL_mean': -406.9059143066406, 'KL/mean': -317.9360046386719, 'KL/std': 197.06179809570312, 'logits/chosen': -0.3331921398639679, 'logits/rejected': -0.30877092480659485, 'epoch': 0.39} + 39%|███▊ | 263/681 [11:08<16:53, 2.42s/it] 39%|███▉ | 264/681 [11:11<17:29, 2.52s/it] {'loss': 1.0845, 'grad_norm': 27.41741180419922, 'learning_rate': 3.8594442875695665e-07, 'fcm_dpo/beta': 0.002853479702025652, 'fcm_dpo/q_t': 0.4024280309677124, 'fcm_dpo/delta': -0.03490894287824631, 'fcm_dpo/margin': 151.4447021484375, 'margin_dpo/margin_mean': 151.4447021484375, 'margin_dpo/margin_std': 206.7536163330078, 'logps/chosen': -304.7344665527344, 'logps/rejected': -487.31329345703125, 'logps/ref_chosen': -62.722084045410156, 'logps/ref_rejected': -93.85620880126953, 'KL/chosen_KL_mean': -242.01239013671875, 'KL/rejected_KL_mean': -393.45709228515625, 'KL/mean': -317.7347412109375, 'KL/std': 182.8231201171875, 'logits/chosen': -0.3761428892612457, 'logits/rejected': -0.36597341299057007, 'epoch': 0.39} + 39%|███▉ | 264/681 [11:11<17:29, 2.52s/it] 39%|███▉ | 265/681 [11:14<17:36, 2.54s/it] {'loss': 1.1181, 'grad_norm': 25.906906127929688, 'learning_rate': 3.848656339557562e-07, 'fcm_dpo/beta': 0.0028575900942087173, 'fcm_dpo/q_t': 0.4068824350833893, 'fcm_dpo/delta': -0.021879900246858597, 'fcm_dpo/margin': 147.30393981933594, 'margin_dpo/margin_mean': 147.30393981933594, 'margin_dpo/margin_std': 244.15444946289062, 'logps/chosen': -322.00054931640625, 'logps/rejected': -495.3536376953125, 'logps/ref_chosen': -61.971466064453125, 'logps/ref_rejected': -88.02059936523438, 'KL/chosen_KL_mean': -260.0290832519531, 'KL/rejected_KL_mean': -407.3330383300781, 'KL/mean': -333.6810302734375, 'KL/std': 206.29835510253906, 'logits/chosen': -0.3324648439884186, 'logits/rejected': -0.3139057755470276, 'epoch': 0.39} + 39%|███▉ | 265/681 [11:14<17:36, 2.54s/it] 39%|███▉ | 266/681 [11:16<17:27, 2.52s/it] {'loss': 1.1495, 'grad_norm': 65.29761505126953, 'learning_rate': 3.8378328531967507e-07, 'fcm_dpo/beta': 0.0028820079751312733, 'fcm_dpo/q_t': 0.4227384924888611, 'fcm_dpo/delta': 0.059021368622779846, 'fcm_dpo/margin': 119.01298522949219, 'margin_dpo/margin_mean': 119.01298522949219, 'margin_dpo/margin_std': 200.31187438964844, 'logps/chosen': -327.94775390625, 'logps/rejected': -447.832275390625, 'logps/ref_chosen': -67.09967041015625, 'logps/ref_rejected': -67.97122192382812, 'KL/chosen_KL_mean': -260.84808349609375, 'KL/rejected_KL_mean': -379.8610534667969, 'KL/mean': -320.35455322265625, 'KL/std': 171.5772247314453, 'logits/chosen': -0.3929414451122284, 'logits/rejected': -0.3485182523727417, 'epoch': 0.39} + 39%|███▉ | 266/681 [11:16<17:27, 2.52s/it] 39%|███▉ | 267/681 [11:19<17:28, 2.53s/it] {'loss': 1.0898, 'grad_norm': 41.96255874633789, 'learning_rate': 3.8269741136960646e-07, 'fcm_dpo/beta': 0.002870975062251091, 'fcm_dpo/q_t': 0.40422123670578003, 'fcm_dpo/delta': -0.021463816985487938, 'fcm_dpo/margin': 146.3773193359375, 'margin_dpo/margin_mean': 146.3773193359375, 'margin_dpo/margin_std': 205.5968780517578, 'logps/chosen': -299.8945617675781, 'logps/rejected': -467.4696044921875, 'logps/ref_chosen': -68.97075653076172, 'logps/ref_rejected': -90.16844940185547, 'KL/chosen_KL_mean': -230.92379760742188, 'KL/rejected_KL_mean': -377.3011474609375, 'KL/mean': -304.11248779296875, 'KL/std': 182.7541961669922, 'logits/chosen': -0.3992302715778351, 'logits/rejected': -0.36245715618133545, 'epoch': 0.39} + 39%|███▉ | 267/681 [11:19<17:28, 2.53s/it] 39%|███▉ | 268/681 [11:21<17:27, 2.54s/it] {'loss': 1.0987, 'grad_norm': 30.43846321105957, 'learning_rate': 3.8160804071933894e-07, 'fcm_dpo/beta': 0.002865626011043787, 'fcm_dpo/q_t': 0.4083732068538666, 'fcm_dpo/delta': -0.006617257371544838, 'fcm_dpo/margin': 141.7186279296875, 'margin_dpo/margin_mean': 141.71861267089844, 'margin_dpo/margin_std': 204.48934936523438, 'logps/chosen': -291.12274169921875, 'logps/rejected': -478.58868408203125, 'logps/ref_chosen': -55.90031051635742, 'logps/ref_rejected': -101.64763641357422, 'KL/chosen_KL_mean': -235.22242736816406, 'KL/rejected_KL_mean': -376.9410400390625, 'KL/mean': -306.08172607421875, 'KL/std': 170.38865661621094, 'logits/chosen': -0.37674716114997864, 'logits/rejected': -0.3824624717235565, 'epoch': 0.39} + 39%|███▉ | 268/681 [11:21<17:27, 2.54s/it] 40%|███▉ | 269/681 [11:24<17:13, 2.51s/it] {'loss': 1.0488, 'grad_norm': 27.612655639648438, 'learning_rate': 3.8051520207480204e-07, 'fcm_dpo/beta': 0.002839939435943961, 'fcm_dpo/q_t': 0.3899012804031372, 'fcm_dpo/delta': -0.08905084431171417, 'fcm_dpo/margin': 170.71112060546875, 'margin_dpo/margin_mean': 170.71112060546875, 'margin_dpo/margin_std': 219.97512817382812, 'logps/chosen': -315.3667907714844, 'logps/rejected': -523.3876953125, 'logps/ref_chosen': -70.03955841064453, 'logps/ref_rejected': -107.34937286376953, 'KL/chosen_KL_mean': -245.32723999023438, 'KL/rejected_KL_mean': -416.038330078125, 'KL/mean': -330.68280029296875, 'KL/std': 177.90664672851562, 'logits/chosen': -0.45941269397735596, 'logits/rejected': -0.4373210668563843, 'epoch': 0.4} + 40%|███▉ | 269/681 [11:24<17:13, 2.51s/it] 40%|███▉ | 270/681 [11:26<17:31, 2.56s/it] {'loss': 1.1308, 'grad_norm': 45.69468688964844, 'learning_rate': 3.794189242333106e-07, 'fcm_dpo/beta': 0.002845948562026024, 'fcm_dpo/q_t': 0.41619110107421875, 'fcm_dpo/delta': 0.03565208241343498, 'fcm_dpo/margin': 128.40142822265625, 'margin_dpo/margin_mean': 128.40142822265625, 'margin_dpo/margin_std': 200.96267700195312, 'logps/chosen': -296.3348388671875, 'logps/rejected': -465.1314392089844, 'logps/ref_chosen': -69.53347778320312, 'logps/ref_rejected': -109.92864990234375, 'KL/chosen_KL_mean': -226.80136108398438, 'KL/rejected_KL_mean': -355.2027893066406, 'KL/mean': -291.0020751953125, 'KL/std': 163.08633422851562, 'logits/chosen': -0.4949715733528137, 'logits/rejected': -0.4854864776134491, 'epoch': 0.4} + 40%|███▉ | 270/681 [11:26<17:31, 2.56s/it] 40%|███▉ | 271/681 [11:29<17:09, 2.51s/it] {'loss': 1.0488, 'grad_norm': 25.674280166625977, 'learning_rate': 3.7831923608280514e-07, 'fcm_dpo/beta': 0.0028090826235711575, 'fcm_dpo/q_t': 0.3959714472293854, 'fcm_dpo/delta': -0.058409713208675385, 'fcm_dpo/margin': 162.08523559570312, 'margin_dpo/margin_mean': 162.08523559570312, 'margin_dpo/margin_std': 191.40362548828125, 'logps/chosen': -280.9557800292969, 'logps/rejected': -478.790283203125, 'logps/ref_chosen': -56.76456832885742, 'logps/ref_rejected': -92.51383972167969, 'KL/chosen_KL_mean': -224.1912078857422, 'KL/rejected_KL_mean': -386.27642822265625, 'KL/mean': -305.23382568359375, 'KL/std': 166.82310485839844, 'logits/chosen': -0.4081183075904846, 'logits/rejected': -0.3879523277282715, 'epoch': 0.4} + 40%|███▉ | 271/681 [11:29<17:09, 2.51s/it] 40%|███▉ | 272/681 [11:32<17:32, 2.57s/it] {'loss': 0.973, 'grad_norm': 33.71802520751953, 'learning_rate': 3.772161666010912e-07, 'fcm_dpo/beta': 0.002755315974354744, 'fcm_dpo/q_t': 0.37152132391929626, 'fcm_dpo/delta': -0.16125299036502838, 'fcm_dpo/margin': 200.572265625, 'margin_dpo/margin_mean': 200.572265625, 'margin_dpo/margin_std': 189.1830596923828, 'logps/chosen': -259.80902099609375, 'logps/rejected': -516.4269409179688, 'logps/ref_chosen': -49.497154235839844, 'logps/ref_rejected': -105.54279327392578, 'KL/chosen_KL_mean': -210.31185913085938, 'KL/rejected_KL_mean': -410.8841552734375, 'KL/mean': -310.5980224609375, 'KL/std': 184.2425537109375, 'logits/chosen': -0.3228394389152527, 'logits/rejected': -0.3339686989784241, 'epoch': 0.4} + 40%|███▉ | 272/681 [11:32<17:32, 2.57s/it] 40%|████ | 273/681 [11:34<17:08, 2.52s/it] {'loss': 1.0169, 'grad_norm': 41.27581024169922, 'learning_rate': 3.761097448550755e-07, 'fcm_dpo/beta': 0.0026712960097938776, 'fcm_dpo/q_t': 0.3848682641983032, 'fcm_dpo/delta': -0.10431107878684998, 'fcm_dpo/margin': 186.65921020507812, 'margin_dpo/margin_mean': 186.65921020507812, 'margin_dpo/margin_std': 203.4211883544922, 'logps/chosen': -299.6241455078125, 'logps/rejected': -515.8065185546875, 'logps/ref_chosen': -62.97539520263672, 'logps/ref_rejected': -92.49858093261719, 'KL/chosen_KL_mean': -236.64874267578125, 'KL/rejected_KL_mean': -423.3079833984375, 'KL/mean': -329.9783630371094, 'KL/std': 181.1544189453125, 'logits/chosen': -0.32734841108322144, 'logits/rejected': -0.3092419505119324, 'epoch': 0.4} + 40%|████ | 273/681 [11:34<17:08, 2.52s/it] 40%|████ | 274/681 [11:36<16:47, 2.48s/it] {'loss': 1.1027, 'grad_norm': 41.83562469482422, 'learning_rate': 3.75e-07, 'fcm_dpo/beta': 0.002680413890630007, 'fcm_dpo/q_t': 0.41100966930389404, 'fcm_dpo/delta': 0.013700582087039948, 'fcm_dpo/margin': 144.20184326171875, 'margin_dpo/margin_mean': 144.20184326171875, 'margin_dpo/margin_std': 196.0882568359375, 'logps/chosen': -333.74725341796875, 'logps/rejected': -499.61444091796875, 'logps/ref_chosen': -55.66770935058594, 'logps/ref_rejected': -77.33308410644531, 'KL/chosen_KL_mean': -278.07952880859375, 'KL/rejected_KL_mean': -422.2813415527344, 'KL/mean': -350.180419921875, 'KL/std': 171.34066772460938, 'logits/chosen': -0.30099087953567505, 'logits/rejected': -0.2856178879737854, 'epoch': 0.4} + 40%|████ | 274/681 [11:36<16:47, 2.48s/it] 40%|████ | 275/681 [11:39<17:09, 2.54s/it] {'loss': 1.0836, 'grad_norm': 29.38013458251953, 'learning_rate': 3.738869612786737e-07, 'fcm_dpo/beta': 0.0026775910519063473, 'fcm_dpo/q_t': 0.4061974585056305, 'fcm_dpo/delta': -0.004319606348872185, 'fcm_dpo/margin': 150.89297485351562, 'margin_dpo/margin_mean': 150.89297485351562, 'margin_dpo/margin_std': 189.14598083496094, 'logps/chosen': -261.47314453125, 'logps/rejected': -457.0751037597656, 'logps/ref_chosen': -48.594703674316406, 'logps/ref_rejected': -93.30369567871094, 'KL/chosen_KL_mean': -212.87841796875, 'KL/rejected_KL_mean': -363.7713928222656, 'KL/mean': -288.3249206542969, 'KL/std': 169.57728576660156, 'logits/chosen': -0.35198503732681274, 'logits/rejected': -0.35458293557167053, 'epoch': 0.4} + 40%|████ | 275/681 [11:39<17:09, 2.54s/it] 41%|████ | 276/681 [11:42<17:00, 2.52s/it] {'loss': 1.0856, 'grad_norm': 26.85495376586914, 'learning_rate': 3.7277065802070204e-07, 'fcm_dpo/beta': 0.002657739445567131, 'fcm_dpo/q_t': 0.40572842955589294, 'fcm_dpo/delta': -0.014008134603500366, 'fcm_dpo/margin': 155.49386596679688, 'margin_dpo/margin_mean': 155.49386596679688, 'margin_dpo/margin_std': 207.1399383544922, 'logps/chosen': -279.6072998046875, 'logps/rejected': -448.889404296875, 'logps/ref_chosen': -56.57740783691406, 'logps/ref_rejected': -70.36566925048828, 'KL/chosen_KL_mean': -223.02987670898438, 'KL/rejected_KL_mean': -378.52374267578125, 'KL/mean': -300.77679443359375, 'KL/std': 173.62960815429688, 'logits/chosen': -0.33715903759002686, 'logits/rejected': -0.30880868434906006, 'epoch': 0.41} + 41%|████ | 276/681 [11:42<17:00, 2.52s/it] 41%|████ | 277/681 [11:44<16:38, 2.47s/it] {'loss': 1.0874, 'grad_norm': 23.91503143310547, 'learning_rate': 3.71651119641714e-07, 'fcm_dpo/beta': 0.0026624128222465515, 'fcm_dpo/q_t': 0.405393123626709, 'fcm_dpo/delta': -0.011134679429233074, 'fcm_dpo/margin': 154.23895263671875, 'margin_dpo/margin_mean': 154.2389678955078, 'margin_dpo/margin_std': 204.90927124023438, 'logps/chosen': -297.33123779296875, 'logps/rejected': -488.1799011230469, 'logps/ref_chosen': -56.27156066894531, 'logps/ref_rejected': -92.88127136230469, 'KL/chosen_KL_mean': -241.0596923828125, 'KL/rejected_KL_mean': -395.29864501953125, 'KL/mean': -318.1791687011719, 'KL/std': 173.15396118164062, 'logits/chosen': -0.3526584506034851, 'logits/rejected': -0.3328750431537628, 'epoch': 0.41} + 41%|████ | 277/681 [11:44<16:38, 2.47s/it] 41%|████ | 278/681 [11:47<17:04, 2.54s/it] {'loss': 1.0328, 'grad_norm': 29.900175094604492, 'learning_rate': 3.705283756425872e-07, 'fcm_dpo/beta': 0.002619755920022726, 'fcm_dpo/q_t': 0.3913354277610779, 'fcm_dpo/delta': -0.07888495177030563, 'fcm_dpo/margin': 181.26145935058594, 'margin_dpo/margin_mean': 181.261474609375, 'margin_dpo/margin_std': 205.30252075195312, 'logps/chosen': -257.92236328125, 'logps/rejected': -477.4954833984375, 'logps/ref_chosen': -52.94194030761719, 'logps/ref_rejected': -91.25357818603516, 'KL/chosen_KL_mean': -204.98043823242188, 'KL/rejected_KL_mean': -386.24188232421875, 'KL/mean': -295.61114501953125, 'KL/std': 184.41384887695312, 'logits/chosen': -0.339875727891922, 'logits/rejected': -0.3422485291957855, 'epoch': 0.41} + 41%|████ | 278/681 [11:47<17:04, 2.54s/it] 41%|████ | 279/681 [11:49<16:59, 2.54s/it] {'loss': 1.0538, 'grad_norm': 24.77928924560547, 'learning_rate': 3.6940245560867e-07, 'fcm_dpo/beta': 0.0025727972388267517, 'fcm_dpo/q_t': 0.39369115233421326, 'fcm_dpo/delta': -0.07477246224880219, 'fcm_dpo/margin': 182.81570434570312, 'margin_dpo/margin_mean': 182.81570434570312, 'margin_dpo/margin_std': 233.40939331054688, 'logps/chosen': -278.0234680175781, 'logps/rejected': -500.04931640625, 'logps/ref_chosen': -48.641319274902344, 'logps/ref_rejected': -87.8514404296875, 'KL/chosen_KL_mean': -229.3821563720703, 'KL/rejected_KL_mean': -412.1978759765625, 'KL/mean': -320.7900085449219, 'KL/std': 190.40365600585938, 'logits/chosen': -0.29181522130966187, 'logits/rejected': -0.2875681519508362, 'epoch': 0.41} + 41%|████ | 279/681 [11:49<16:59, 2.54s/it] 41%|████ | 280/681 [11:52<17:10, 2.57s/it] {'loss': 1.0257, 'grad_norm': 30.460899353027344, 'learning_rate': 3.6827338920900253e-07, 'fcm_dpo/beta': 0.0025460803881287575, 'fcm_dpo/q_t': 0.389259934425354, 'fcm_dpo/delta': -0.07485491782426834, 'fcm_dpo/margin': 185.09500122070312, 'margin_dpo/margin_mean': 185.09500122070312, 'margin_dpo/margin_std': 191.9048309326172, 'logps/chosen': -283.5154724121094, 'logps/rejected': -508.43218994140625, 'logps/ref_chosen': -58.797122955322266, 'logps/ref_rejected': -98.61885070800781, 'KL/chosen_KL_mean': -224.71835327148438, 'KL/rejected_KL_mean': -409.8133544921875, 'KL/mean': -317.265869140625, 'KL/std': 175.26214599609375, 'logits/chosen': -0.3362073004245758, 'logits/rejected': -0.33699339628219604, 'epoch': 0.41} + 41%|████ | 280/681 [11:52<17:10, 2.57s/it] 41%|████▏ | 281/681 [11:54<17:06, 2.57s/it] {'loss': 1.0649, 'grad_norm': 23.17946434020996, 'learning_rate': 3.6714120619553435e-07, 'fcm_dpo/beta': 0.002538030967116356, 'fcm_dpo/q_t': 0.3998814821243286, 'fcm_dpo/delta': -0.023100202903151512, 'fcm_dpo/margin': 166.26089477539062, 'margin_dpo/margin_mean': 166.26089477539062, 'margin_dpo/margin_std': 189.45980834960938, 'logps/chosen': -263.7736511230469, 'logps/rejected': -455.4286193847656, 'logps/ref_chosen': -55.488521575927734, 'logps/ref_rejected': -80.88258361816406, 'KL/chosen_KL_mean': -208.28512573242188, 'KL/rejected_KL_mean': -374.5460510253906, 'KL/mean': -291.41558837890625, 'KL/std': 173.07376098632812, 'logits/chosen': -0.3537529706954956, 'logits/rejected': -0.3235951066017151, 'epoch': 0.41} + 41%|████▏ | 281/681 [11:54<17:06, 2.57s/it] 41%|████▏ | 282/681 [11:57<16:57, 2.55s/it] {'loss': 1.1482, 'grad_norm': 21.843597412109375, 'learning_rate': 3.660059364023408e-07, 'fcm_dpo/beta': 0.002562709851190448, 'fcm_dpo/q_t': 0.4261664152145386, 'fcm_dpo/delta': 0.06986706703901291, 'fcm_dpo/margin': 129.51254272460938, 'margin_dpo/margin_mean': 129.5125274658203, 'margin_dpo/margin_std': 212.96649169921875, 'logps/chosen': -318.1844787597656, 'logps/rejected': -469.97784423828125, 'logps/ref_chosen': -73.07014465332031, 'logps/ref_rejected': -95.35098266601562, 'KL/chosen_KL_mean': -245.1143341064453, 'KL/rejected_KL_mean': -374.62689208984375, 'KL/mean': -309.87060546875, 'KL/std': 184.24575805664062, 'logits/chosen': -0.4187248945236206, 'logits/rejected': -0.39314818382263184, 'epoch': 0.41} + 41%|████▏ | 282/681 [11:57<16:57, 2.55s/it] 42%|████▏ | 283/681 [11:59<16:50, 2.54s/it] {'loss': 1.004, 'grad_norm': 25.943897247314453, 'learning_rate': 3.6486760974483685e-07, 'fcm_dpo/beta': 0.00252789119258523, 'fcm_dpo/q_t': 0.3806382119655609, 'fcm_dpo/delta': -0.11851293593645096, 'fcm_dpo/margin': 202.76519775390625, 'margin_dpo/margin_mean': 202.7651824951172, 'margin_dpo/margin_std': 206.27056884765625, 'logps/chosen': -318.5585021972656, 'logps/rejected': -556.4117431640625, 'logps/ref_chosen': -61.89844512939453, 'logps/ref_rejected': -96.98655700683594, 'KL/chosen_KL_mean': -256.6600646972656, 'KL/rejected_KL_mean': -459.42523193359375, 'KL/mean': -358.04266357421875, 'KL/std': 218.51571655273438, 'logits/chosen': -0.3803967833518982, 'logits/rejected': -0.38235464692115784, 'epoch': 0.42} + 42%|████▏ | 283/681 [11:59<16:50, 2.54s/it] 42%|████▏ | 284/681 [12:02<17:10, 2.60s/it] {'loss': 1.0392, 'grad_norm': 28.472728729248047, 'learning_rate': 3.6372625621898863e-07, 'fcm_dpo/beta': 0.002476719208061695, 'fcm_dpo/q_t': 0.3947269022464752, 'fcm_dpo/delta': -0.05925939232110977, 'fcm_dpo/margin': 184.29434204101562, 'margin_dpo/margin_mean': 184.29434204101562, 'margin_dpo/margin_std': 204.0950927734375, 'logps/chosen': -308.0798034667969, 'logps/rejected': -527.4078979492188, 'logps/ref_chosen': -58.4355354309082, 'logps/ref_rejected': -93.46926879882812, 'KL/chosen_KL_mean': -249.64425659179688, 'KL/rejected_KL_mean': -433.9386291503906, 'KL/mean': -341.79144287109375, 'KL/std': 203.23036193847656, 'logits/chosen': -0.4060632884502411, 'logits/rejected': -0.392697274684906, 'epoch': 0.42} + 42%|████▏ | 284/681 [12:02<17:10, 2.60s/it] 42%|████▏ | 285/681 [12:05<17:05, 2.59s/it] {'loss': 1.0822, 'grad_norm': 28.78072738647461, 'learning_rate': 3.625819059005228e-07, 'fcm_dpo/beta': 0.002482138341292739, 'fcm_dpo/q_t': 0.4034787714481354, 'fcm_dpo/delta': -0.00985686480998993, 'fcm_dpo/margin': 164.75491333007812, 'margin_dpo/margin_mean': 164.7549285888672, 'margin_dpo/margin_std': 204.08389282226562, 'logps/chosen': -364.97381591796875, 'logps/rejected': -562.6233520507812, 'logps/ref_chosen': -66.23219299316406, 'logps/ref_rejected': -99.1268310546875, 'KL/chosen_KL_mean': -298.7416076660156, 'KL/rejected_KL_mean': -463.49652099609375, 'KL/mean': -381.11907958984375, 'KL/std': 184.44662475585938, 'logits/chosen': -0.3715532422065735, 'logits/rejected': -0.3519101142883301, 'epoch': 0.42} + 42%|████▏ | 285/681 [12:05<17:05, 2.59s/it] 42%|████▏ | 286/681 [12:07<16:56, 2.57s/it] {'loss': 1.0611, 'grad_norm': 26.472732543945312, 'learning_rate': 3.614345889441346e-07, 'fcm_dpo/beta': 0.002452992368489504, 'fcm_dpo/q_t': 0.3967989683151245, 'fcm_dpo/delta': -0.051132772117853165, 'fcm_dpo/margin': 182.9866485595703, 'margin_dpo/margin_mean': 182.98663330078125, 'margin_dpo/margin_std': 229.21395874023438, 'logps/chosen': -377.8184814453125, 'logps/rejected': -576.4425659179688, 'logps/ref_chosen': -72.95100402832031, 'logps/ref_rejected': -88.58845520019531, 'KL/chosen_KL_mean': -304.86749267578125, 'KL/rejected_KL_mean': -487.8541259765625, 'KL/mean': -396.3608093261719, 'KL/std': 213.21237182617188, 'logits/chosen': -0.3906969428062439, 'logits/rejected': -0.3723870813846588, 'epoch': 0.42} + 42%|████▏ | 286/681 [12:07<16:56, 2.57s/it] 42%|████▏ | 287/681 [12:09<16:18, 2.48s/it] {'loss': 1.1096, 'grad_norm': 32.449649810791016, 'learning_rate': 3.6028433558269275e-07, 'fcm_dpo/beta': 0.002456413581967354, 'fcm_dpo/q_t': 0.4151533842086792, 'fcm_dpo/delta': 0.032731398940086365, 'fcm_dpo/margin': 149.98983764648438, 'margin_dpo/margin_mean': 149.98983764648438, 'margin_dpo/margin_std': 203.369384765625, 'logps/chosen': -347.3873291015625, 'logps/rejected': -513.5321044921875, 'logps/ref_chosen': -61.54115295410156, 'logps/ref_rejected': -77.69607543945312, 'KL/chosen_KL_mean': -285.84619140625, 'KL/rejected_KL_mean': -435.83599853515625, 'KL/mean': -360.84112548828125, 'KL/std': 188.15838623046875, 'logits/chosen': -0.3350308835506439, 'logits/rejected': -0.3064236044883728, 'epoch': 0.42} + 42%|████▏ | 287/681 [12:09<16:18, 2.48s/it] 42%|████▏ | 288/681 [12:12<16:23, 2.50s/it] {'loss': 1.0452, 'grad_norm': 28.688644409179688, 'learning_rate': 3.5913117612644327e-07, 'fcm_dpo/beta': 0.0024195481091737747, 'fcm_dpo/q_t': 0.39451804757118225, 'fcm_dpo/delta': -0.05505270138382912, 'fcm_dpo/margin': 186.38467407226562, 'margin_dpo/margin_mean': 186.38467407226562, 'margin_dpo/margin_std': 199.76907348632812, 'logps/chosen': -339.6158752441406, 'logps/rejected': -556.675048828125, 'logps/ref_chosen': -56.661224365234375, 'logps/ref_rejected': -87.33570098876953, 'KL/chosen_KL_mean': -282.95465087890625, 'KL/rejected_KL_mean': -469.3393249511719, 'KL/mean': -376.14697265625, 'KL/std': 190.60353088378906, 'logits/chosen': -0.3529576063156128, 'logits/rejected': -0.33676382899284363, 'epoch': 0.42} + 42%|████▏ | 288/681 [12:12<16:23, 2.50s/it] 42%|████▏ | 289/681 [12:15<16:22, 2.51s/it] {'loss': 1.0183, 'grad_norm': 35.85638427734375, 'learning_rate': 3.5797514096221024e-07, 'fcm_dpo/beta': 0.002400734229013324, 'fcm_dpo/q_t': 0.3854549527168274, 'fcm_dpo/delta': -0.10641852021217346, 'fcm_dpo/margin': 208.77468872070312, 'margin_dpo/margin_mean': 208.77468872070312, 'margin_dpo/margin_std': 230.7718505859375, 'logps/chosen': -313.2211608886719, 'logps/rejected': -564.4080810546875, 'logps/ref_chosen': -45.23039245605469, 'logps/ref_rejected': -87.64266967773438, 'KL/chosen_KL_mean': -267.9907531738281, 'KL/rejected_KL_mean': -476.76544189453125, 'KL/mean': -372.37811279296875, 'KL/std': 207.67068481445312, 'logits/chosen': -0.30069178342819214, 'logits/rejected': -0.30261388421058655, 'epoch': 0.42} + 42%|████▏ | 289/681 [12:15<16:22, 2.51s/it] 43%|████▎ | 290/681 [12:17<16:29, 2.53s/it] {'loss': 1.0358, 'grad_norm': 25.733200073242188, 'learning_rate': 3.568162605525952e-07, 'fcm_dpo/beta': 0.002342382911592722, 'fcm_dpo/q_t': 0.38870781660079956, 'fcm_dpo/delta': -0.09751632809638977, 'fcm_dpo/margin': 210.26165771484375, 'margin_dpo/margin_mean': 210.26165771484375, 'margin_dpo/margin_std': 258.7429504394531, 'logps/chosen': -336.72406005859375, 'logps/rejected': -608.2227783203125, 'logps/ref_chosen': -55.47149658203125, 'logps/ref_rejected': -116.70857238769531, 'KL/chosen_KL_mean': -281.2525634765625, 'KL/rejected_KL_mean': -491.51422119140625, 'KL/mean': -386.38336181640625, 'KL/std': 217.03399658203125, 'logits/chosen': -0.3384855091571808, 'logits/rejected': -0.35991525650024414, 'epoch': 0.43} + 43%|████▎ | 290/681 [12:17<16:29, 2.53s/it] 43%|████▎ | 291/681 [12:20<16:34, 2.55s/it] {'loss': 1.0465, 'grad_norm': 24.563919067382812, 'learning_rate': 3.5565456543517485e-07, 'fcm_dpo/beta': 0.00232522701844573, 'fcm_dpo/q_t': 0.3946029245853424, 'fcm_dpo/delta': -0.053772568702697754, 'fcm_dpo/margin': 194.061279296875, 'margin_dpo/margin_mean': 194.061279296875, 'margin_dpo/margin_std': 216.65316772460938, 'logps/chosen': -287.3287048339844, 'logps/rejected': -507.42669677734375, 'logps/ref_chosen': -63.26036834716797, 'logps/ref_rejected': -89.29708862304688, 'KL/chosen_KL_mean': -224.06832885742188, 'KL/rejected_KL_mean': -418.129638671875, 'KL/mean': -321.0989685058594, 'KL/std': 182.753662109375, 'logits/chosen': -0.34820133447647095, 'logits/rejected': -0.3331354260444641, 'epoch': 0.43} + 43%|████▎ | 291/681 [12:20<16:34, 2.55s/it] 43%|████▎ | 292/681 [12:22<16:11, 2.50s/it] {'loss': 1.0545, 'grad_norm': 23.112098693847656, 'learning_rate': 3.5449008622169583e-07, 'fcm_dpo/beta': 0.0022970177233219147, 'fcm_dpo/q_t': 0.3946416676044464, 'fcm_dpo/delta': -0.058918386697769165, 'fcm_dpo/margin': 198.60556030273438, 'margin_dpo/margin_mean': 198.60556030273438, 'margin_dpo/margin_std': 243.43380737304688, 'logps/chosen': -294.66961669921875, 'logps/rejected': -529.3179931640625, 'logps/ref_chosen': -53.91852951049805, 'logps/ref_rejected': -89.96138000488281, 'KL/chosen_KL_mean': -240.75106811523438, 'KL/rejected_KL_mean': -439.35662841796875, 'KL/mean': -340.0538330078125, 'KL/std': 209.68600463867188, 'logits/chosen': -0.35769540071487427, 'logits/rejected': -0.3433658480644226, 'epoch': 0.43} + 43%|████▎ | 292/681 [12:22<16:11, 2.50s/it] 43%|████▎ | 293/681 [12:25<16:26, 2.54s/it] {'loss': 1.1312, 'grad_norm': 43.493621826171875, 'learning_rate': 3.5332285359726846e-07, 'fcm_dpo/beta': 0.002302415668964386, 'fcm_dpo/q_t': 0.41990119218826294, 'fcm_dpo/delta': 0.04887353628873825, 'fcm_dpo/margin': 153.2054443359375, 'margin_dpo/margin_mean': 153.2054443359375, 'margin_dpo/margin_std': 233.70460510253906, 'logps/chosen': -295.378173828125, 'logps/rejected': -466.05999755859375, 'logps/ref_chosen': -60.376033782958984, 'logps/ref_rejected': -77.85244750976562, 'KL/chosen_KL_mean': -235.00213623046875, 'KL/rejected_KL_mean': -388.20758056640625, 'KL/mean': -311.6048583984375, 'KL/std': 205.09054565429688, 'logits/chosen': -0.3963527977466583, 'logits/rejected': -0.385434627532959, 'epoch': 0.43} + 43%|████▎ | 293/681 [12:25<16:26, 2.54s/it] 43%|████▎ | 294/681 [12:27<16:08, 2.50s/it] {'loss': 1.0942, 'grad_norm': 27.905162811279297, 'learning_rate': 3.5215289831955786e-07, 'fcm_dpo/beta': 0.002301940694451332, 'fcm_dpo/q_t': 0.4098511040210724, 'fcm_dpo/delta': 0.005669664591550827, 'fcm_dpo/margin': 171.36770629882812, 'margin_dpo/margin_mean': 171.36770629882812, 'margin_dpo/margin_std': 227.7198486328125, 'logps/chosen': -260.23968505859375, 'logps/rejected': -465.4168395996094, 'logps/ref_chosen': -48.0875358581543, 'logps/ref_rejected': -81.89698791503906, 'KL/chosen_KL_mean': -212.15213012695312, 'KL/rejected_KL_mean': -383.51983642578125, 'KL/mean': -297.83599853515625, 'KL/std': 185.86453247070312, 'logits/chosen': -0.3524784743785858, 'logits/rejected': -0.3563184142112732, 'epoch': 0.43} + 43%|████▎ | 294/681 [12:27<16:08, 2.50s/it] 43%|████▎ | 295/681 [12:30<15:57, 2.48s/it] {'loss': 1.0767, 'grad_norm': 32.099098205566406, 'learning_rate': 3.509802512179737e-07, 'fcm_dpo/beta': 0.002300859661772847, 'fcm_dpo/q_t': 0.4024040997028351, 'fcm_dpo/delta': -0.02741077169775963, 'fcm_dpo/margin': 185.25485229492188, 'margin_dpo/margin_mean': 185.25485229492188, 'margin_dpo/margin_std': 240.50933837890625, 'logps/chosen': -307.9371032714844, 'logps/rejected': -530.7236328125, 'logps/ref_chosen': -49.92467498779297, 'logps/ref_rejected': -87.45632934570312, 'KL/chosen_KL_mean': -258.012451171875, 'KL/rejected_KL_mean': -443.26727294921875, 'KL/mean': -350.6398620605469, 'KL/std': 206.24761962890625, 'logits/chosen': -0.3441588878631592, 'logits/rejected': -0.3444691002368927, 'epoch': 0.43} + 43%|████▎ | 295/681 [12:30<15:57, 2.48s/it] 43%|████▎ | 296/681 [12:32<15:52, 2.47s/it] {'loss': 1.1871, 'grad_norm': 31.48113250732422, 'learning_rate': 3.498049431928577e-07, 'fcm_dpo/beta': 0.0022906125523149967, 'fcm_dpo/q_t': 0.42863988876342773, 'fcm_dpo/delta': -0.00922891590744257, 'fcm_dpo/margin': 134.98883056640625, 'margin_dpo/margin_mean': 134.98883056640625, 'margin_dpo/margin_std': 261.055908203125, 'logps/chosen': -387.2337341308594, 'logps/rejected': -549.8204345703125, 'logps/ref_chosen': -65.49124145507812, 'logps/ref_rejected': -93.08908081054688, 'KL/chosen_KL_mean': -321.74249267578125, 'KL/rejected_KL_mean': -456.7313232421875, 'KL/mean': -389.23687744140625, 'KL/std': 204.92347717285156, 'logits/chosen': -0.40720900893211365, 'logits/rejected': -0.3905686140060425, 'epoch': 0.43} + 43%|████▎ | 296/681 [12:32<15:52, 2.47s/it] 44%|████▎ | 297/681 [12:35<16:02, 2.51s/it] {'loss': 1.0962, 'grad_norm': 23.6795711517334, 'learning_rate': 3.486270052146694e-07, 'fcm_dpo/beta': 0.002297044266015291, 'fcm_dpo/q_t': 0.41283226013183594, 'fcm_dpo/delta': 0.028556976467370987, 'fcm_dpo/margin': 162.16717529296875, 'margin_dpo/margin_mean': 162.16717529296875, 'margin_dpo/margin_std': 195.96707153320312, 'logps/chosen': -343.57342529296875, 'logps/rejected': -544.4021606445312, 'logps/ref_chosen': -56.476951599121094, 'logps/ref_rejected': -95.1385498046875, 'KL/chosen_KL_mean': -287.096435546875, 'KL/rejected_KL_mean': -449.26361083984375, 'KL/mean': -368.1800537109375, 'KL/std': 196.81576538085938, 'logits/chosen': -0.3867399990558624, 'logits/rejected': -0.387523889541626, 'epoch': 0.44} + 44%|████▎ | 297/681 [12:35<16:02, 2.51s/it] 44%|████▍ | 298/681 [12:37<16:07, 2.53s/it] {'loss': 1.0678, 'grad_norm': 26.146221160888672, 'learning_rate': 3.474464683231698e-07, 'fcm_dpo/beta': 0.0022826807107776403, 'fcm_dpo/q_t': 0.39509522914886475, 'fcm_dpo/delta': -0.08633655309677124, 'fcm_dpo/margin': 211.24917602539062, 'margin_dpo/margin_mean': 211.24917602539062, 'margin_dpo/margin_std': 307.80206298828125, 'logps/chosen': -391.0260314941406, 'logps/rejected': -651.6121826171875, 'logps/ref_chosen': -67.32516479492188, 'logps/ref_rejected': -116.66217041015625, 'KL/chosen_KL_mean': -323.70086669921875, 'KL/rejected_KL_mean': -534.9500122070312, 'KL/mean': -429.325439453125, 'KL/std': 264.07275390625, 'logits/chosen': -0.39172685146331787, 'logits/rejected': -0.41062480211257935, 'epoch': 0.44} + 44%|████▍ | 298/681 [12:37<16:07, 2.53s/it] 44%|████▍ | 299/681 [12:40<16:06, 2.53s/it] {'loss': 1.095, 'grad_norm': 33.49094772338867, 'learning_rate': 3.462633636266041e-07, 'fcm_dpo/beta': 0.002272904384881258, 'fcm_dpo/q_t': 0.40732306241989136, 'fcm_dpo/delta': -0.011529970914125443, 'fcm_dpo/margin': 180.617431640625, 'margin_dpo/margin_mean': 180.617431640625, 'margin_dpo/margin_std': 251.55711364746094, 'logps/chosen': -323.919921875, 'logps/rejected': -539.9035034179688, 'logps/ref_chosen': -48.96209716796875, 'logps/ref_rejected': -84.32823944091797, 'KL/chosen_KL_mean': -274.95782470703125, 'KL/rejected_KL_mean': -455.57525634765625, 'KL/mean': -365.26654052734375, 'KL/std': 200.08763122558594, 'logits/chosen': -0.36451274156570435, 'logits/rejected': -0.3671821653842926, 'epoch': 0.44} + 44%|████▍ | 299/681 [12:40<16:06, 2.53s/it] 44%|████▍ | 300/681 [12:42<16:17, 2.57s/it] {'loss': 1.0665, 'grad_norm': 41.03089904785156, 'learning_rate': 3.4507772230088147e-07, 'fcm_dpo/beta': 0.002238738350570202, 'fcm_dpo/q_t': 0.3924391269683838, 'fcm_dpo/delta': -0.07992631196975708, 'fcm_dpo/margin': 212.70004272460938, 'margin_dpo/margin_mean': 212.70004272460938, 'margin_dpo/margin_std': 292.86248779296875, 'logps/chosen': -413.8783874511719, 'logps/rejected': -663.47119140625, 'logps/ref_chosen': -59.07371139526367, 'logps/ref_rejected': -95.9664535522461, 'KL/chosen_KL_mean': -354.8046875, 'KL/rejected_KL_mean': -567.5047607421875, 'KL/mean': -461.15472412109375, 'KL/std': 243.07180786132812, 'logits/chosen': -0.35996705293655396, 'logits/rejected': -0.36685582995414734, 'epoch': 0.44} + 44%|████▍ | 300/681 [12:42<16:17, 2.57s/it] 44%|████▍ | 301/681 [12:45<15:42, 2.48s/it] {'loss': 1.051, 'grad_norm': 35.91939163208008, 'learning_rate': 3.4388957558875316e-07, 'fcm_dpo/beta': 0.0021908977068960667, 'fcm_dpo/q_t': 0.3950774669647217, 'fcm_dpo/delta': -0.07324320077896118, 'fcm_dpo/margin': 214.18943786621094, 'margin_dpo/margin_mean': 214.189453125, 'margin_dpo/margin_std': 272.2989807128906, 'logps/chosen': -362.63031005859375, 'logps/rejected': -611.9239501953125, 'logps/ref_chosen': -57.249366760253906, 'logps/ref_rejected': -92.35354614257812, 'KL/chosen_KL_mean': -305.3809814453125, 'KL/rejected_KL_mean': -519.5704345703125, 'KL/mean': -412.4756774902344, 'KL/std': 226.57168579101562, 'logits/chosen': -0.3929128050804138, 'logits/rejected': -0.3964204788208008, 'epoch': 0.44} + 44%|████▍ | 301/681 [12:45<15:42, 2.48s/it] 44%|████▍ | 302/681 [12:47<16:05, 2.55s/it] {'loss': 1.0984, 'grad_norm': 35.29695129394531, 'learning_rate': 3.426989547989902e-07, 'fcm_dpo/beta': 0.0021872916258871555, 'fcm_dpo/q_t': 0.4114551544189453, 'fcm_dpo/delta': 0.013604838401079178, 'fcm_dpo/margin': 176.79473876953125, 'margin_dpo/margin_mean': 176.79470825195312, 'margin_dpo/margin_std': 234.30789184570312, 'logps/chosen': -310.5064392089844, 'logps/rejected': -533.3295288085938, 'logps/ref_chosen': -51.197994232177734, 'logps/ref_rejected': -97.22636413574219, 'KL/chosen_KL_mean': -259.3084411621094, 'KL/rejected_KL_mean': -436.1031494140625, 'KL/mean': -347.705810546875, 'KL/std': 198.4259033203125, 'logits/chosen': -0.37686437368392944, 'logits/rejected': -0.3835713863372803, 'epoch': 0.44} + 44%|████▍ | 302/681 [12:47<16:05, 2.55s/it] 44%|████▍ | 303/681 [12:50<16:27, 2.61s/it] {'loss': 1.1322, 'grad_norm': 39.67668914794922, 'learning_rate': 3.4150589130555773e-07, 'fcm_dpo/beta': 0.002209719270467758, 'fcm_dpo/q_t': 0.4192725419998169, 'fcm_dpo/delta': 0.04984103888273239, 'fcm_dpo/margin': 159.26882934570312, 'margin_dpo/margin_mean': 159.26882934570312, 'margin_dpo/margin_std': 240.13339233398438, 'logps/chosen': -329.64105224609375, 'logps/rejected': -509.141357421875, 'logps/ref_chosen': -66.71394348144531, 'logps/ref_rejected': -86.94542694091797, 'KL/chosen_KL_mean': -262.9270935058594, 'KL/rejected_KL_mean': -422.1959228515625, 'KL/mean': -342.5615234375, 'KL/std': 198.67970275878906, 'logits/chosen': -0.39630812406539917, 'logits/rejected': -0.38712817430496216, 'epoch': 0.44} + 44%|████▍ | 303/681 [12:50<16:27, 2.61s/it] 45%|████▍ | 304/681 [12:53<16:36, 2.64s/it] {'loss': 1.0501, 'grad_norm': 28.95069694519043, 'learning_rate': 3.403104165467883e-07, 'fcm_dpo/beta': 0.0022241733968257904, 'fcm_dpo/q_t': 0.4025576710700989, 'fcm_dpo/delta': -0.006868166849017143, 'fcm_dpo/margin': 182.71343994140625, 'margin_dpo/margin_mean': 182.71343994140625, 'margin_dpo/margin_std': 153.1214599609375, 'logps/chosen': -295.95172119140625, 'logps/rejected': -497.1865234375, 'logps/ref_chosen': -71.95069885253906, 'logps/ref_rejected': -90.47203063964844, 'KL/chosen_KL_mean': -224.00103759765625, 'KL/rejected_KL_mean': -406.7145080566406, 'KL/mean': -315.3577880859375, 'KL/std': 181.42640686035156, 'logits/chosen': -0.4077110290527344, 'logits/rejected': -0.40159422159194946, 'epoch': 0.45} + 45%|████▍ | 304/681 [12:53<16:36, 2.64s/it] 45%|████▍ | 305/681 [12:55<16:32, 2.64s/it] {'loss': 1.1197, 'grad_norm': 25.189720153808594, 'learning_rate': 3.391125620245535e-07, 'fcm_dpo/beta': 0.002216045744717121, 'fcm_dpo/q_t': 0.41752344369888306, 'fcm_dpo/delta': 0.03913535922765732, 'fcm_dpo/margin': 163.2830810546875, 'margin_dpo/margin_mean': 163.2830810546875, 'margin_dpo/margin_std': 229.060546875, 'logps/chosen': -307.9990234375, 'logps/rejected': -497.241455078125, 'logps/ref_chosen': -66.79523468017578, 'logps/ref_rejected': -92.75459289550781, 'KL/chosen_KL_mean': -241.20379638671875, 'KL/rejected_KL_mean': -404.48687744140625, 'KL/mean': -322.8453369140625, 'KL/std': 208.91802978515625, 'logits/chosen': -0.3988510072231293, 'logits/rejected': -0.382416695356369, 'epoch': 0.45} + 45%|████▍ | 305/681 [12:55<16:32, 2.64s/it] 45%|████▍ | 306/681 [12:58<16:30, 2.64s/it] {'loss': 1.1059, 'grad_norm': 25.411108016967773, 'learning_rate': 3.3791235930343417e-07, 'fcm_dpo/beta': 0.0022513873409479856, 'fcm_dpo/q_t': 0.41661351919174194, 'fcm_dpo/delta': 0.04347452521324158, 'fcm_dpo/margin': 158.99728393554688, 'margin_dpo/margin_mean': 158.99728393554688, 'margin_dpo/margin_std': 197.02398681640625, 'logps/chosen': -313.34088134765625, 'logps/rejected': -487.8134765625, 'logps/ref_chosen': -69.68389892578125, 'logps/ref_rejected': -85.15919494628906, 'KL/chosen_KL_mean': -243.65699768066406, 'KL/rejected_KL_mean': -402.654296875, 'KL/mean': -323.1556396484375, 'KL/std': 187.81524658203125, 'logits/chosen': -0.3887644410133362, 'logits/rejected': -0.3672389090061188, 'epoch': 0.45} + 45%|████▍ | 306/681 [12:58<16:30, 2.64s/it] 45%|████▌ | 307/681 [13:01<16:38, 2.67s/it] {'loss': 1.1067, 'grad_norm': 30.055885314941406, 'learning_rate': 3.367098400098881e-07, 'fcm_dpo/beta': 0.002260227221995592, 'fcm_dpo/q_t': 0.41427597403526306, 'fcm_dpo/delta': 0.03280823305249214, 'fcm_dpo/margin': 162.99290466308594, 'margin_dpo/margin_mean': 162.99288940429688, 'margin_dpo/margin_std': 214.08172607421875, 'logps/chosen': -299.565673828125, 'logps/rejected': -479.3654479980469, 'logps/ref_chosen': -70.16542053222656, 'logps/ref_rejected': -86.97230529785156, 'KL/chosen_KL_mean': -229.40023803710938, 'KL/rejected_KL_mean': -392.39312744140625, 'KL/mean': -310.8966979980469, 'KL/std': 175.57015991210938, 'logits/chosen': -0.3810919523239136, 'logits/rejected': -0.36472952365875244, 'epoch': 0.45} + 45%|████▌ | 307/681 [13:01<16:38, 2.67s/it] 45%|████▌ | 308/681 [13:03<16:24, 2.64s/it] {'loss': 1.0539, 'grad_norm': 34.93415451049805, 'learning_rate': 3.355050358314172e-07, 'fcm_dpo/beta': 0.002260176232084632, 'fcm_dpo/q_t': 0.40062737464904785, 'fcm_dpo/delta': -0.021941393613815308, 'fcm_dpo/margin': 186.27633666992188, 'margin_dpo/margin_mean': 186.27633666992188, 'margin_dpo/margin_std': 191.69363403320312, 'logps/chosen': -284.6790771484375, 'logps/rejected': -495.0826416015625, 'logps/ref_chosen': -55.2449951171875, 'logps/ref_rejected': -79.37226104736328, 'KL/chosen_KL_mean': -229.43406677246094, 'KL/rejected_KL_mean': -415.71038818359375, 'KL/mean': -322.5722351074219, 'KL/std': 199.0686492919922, 'logits/chosen': -0.3374328017234802, 'logits/rejected': -0.32412296533584595, 'epoch': 0.45} + 45%|████▌ | 308/681 [13:03<16:24, 2.64s/it] 45%|████▌ | 309/681 [13:06<16:26, 2.65s/it] {'loss': 1.0826, 'grad_norm': 54.994361877441406, 'learning_rate': 3.3429797851573183e-07, 'fcm_dpo/beta': 0.0022653641644865274, 'fcm_dpo/q_t': 0.4036235809326172, 'fcm_dpo/delta': -0.011782001703977585, 'fcm_dpo/margin': 181.3565216064453, 'margin_dpo/margin_mean': 181.3565216064453, 'margin_dpo/margin_std': 227.5936279296875, 'logps/chosen': -282.17041015625, 'logps/rejected': -496.9085693359375, 'logps/ref_chosen': -48.959083557128906, 'logps/ref_rejected': -82.34072875976562, 'KL/chosen_KL_mean': -233.2113037109375, 'KL/rejected_KL_mean': -414.56781005859375, 'KL/mean': -323.8895568847656, 'KL/std': 213.318603515625, 'logits/chosen': -0.33466869592666626, 'logits/rejected': -0.32765206694602966, 'epoch': 0.45} + 45%|████▌ | 309/681 [13:06<16:26, 2.65s/it] 46%|████▌ | 310/681 [13:09<16:25, 2.66s/it] {'loss': 1.0633, 'grad_norm': 21.421035766601562, 'learning_rate': 3.3308869986991487e-07, 'fcm_dpo/beta': 0.002258453518152237, 'fcm_dpo/q_t': 0.40412867069244385, 'fcm_dpo/delta': -0.0059468671679496765, 'fcm_dpo/margin': 179.53656005859375, 'margin_dpo/margin_mean': 179.5365447998047, 'margin_dpo/margin_std': 181.55760192871094, 'logps/chosen': -326.950927734375, 'logps/rejected': -523.676025390625, 'logps/ref_chosen': -62.74177932739258, 'logps/ref_rejected': -79.9302978515625, 'KL/chosen_KL_mean': -264.2091369628906, 'KL/rejected_KL_mean': -443.7456970214844, 'KL/mean': -353.9774169921875, 'KL/std': 186.558349609375, 'logits/chosen': -0.35127896070480347, 'logits/rejected': -0.33690258860588074, 'epoch': 0.46} + 46%|████▌ | 310/681 [13:09<16:25, 2.66s/it] 46%|████▌ | 311/681 [13:11<15:57, 2.59s/it] {'loss': 1.0689, 'grad_norm': 27.19573402404785, 'learning_rate': 3.3187723175958346e-07, 'fcm_dpo/beta': 0.0022359404247254133, 'fcm_dpo/q_t': 0.40077459812164307, 'fcm_dpo/delta': -0.039258040487766266, 'fcm_dpo/margin': 195.67971801757812, 'margin_dpo/margin_mean': 195.67970275878906, 'margin_dpo/margin_std': 252.54080200195312, 'logps/chosen': -337.34857177734375, 'logps/rejected': -557.4384765625, 'logps/ref_chosen': -53.02798080444336, 'logps/ref_rejected': -77.43820190429688, 'KL/chosen_KL_mean': -284.3205871582031, 'KL/rejected_KL_mean': -480.00030517578125, 'KL/mean': -382.16046142578125, 'KL/std': 239.59608459472656, 'logits/chosen': -0.36365634202957153, 'logits/rejected': -0.34147655963897705, 'epoch': 0.46} + 46%|████▌ | 311/681 [13:11<15:57, 2.59s/it] 46%|████▌ | 312/681 [13:13<15:23, 2.50s/it] {'loss': 1.0899, 'grad_norm': 24.042999267578125, 'learning_rate': 3.306636061080487e-07, 'fcm_dpo/beta': 0.002237812615931034, 'fcm_dpo/q_t': 0.4078907370567322, 'fcm_dpo/delta': -0.01030636951327324, 'fcm_dpo/margin': 183.03475952148438, 'margin_dpo/margin_mean': 183.03475952148438, 'margin_dpo/margin_std': 251.37765502929688, 'logps/chosen': -321.888427734375, 'logps/rejected': -531.32373046875, 'logps/ref_chosen': -49.39221954345703, 'logps/ref_rejected': -75.79280853271484, 'KL/chosen_KL_mean': -272.4962158203125, 'KL/rejected_KL_mean': -455.53094482421875, 'KL/mean': -364.0135498046875, 'KL/std': 213.603759765625, 'logits/chosen': -0.2863520681858063, 'logits/rejected': -0.27598023414611816, 'epoch': 0.46} + 46%|████▌ | 312/681 [13:14<15:23, 2.50s/it] 46%|████▌ | 313/681 [13:16<15:03, 2.45s/it] {'loss': 1.0826, 'grad_norm': 25.419347763061523, 'learning_rate': 3.2944785489547537e-07, 'fcm_dpo/beta': 0.0022342309821397066, 'fcm_dpo/q_t': 0.4029422402381897, 'fcm_dpo/delta': -0.025912020355463028, 'fcm_dpo/margin': 189.80377197265625, 'margin_dpo/margin_mean': 189.80377197265625, 'margin_dpo/margin_std': 249.74856567382812, 'logps/chosen': -304.446044921875, 'logps/rejected': -530.5032958984375, 'logps/ref_chosen': -50.152740478515625, 'logps/ref_rejected': -86.40620422363281, 'KL/chosen_KL_mean': -254.2932891845703, 'KL/rejected_KL_mean': -444.0970458984375, 'KL/mean': -349.1951599121094, 'KL/std': 223.68814086914062, 'logits/chosen': -0.36437875032424927, 'logits/rejected': -0.36159804463386536, 'epoch': 0.46} + 46%|████▌ | 313/681 [13:16<15:03, 2.45s/it] 46%|████▌ | 314/681 [13:18<15:23, 2.52s/it] {'loss': 1.1211, 'grad_norm': 22.368419647216797, 'learning_rate': 3.2823001015803857e-07, 'fcm_dpo/beta': 0.0022100405767560005, 'fcm_dpo/q_t': 0.4131169021129608, 'fcm_dpo/delta': 0.010647352784872055, 'fcm_dpo/margin': 176.2192840576172, 'margin_dpo/margin_mean': 176.21929931640625, 'margin_dpo/margin_std': 275.643798828125, 'logps/chosen': -342.46612548828125, 'logps/rejected': -559.04443359375, 'logps/ref_chosen': -57.237579345703125, 'logps/ref_rejected': -97.5965347290039, 'KL/chosen_KL_mean': -285.2285461425781, 'KL/rejected_KL_mean': -461.4478759765625, 'KL/mean': -373.33819580078125, 'KL/std': 217.80224609375, 'logits/chosen': -0.40202397108078003, 'logits/rejected': -0.4044821262359619, 'epoch': 0.46} + 46%|████▌ | 314/681 [13:19<15:23, 2.52s/it] 46%|████▋ | 315/681 [13:21<15:33, 2.55s/it] {'loss': 1.1212, 'grad_norm': 22.533966064453125, 'learning_rate': 3.270101039870797e-07, 'fcm_dpo/beta': 0.0022343965247273445, 'fcm_dpo/q_t': 0.4190711975097656, 'fcm_dpo/delta': 0.049690838903188705, 'fcm_dpo/margin': 157.5762939453125, 'margin_dpo/margin_mean': 157.57626342773438, 'margin_dpo/margin_std': 219.2792205810547, 'logps/chosen': -312.58697509765625, 'logps/rejected': -506.7745361328125, 'logps/ref_chosen': -49.06958770751953, 'logps/ref_rejected': -85.68087768554688, 'KL/chosen_KL_mean': -263.5173645019531, 'KL/rejected_KL_mean': -421.0936279296875, 'KL/mean': -342.3055114746094, 'KL/std': 194.72991943359375, 'logits/chosen': -0.3270511329174042, 'logits/rejected': -0.3310539126396179, 'epoch': 0.46} + 46%|████▋ | 315/681 [13:21<15:33, 2.55s/it] 46%|████▋ | 316/681 [13:24<15:30, 2.55s/it] {'loss': 1.0059, 'grad_norm': 27.900861740112305, 'learning_rate': 3.2578816852826086e-07, 'fcm_dpo/beta': 0.0021924672182649374, 'fcm_dpo/q_t': 0.3829796314239502, 'fcm_dpo/delta': -0.12023768573999405, 'fcm_dpo/margin': 234.0082244873047, 'margin_dpo/margin_mean': 234.00823974609375, 'margin_dpo/margin_std': 248.06161499023438, 'logps/chosen': -311.1673583984375, 'logps/rejected': -592.1963500976562, 'logps/ref_chosen': -54.26074981689453, 'logps/ref_rejected': -101.2814712524414, 'KL/chosen_KL_mean': -256.9066162109375, 'KL/rejected_KL_mean': -490.9148864746094, 'KL/mean': -373.9107360839844, 'KL/std': 229.14527893066406, 'logits/chosen': -0.3719561696052551, 'logits/rejected': -0.3757820725440979, 'epoch': 0.46} + 46%|████▋ | 316/681 [13:24<15:30, 2.55s/it] 47%|████▋ | 317/681 [13:26<15:37, 2.58s/it] {'loss': 0.9834, 'grad_norm': 25.268577575683594, 'learning_rate': 3.2456423598071783e-07, 'fcm_dpo/beta': 0.0021530133672058582, 'fcm_dpo/q_t': 0.3760995864868164, 'fcm_dpo/delta': -0.13990481197834015, 'fcm_dpo/margin': 247.40829467773438, 'margin_dpo/margin_mean': 247.40829467773438, 'margin_dpo/margin_std': 231.6420135498047, 'logps/chosen': -317.3569641113281, 'logps/rejected': -609.3701171875, 'logps/ref_chosen': -56.094207763671875, 'logps/ref_rejected': -100.69905090332031, 'KL/chosen_KL_mean': -261.26275634765625, 'KL/rejected_KL_mean': -508.6710205078125, 'KL/mean': -384.9669189453125, 'KL/std': 205.64381408691406, 'logits/chosen': -0.3866614103317261, 'logits/rejected': -0.3781118392944336, 'epoch': 0.47} + 47%|████▋ | 317/681 [13:26<15:37, 2.58s/it] 47%|████▋ | 318/681 [13:29<15:17, 2.53s/it] {'loss': 1.0885, 'grad_norm': 25.41587257385254, 'learning_rate': 3.233383385962115e-07, 'fcm_dpo/beta': 0.002135781804099679, 'fcm_dpo/q_t': 0.4088175892829895, 'fcm_dpo/delta': 0.00455857440829277, 'fcm_dpo/margin': 185.17642211914062, 'margin_dpo/margin_mean': 185.17642211914062, 'margin_dpo/margin_std': 236.15972900390625, 'logps/chosen': -350.0437316894531, 'logps/rejected': -553.3387451171875, 'logps/ref_chosen': -64.64569854736328, 'logps/ref_rejected': -82.76425170898438, 'KL/chosen_KL_mean': -285.3980407714844, 'KL/rejected_KL_mean': -470.574462890625, 'KL/mean': -377.98626708984375, 'KL/std': 215.07859802246094, 'logits/chosen': -0.43662551045417786, 'logits/rejected': -0.40799379348754883, 'epoch': 0.47} + 47%|████▋ | 318/681 [13:29<15:17, 2.53s/it] 47%|████▋ | 319/681 [13:31<15:33, 2.58s/it] {'loss': 1.0362, 'grad_norm': 22.969181060791016, 'learning_rate': 3.2211050867827805e-07, 'fcm_dpo/beta': 0.002109553199261427, 'fcm_dpo/q_t': 0.3919963836669922, 'fcm_dpo/delta': -0.06939505785703659, 'fcm_dpo/margin': 220.9802703857422, 'margin_dpo/margin_mean': 220.98025512695312, 'margin_dpo/margin_std': 247.80764770507812, 'logps/chosen': -300.35516357421875, 'logps/rejected': -585.858154296875, 'logps/ref_chosen': -49.383758544921875, 'logps/ref_rejected': -113.90650939941406, 'KL/chosen_KL_mean': -250.97140502929688, 'KL/rejected_KL_mean': -471.95166015625, 'KL/mean': -361.4615478515625, 'KL/std': 230.63836669921875, 'logits/chosen': -0.36899369955062866, 'logits/rejected': -0.38391441106796265, 'epoch': 0.47} + 47%|████▋ | 319/681 [13:31<15:33, 2.58s/it] 47%|████▋ | 320/681 [13:34<15:38, 2.60s/it] {'loss': 1.0151, 'grad_norm': 25.455394744873047, 'learning_rate': 3.208807785813777e-07, 'fcm_dpo/beta': 0.002064064610749483, 'fcm_dpo/q_t': 0.3857959806919098, 'fcm_dpo/delta': -0.10146654397249222, 'fcm_dpo/margin': 240.28778076171875, 'margin_dpo/margin_mean': 240.2877960205078, 'margin_dpo/margin_std': 257.2183837890625, 'logps/chosen': -321.2955322265625, 'logps/rejected': -599.74560546875, 'logps/ref_chosen': -59.50489044189453, 'logps/ref_rejected': -97.66717529296875, 'KL/chosen_KL_mean': -261.7906494140625, 'KL/rejected_KL_mean': -502.07843017578125, 'KL/mean': -381.9345703125, 'KL/std': 240.1503448486328, 'logits/chosen': -0.3855065703392029, 'logits/rejected': -0.39106667041778564, 'epoch': 0.47} + 47%|████▋ | 320/681 [13:34<15:38, 2.60s/it] 47%|████▋ | 321/681 [13:37<15:22, 2.56s/it] {'loss': 1.084, 'grad_norm': 22.51926612854004, 'learning_rate': 3.1964918071004217e-07, 'fcm_dpo/beta': 0.002041730796918273, 'fcm_dpo/q_t': 0.4043254256248474, 'fcm_dpo/delta': -0.019452113658189774, 'fcm_dpo/margin': 204.66717529296875, 'margin_dpo/margin_mean': 204.66717529296875, 'margin_dpo/margin_std': 266.84027099609375, 'logps/chosen': -386.05157470703125, 'logps/rejected': -620.8111572265625, 'logps/ref_chosen': -61.548683166503906, 'logps/ref_rejected': -91.64103698730469, 'KL/chosen_KL_mean': -324.5029296875, 'KL/rejected_KL_mean': -529.1701049804688, 'KL/mean': -426.83648681640625, 'KL/std': 247.80294799804688, 'logits/chosen': -0.36786073446273804, 'logits/rejected': -0.35931724309921265, 'epoch': 0.47} + 47%|████▋ | 321/681 [13:37<15:22, 2.56s/it] 47%|████▋ | 322/681 [13:39<15:26, 2.58s/it] {'loss': 1.0349, 'grad_norm': 22.475814819335938, 'learning_rate': 3.184157475180207e-07, 'fcm_dpo/beta': 0.002025635913014412, 'fcm_dpo/q_t': 0.39296412467956543, 'fcm_dpo/delta': -0.0618242546916008, 'fcm_dpo/margin': 226.3704833984375, 'margin_dpo/margin_mean': 226.3704833984375, 'margin_dpo/margin_std': 237.702392578125, 'logps/chosen': -334.31005859375, 'logps/rejected': -599.1404418945312, 'logps/ref_chosen': -57.29003143310547, 'logps/ref_rejected': -95.74992370605469, 'KL/chosen_KL_mean': -277.02001953125, 'KL/rejected_KL_mean': -503.3905029296875, 'KL/mean': -390.20526123046875, 'KL/std': 221.27413940429688, 'logits/chosen': -0.3741741180419922, 'logits/rejected': -0.37576234340667725, 'epoch': 0.47} + 47%|████▋ | 322/681 [13:39<15:26, 2.58s/it] 47%|████▋ | 323/681 [13:42<15:29, 2.60s/it] {'loss': 1.0968, 'grad_norm': 34.564815521240234, 'learning_rate': 3.171805115074251e-07, 'fcm_dpo/beta': 0.002028942573815584, 'fcm_dpo/q_t': 0.4098392724990845, 'fcm_dpo/delta': 0.014963037334382534, 'fcm_dpo/margin': 190.0548095703125, 'margin_dpo/margin_mean': 190.0548095703125, 'margin_dpo/margin_std': 241.6279296875, 'logps/chosen': -350.09918212890625, 'logps/rejected': -563.98193359375, 'logps/ref_chosen': -51.23395919799805, 'logps/ref_rejected': -75.06192016601562, 'KL/chosen_KL_mean': -298.865234375, 'KL/rejected_KL_mean': -488.9200439453125, 'KL/mean': -393.89263916015625, 'KL/std': 218.2051239013672, 'logits/chosen': -0.3972129225730896, 'logits/rejected': -0.39619508385658264, 'epoch': 0.47} + 47%|████▋ | 323/681 [13:42<15:29, 2.60s/it] 48%|████▊ | 324/681 [13:44<15:10, 2.55s/it] {'loss': 1.1193, 'grad_norm': 38.610740661621094, 'learning_rate': 3.1594350522787295e-07, 'fcm_dpo/beta': 0.002055136486887932, 'fcm_dpo/q_t': 0.41455233097076416, 'fcm_dpo/delta': 0.022490426898002625, 'fcm_dpo/margin': 183.44676208496094, 'margin_dpo/margin_mean': 183.44676208496094, 'margin_dpo/margin_std': 259.76324462890625, 'logps/chosen': -404.5556640625, 'logps/rejected': -609.3447265625, 'logps/ref_chosen': -65.13516998291016, 'logps/ref_rejected': -86.47750854492188, 'KL/chosen_KL_mean': -339.42047119140625, 'KL/rejected_KL_mean': -522.8672485351562, 'KL/mean': -431.14385986328125, 'KL/std': 229.73959350585938, 'logits/chosen': -0.37280696630477905, 'logits/rejected': -0.35853368043899536, 'epoch': 0.48} + 48%|████▊ | 324/681 [13:44<15:10, 2.55s/it] 48%|████▊ | 325/681 [13:47<15:39, 2.64s/it] {'loss': 1.1329, 'grad_norm': 25.032848358154297, 'learning_rate': 3.147047612756302e-07, 'fcm_dpo/beta': 0.0020671868696808815, 'fcm_dpo/q_t': 0.42351895570755005, 'fcm_dpo/delta': 0.07653862237930298, 'fcm_dpo/margin': 157.63201904296875, 'margin_dpo/margin_mean': 157.63201904296875, 'margin_dpo/margin_std': 206.6846466064453, 'logps/chosen': -321.5379638671875, 'logps/rejected': -493.0403137207031, 'logps/ref_chosen': -56.215599060058594, 'logps/ref_rejected': -70.08592987060547, 'KL/chosen_KL_mean': -265.3223876953125, 'KL/rejected_KL_mean': -422.95440673828125, 'KL/mean': -344.13836669921875, 'KL/std': 204.55453491210938, 'logits/chosen': -0.4788200259208679, 'logits/rejected': -0.4589323401451111, 'epoch': 0.48} + 48%|████▊ | 325/681 [13:47<15:39, 2.64s/it] 48%|████▊ | 326/681 [13:50<15:49, 2.68s/it] {'loss': 1.1242, 'grad_norm': 34.663387298583984, 'learning_rate': 3.134643122927519e-07, 'fcm_dpo/beta': 0.0020953970961272717, 'fcm_dpo/q_t': 0.42310160398483276, 'fcm_dpo/delta': 0.07834838330745697, 'fcm_dpo/margin': 154.7425537109375, 'margin_dpo/margin_mean': 154.7425537109375, 'margin_dpo/margin_std': 181.62420654296875, 'logps/chosen': -360.988037109375, 'logps/rejected': -522.8524169921875, 'logps/ref_chosen': -72.72496032714844, 'logps/ref_rejected': -79.8467788696289, 'KL/chosen_KL_mean': -288.2630615234375, 'KL/rejected_KL_mean': -443.005615234375, 'KL/mean': -365.63433837890625, 'KL/std': 189.14825439453125, 'logits/chosen': -0.4711052179336548, 'logits/rejected': -0.4501519799232483, 'epoch': 0.48} + 48%|████▊ | 326/681 [13:50<15:49, 2.68s/it] 48%|████▊ | 327/681 [13:53<15:50, 2.68s/it] {'loss': 1.0304, 'grad_norm': 37.02153396606445, 'learning_rate': 3.1222219096622264e-07, 'fcm_dpo/beta': 0.00208103284239769, 'fcm_dpo/q_t': 0.3920641541481018, 'fcm_dpo/delta': -0.05987313389778137, 'fcm_dpo/margin': 219.50950622558594, 'margin_dpo/margin_mean': 219.50949096679688, 'margin_dpo/margin_std': 218.749755859375, 'logps/chosen': -318.3816223144531, 'logps/rejected': -580.6904907226562, 'logps/ref_chosen': -69.13441467285156, 'logps/ref_rejected': -111.93377685546875, 'KL/chosen_KL_mean': -249.2471923828125, 'KL/rejected_KL_mean': -468.7567138671875, 'KL/mean': -359.001953125, 'KL/std': 200.14559936523438, 'logits/chosen': -0.46292924880981445, 'logits/rejected': -0.45082515478134155, 'epoch': 0.48} + 48%|████▊ | 327/681 [13:53<15:50, 2.68s/it] 48%|████▊ | 328/681 [13:55<15:38, 2.66s/it] {'loss': 1.0771, 'grad_norm': 29.511402130126953, 'learning_rate': 3.1097843002709427e-07, 'fcm_dpo/beta': 0.0020799068734049797, 'fcm_dpo/q_t': 0.4056174159049988, 'fcm_dpo/delta': -0.010076452046632767, 'fcm_dpo/margin': 196.9560089111328, 'margin_dpo/margin_mean': 196.95599365234375, 'margin_dpo/margin_std': 243.63185119628906, 'logps/chosen': -322.7398681640625, 'logps/rejected': -550.8636474609375, 'logps/ref_chosen': -59.68719482421875, 'logps/ref_rejected': -90.85499572753906, 'KL/chosen_KL_mean': -263.05267333984375, 'KL/rejected_KL_mean': -460.0086669921875, 'KL/mean': -361.53070068359375, 'KL/std': 220.0689697265625, 'logits/chosen': -0.4060869514942169, 'logits/rejected': -0.4065949320793152, 'epoch': 0.48} + 48%|████▊ | 328/681 [13:55<15:38, 2.66s/it] 48%|████▊ | 329/681 [13:58<15:15, 2.60s/it] {'loss': 1.0616, 'grad_norm': 23.92837142944336, 'learning_rate': 3.0973306224962437e-07, 'fcm_dpo/beta': 0.002052995143458247, 'fcm_dpo/q_t': 0.3972422778606415, 'fcm_dpo/delta': -0.04573259502649307, 'fcm_dpo/margin': 215.82797241210938, 'margin_dpo/margin_mean': 215.82797241210938, 'margin_dpo/margin_std': 258.53314208984375, 'logps/chosen': -360.3170166015625, 'logps/rejected': -611.596435546875, 'logps/ref_chosen': -65.2461929321289, 'logps/ref_rejected': -100.69770812988281, 'KL/chosen_KL_mean': -295.07080078125, 'KL/rejected_KL_mean': -510.89874267578125, 'KL/mean': -402.98480224609375, 'KL/std': 238.93309020996094, 'logits/chosen': -0.4066659212112427, 'logits/rejected': -0.3950307369232178, 'epoch': 0.48} + 48%|████▊ | 329/681 [13:58<15:15, 2.60s/it] 48%|████▊ | 330/681 [14:00<15:18, 2.62s/it] {'loss': 1.04, 'grad_norm': 26.08322525024414, 'learning_rate': 3.084861204504122e-07, 'fcm_dpo/beta': 0.002038386417552829, 'fcm_dpo/q_t': 0.3932916224002838, 'fcm_dpo/delta': -0.06354449689388275, 'fcm_dpo/margin': 225.9734344482422, 'margin_dpo/margin_mean': 225.9734344482422, 'margin_dpo/margin_std': 254.14117431640625, 'logps/chosen': -303.5428161621094, 'logps/rejected': -569.394775390625, 'logps/ref_chosen': -46.998348236083984, 'logps/ref_rejected': -86.87684631347656, 'KL/chosen_KL_mean': -256.5444641113281, 'KL/rejected_KL_mean': -482.51788330078125, 'KL/mean': -369.53118896484375, 'KL/std': 233.46661376953125, 'logits/chosen': -0.40733757615089417, 'logits/rejected': -0.40871596336364746, 'epoch': 0.48} + 48%|████▊ | 330/681 [14:00<15:18, 2.62s/it] 49%|████▊ | 331/681 [14:03<15:01, 2.58s/it] {'loss': 1.015, 'grad_norm': 24.841083526611328, 'learning_rate': 3.072376374875335e-07, 'fcm_dpo/beta': 0.0020131845958530903, 'fcm_dpo/q_t': 0.3916124403476715, 'fcm_dpo/delta': -0.058914512395858765, 'fcm_dpo/margin': 226.6082763671875, 'margin_dpo/margin_mean': 226.6082763671875, 'margin_dpo/margin_std': 190.06936645507812, 'logps/chosen': -333.95367431640625, 'logps/rejected': -599.0531005859375, 'logps/ref_chosen': -50.52424621582031, 'logps/ref_rejected': -89.01544189453125, 'KL/chosen_KL_mean': -283.42938232421875, 'KL/rejected_KL_mean': -510.03765869140625, 'KL/mean': -396.7335205078125, 'KL/std': 195.69345092773438, 'logits/chosen': -0.4177253246307373, 'logits/rejected': -0.41352635622024536, 'epoch': 0.49} + 49%|████▊ | 331/681 [14:03<15:01, 2.58s/it] 49%|████▉ | 332/681 [14:05<14:42, 2.53s/it] {'loss': 1.1317, 'grad_norm': 21.33700180053711, 'learning_rate': 3.059876462596758e-07, 'fcm_dpo/beta': 0.002031027339398861, 'fcm_dpo/q_t': 0.423465371131897, 'fcm_dpo/delta': 0.06868893653154373, 'fcm_dpo/margin': 164.1973876953125, 'margin_dpo/margin_mean': 164.1973876953125, 'margin_dpo/margin_std': 231.62454223632812, 'logps/chosen': -341.8979187011719, 'logps/rejected': -533.4002075195312, 'logps/ref_chosen': -49.18028259277344, 'logps/ref_rejected': -76.48515319824219, 'KL/chosen_KL_mean': -292.7176513671875, 'KL/rejected_KL_mean': -456.9150390625, 'KL/mean': -374.81634521484375, 'KL/std': 211.74179077148438, 'logits/chosen': -0.4500772953033447, 'logits/rejected': -0.4330589771270752, 'epoch': 0.49} + 49%|████▉ | 332/681 [14:05<14:42, 2.53s/it] 49%|████▉ | 333/681 [14:08<14:25, 2.49s/it] {'loss': 1.0836, 'grad_norm': 20.72551918029785, 'learning_rate': 3.0473617970527015e-07, 'fcm_dpo/beta': 0.0020122663117945194, 'fcm_dpo/q_t': 0.40161222219467163, 'fcm_dpo/delta': -0.03633493557572365, 'fcm_dpo/margin': 215.58282470703125, 'margin_dpo/margin_mean': 215.58282470703125, 'margin_dpo/margin_std': 294.230712890625, 'logps/chosen': -384.037353515625, 'logps/rejected': -630.9085693359375, 'logps/ref_chosen': -63.75574493408203, 'logps/ref_rejected': -95.04411315917969, 'KL/chosen_KL_mean': -320.2816162109375, 'KL/rejected_KL_mean': -535.8644409179688, 'KL/mean': -428.072998046875, 'KL/std': 247.35726928710938, 'logits/chosen': -0.4343733787536621, 'logits/rejected': -0.427177369594574, 'epoch': 0.49} + 49%|████▉ | 333/681 [14:08<14:25, 2.49s/it] 49%|████▉ | 334/681 [14:10<14:21, 2.48s/it] {'loss': 1.121, 'grad_norm': 25.060213088989258, 'learning_rate': 3.034832708016243e-07, 'fcm_dpo/beta': 0.002020814223214984, 'fcm_dpo/q_t': 0.411624014377594, 'fcm_dpo/delta': 0.008031206205487251, 'fcm_dpo/margin': 194.11180114746094, 'margin_dpo/margin_mean': 194.11180114746094, 'margin_dpo/margin_std': 303.725341796875, 'logps/chosen': -380.56317138671875, 'logps/rejected': -603.0120849609375, 'logps/ref_chosen': -66.97975158691406, 'logps/ref_rejected': -95.31692504882812, 'KL/chosen_KL_mean': -313.5834045410156, 'KL/rejected_KL_mean': -507.6951904296875, 'KL/mean': -410.6392822265625, 'KL/std': 270.35736083984375, 'logits/chosen': -0.4468313455581665, 'logits/rejected': -0.44486457109451294, 'epoch': 0.49} + 49%|████▉ | 334/681 [14:10<14:21, 2.48s/it] 49%|████▉ | 335/681 [14:13<14:28, 2.51s/it] {'loss': 1.1873, 'grad_norm': 31.06818962097168, 'learning_rate': 3.022289525640531e-07, 'fcm_dpo/beta': 0.0020229285582900047, 'fcm_dpo/q_t': 0.4324929714202881, 'fcm_dpo/delta': -0.0058713615871965885, 'fcm_dpo/margin': 148.0127410888672, 'margin_dpo/margin_mean': 148.0127410888672, 'margin_dpo/margin_std': 279.176025390625, 'logps/chosen': -408.8131408691406, 'logps/rejected': -581.9010620117188, 'logps/ref_chosen': -62.54248046875, 'logps/ref_rejected': -87.61770629882812, 'KL/chosen_KL_mean': -346.2706604003906, 'KL/rejected_KL_mean': -494.2833557128906, 'KL/mean': -420.2770080566406, 'KL/std': 254.03070068359375, 'logits/chosen': -0.4586733281612396, 'logits/rejected': -0.4365878701210022, 'epoch': 0.49} + 49%|████▉ | 335/681 [14:13<14:28, 2.51s/it] 49%|████▉ | 336/681 [14:15<14:32, 2.53s/it] {'loss': 1.0618, 'grad_norm': 26.365034103393555, 'learning_rate': 3.009732580450086e-07, 'fcm_dpo/beta': 0.001999348634853959, 'fcm_dpo/q_t': 0.38974490761756897, 'fcm_dpo/delta': -0.10420601069927216, 'fcm_dpo/margin': 249.61618041992188, 'margin_dpo/margin_mean': 249.61618041992188, 'margin_dpo/margin_std': 351.21514892578125, 'logps/chosen': -398.9140625, 'logps/rejected': -698.4033813476562, 'logps/ref_chosen': -54.53115463256836, 'logps/ref_rejected': -104.40424346923828, 'KL/chosen_KL_mean': -344.3829345703125, 'KL/rejected_KL_mean': -593.9991455078125, 'KL/mean': -469.1910095214844, 'KL/std': 296.28973388671875, 'logits/chosen': -0.43404412269592285, 'logits/rejected': -0.43407052755355835, 'epoch': 0.49} + 49%|████▉ | 336/681 [14:15<14:32, 2.53s/it] 49%|████▉ | 337/681 [14:18<14:27, 2.52s/it] {'loss': 1.026, 'grad_norm': 26.773256301879883, 'learning_rate': 2.9971622033320914e-07, 'fcm_dpo/beta': 0.0019480783957988024, 'fcm_dpo/q_t': 0.38848379254341125, 'fcm_dpo/delta': -0.0921003520488739, 'fcm_dpo/margin': 250.12213134765625, 'margin_dpo/margin_mean': 250.12213134765625, 'margin_dpo/margin_std': 282.658935546875, 'logps/chosen': -378.08172607421875, 'logps/rejected': -664.8021240234375, 'logps/ref_chosen': -65.12869262695312, 'logps/ref_rejected': -101.72701263427734, 'KL/chosen_KL_mean': -312.9530029296875, 'KL/rejected_KL_mean': -563.0751342773438, 'KL/mean': -438.01409912109375, 'KL/std': 238.04376220703125, 'logits/chosen': -0.48545369505882263, 'logits/rejected': -0.47427335381507874, 'epoch': 0.49} + 49%|████▉ | 337/681 [14:18<14:27, 2.52s/it] 50%|████▉ | 338/681 [14:20<14:29, 2.53s/it] {'loss': 1.0356, 'grad_norm': 26.295778274536133, 'learning_rate': 2.984578725527675e-07, 'fcm_dpo/beta': 0.001927088014781475, 'fcm_dpo/q_t': 0.39445608854293823, 'fcm_dpo/delta': -0.05328977108001709, 'fcm_dpo/margin': 233.9475555419922, 'margin_dpo/margin_mean': 233.94757080078125, 'margin_dpo/margin_std': 240.76602172851562, 'logps/chosen': -337.8787536621094, 'logps/rejected': -602.47216796875, 'logps/ref_chosen': -58.422706604003906, 'logps/ref_rejected': -89.06854248046875, 'KL/chosen_KL_mean': -279.4560546875, 'KL/rejected_KL_mean': -513.4036254882812, 'KL/mean': -396.4298400878906, 'KL/std': 230.5653533935547, 'logits/chosen': -0.44427040219306946, 'logits/rejected': -0.438721239566803, 'epoch': 0.5} + 50%|████▉ | 338/681 [14:20<14:29, 2.53s/it] 50%|████▉ | 339/681 [14:23<14:01, 2.46s/it] {'loss': 1.0471, 'grad_norm': 24.693683624267578, 'learning_rate': 2.9719824786231796e-07, 'fcm_dpo/beta': 0.0019262076821178198, 'fcm_dpo/q_t': 0.3960764408111572, 'fcm_dpo/delta': -0.04981581121683121, 'fcm_dpo/margin': 231.79090881347656, 'margin_dpo/margin_mean': 231.79090881347656, 'margin_dpo/margin_std': 240.60723876953125, 'logps/chosen': -365.3131408691406, 'logps/rejected': -641.0196533203125, 'logps/ref_chosen': -59.99531555175781, 'logps/ref_rejected': -103.9109115600586, 'KL/chosen_KL_mean': -305.31781005859375, 'KL/rejected_KL_mean': -537.1087646484375, 'KL/mean': -421.2132568359375, 'KL/std': 242.9071044921875, 'logits/chosen': -0.4791075587272644, 'logits/rejected': -0.46518805623054504, 'epoch': 0.5} + 50%|████▉ | 339/681 [14:23<14:01, 2.46s/it] 50%|████▉ | 340/681 [14:25<14:11, 2.50s/it] {'loss': 1.1133, 'grad_norm': 33.532562255859375, 'learning_rate': 2.959373794541426e-07, 'fcm_dpo/beta': 0.0019024586072191596, 'fcm_dpo/q_t': 0.41325610876083374, 'fcm_dpo/delta': 0.0222429558634758, 'fcm_dpo/margin': 198.86572265625, 'margin_dpo/margin_mean': 198.86572265625, 'margin_dpo/margin_std': 284.8116455078125, 'logps/chosen': -375.77899169921875, 'logps/rejected': -594.9216918945312, 'logps/ref_chosen': -52.83022689819336, 'logps/ref_rejected': -73.10723114013672, 'KL/chosen_KL_mean': -322.9487609863281, 'KL/rejected_KL_mean': -521.814453125, 'KL/mean': -422.3816223144531, 'KL/std': 234.76840209960938, 'logits/chosen': -0.4173485040664673, 'logits/rejected': -0.3965187966823578, 'epoch': 0.5} + 50%|████▉ | 340/681 [14:25<14:11, 2.50s/it] 50%|█████ | 341/681 [14:27<13:46, 2.43s/it] {'loss': 1.0612, 'grad_norm': 26.986059188842773, 'learning_rate': 2.946753005532965e-07, 'fcm_dpo/beta': 0.0018979123560711741, 'fcm_dpo/q_t': 0.40071308612823486, 'fcm_dpo/delta': -0.03151214122772217, 'fcm_dpo/margin': 226.50100708007812, 'margin_dpo/margin_mean': 226.50100708007812, 'margin_dpo/margin_std': 261.280517578125, 'logps/chosen': -367.7166442871094, 'logps/rejected': -648.127685546875, 'logps/ref_chosen': -47.899803161621094, 'logps/ref_rejected': -101.80987548828125, 'KL/chosen_KL_mean': -319.81683349609375, 'KL/rejected_KL_mean': -546.3178100585938, 'KL/mean': -433.06732177734375, 'KL/std': 254.18038940429688, 'logits/chosen': -0.43833357095718384, 'logits/rejected': -0.4382708966732025, 'epoch': 0.5} + 50%|█████ | 341/681 [14:27<13:46, 2.43s/it] 50%|█████ | 342/681 [14:30<14:04, 2.49s/it] {'loss': 1.1035, 'grad_norm': 24.853057861328125, 'learning_rate': 2.934120444167326e-07, 'fcm_dpo/beta': 0.0019124182872474194, 'fcm_dpo/q_t': 0.41131168603897095, 'fcm_dpo/delta': 0.014296330511569977, 'fcm_dpo/margin': 201.77845764160156, 'margin_dpo/margin_mean': 201.77847290039062, 'margin_dpo/margin_std': 277.33526611328125, 'logps/chosen': -389.6605224609375, 'logps/rejected': -612.031982421875, 'logps/ref_chosen': -71.99664306640625, 'logps/ref_rejected': -92.58959197998047, 'KL/chosen_KL_mean': -317.66387939453125, 'KL/rejected_KL_mean': -519.4423828125, 'KL/mean': -418.5531005859375, 'KL/std': 237.76806640625, 'logits/chosen': -0.4720449149608612, 'logits/rejected': -0.4480515718460083, 'epoch': 0.5} + 50%|█████ | 342/681 [14:30<14:04, 2.49s/it] 50%|█████ | 343/681 [14:32<13:45, 2.44s/it] {'loss': 1.0242, 'grad_norm': 22.807832717895508, 'learning_rate': 2.9214764433242476e-07, 'fcm_dpo/beta': 0.0018888043705374002, 'fcm_dpo/q_t': 0.392503947019577, 'fcm_dpo/delta': -0.05659223720431328, 'fcm_dpo/margin': 240.35008239746094, 'margin_dpo/margin_mean': 240.35009765625, 'margin_dpo/margin_std': 217.2589874267578, 'logps/chosen': -359.5283203125, 'logps/rejected': -656.51416015625, 'logps/ref_chosen': -54.405616760253906, 'logps/ref_rejected': -111.04142761230469, 'KL/chosen_KL_mean': -305.1226806640625, 'KL/rejected_KL_mean': -545.4727783203125, 'KL/mean': -425.2977294921875, 'KL/std': 241.86111450195312, 'logits/chosen': -0.4819292724132538, 'logits/rejected': -0.4859675168991089, 'epoch': 0.5} + 50%|█████ | 343/681 [14:32<13:45, 2.44s/it] 51%|█████ | 344/681 [14:35<13:44, 2.45s/it] {'loss': 1.0943, 'grad_norm': 28.06170654296875, 'learning_rate': 2.9088213361849126e-07, 'fcm_dpo/beta': 0.0019029853865504265, 'fcm_dpo/q_t': 0.41077619791030884, 'fcm_dpo/delta': 0.0050534456968307495, 'fcm_dpo/margin': 206.81173706054688, 'margin_dpo/margin_mean': 206.81173706054688, 'margin_dpo/margin_std': 261.9127197265625, 'logps/chosen': -353.10736083984375, 'logps/rejected': -596.5778198242188, 'logps/ref_chosen': -53.96466827392578, 'logps/ref_rejected': -90.62336730957031, 'KL/chosen_KL_mean': -299.1427001953125, 'KL/rejected_KL_mean': -505.9544677734375, 'KL/mean': -402.548583984375, 'KL/std': 257.1475524902344, 'logits/chosen': -0.45593854784965515, 'logits/rejected': -0.45924174785614014, 'epoch': 0.51} + 51%|█████ | 344/681 [14:35<13:44, 2.45s/it] 51%|█████ | 345/681 [14:37<14:06, 2.52s/it] {'loss': 1.0559, 'grad_norm': 19.72064208984375, 'learning_rate': 2.896155456223163e-07, 'fcm_dpo/beta': 0.0018797038355842233, 'fcm_dpo/q_t': 0.3980991244316101, 'fcm_dpo/delta': -0.04108835384249687, 'fcm_dpo/margin': 233.6870880126953, 'margin_dpo/margin_mean': 233.68710327148438, 'margin_dpo/margin_std': 272.0915222167969, 'logps/chosen': -415.907470703125, 'logps/rejected': -687.3992919921875, 'logps/ref_chosen': -61.685699462890625, 'logps/ref_rejected': -99.49041748046875, 'KL/chosen_KL_mean': -354.2217712402344, 'KL/rejected_KL_mean': -587.9088745117188, 'KL/mean': -471.0653076171875, 'KL/std': 249.9000244140625, 'logits/chosen': -0.46400630474090576, 'logits/rejected': -0.4607963263988495, 'epoch': 0.51} + 51%|█████ | 345/681 [14:38<14:06, 2.52s/it] 51%|█████ | 346/681 [14:40<13:58, 2.50s/it] {'loss': 1.0708, 'grad_norm': 25.529489517211914, 'learning_rate': 2.883479137196714e-07, 'fcm_dpo/beta': 0.0018681611400097609, 'fcm_dpo/q_t': 0.4032723307609558, 'fcm_dpo/delta': -0.011793499812483788, 'fcm_dpo/margin': 220.168212890625, 'margin_dpo/margin_mean': 220.168212890625, 'margin_dpo/margin_std': 256.2587890625, 'logps/chosen': -398.75372314453125, 'logps/rejected': -641.0809326171875, 'logps/ref_chosen': -55.256263732910156, 'logps/ref_rejected': -77.41532135009766, 'KL/chosen_KL_mean': -343.4974365234375, 'KL/rejected_KL_mean': -563.6656494140625, 'KL/mean': -453.58154296875, 'KL/std': 240.47105407714844, 'logits/chosen': -0.43244969844818115, 'logits/rejected': -0.42079615592956543, 'epoch': 0.51} + 51%|█████ | 346/681 [14:40<13:58, 2.50s/it] 51%|█████ | 347/681 [14:42<13:23, 2.41s/it] {'loss': 1.0836, 'grad_norm': 21.177968978881836, 'learning_rate': 2.8707927131383614e-07, 'fcm_dpo/beta': 0.00186370057053864, 'fcm_dpo/q_t': 0.4054613709449768, 'fcm_dpo/delta': -0.010882144793868065, 'fcm_dpo/margin': 220.22601318359375, 'margin_dpo/margin_mean': 220.22601318359375, 'margin_dpo/margin_std': 285.551513671875, 'logps/chosen': -397.52606201171875, 'logps/rejected': -652.5409545898438, 'logps/ref_chosen': -57.56623840332031, 'logps/ref_rejected': -92.35509490966797, 'KL/chosen_KL_mean': -339.9598388671875, 'KL/rejected_KL_mean': -560.1858520507812, 'KL/mean': -450.0728454589844, 'KL/std': 254.47296142578125, 'logits/chosen': -0.4151489734649658, 'logits/rejected': -0.407335102558136, 'epoch': 0.51} + 51%|█████ | 347/681 [14:42<13:23, 2.41s/it] 51%|█████ | 348/681 [14:44<13:11, 2.38s/it] {'loss': 1.131, 'grad_norm': 25.129812240600586, 'learning_rate': 2.858096518347179e-07, 'fcm_dpo/beta': 0.0018694268073886633, 'fcm_dpo/q_t': 0.42170295119285583, 'fcm_dpo/delta': 0.06262210756540298, 'fcm_dpo/margin': 181.44418334960938, 'margin_dpo/margin_mean': 181.44418334960938, 'margin_dpo/margin_std': 251.6490020751953, 'logps/chosen': -355.5946044921875, 'logps/rejected': -569.8594970703125, 'logps/ref_chosen': -56.31770324707031, 'logps/ref_rejected': -89.13836669921875, 'KL/chosen_KL_mean': -299.27691650390625, 'KL/rejected_KL_mean': -480.7210998535156, 'KL/mean': -389.9990234375, 'KL/std': 215.0816650390625, 'logits/chosen': -0.49058231711387634, 'logits/rejected': -0.49180328845977783, 'epoch': 0.51} + 51%|█████ | 348/681 [14:44<13:11, 2.38s/it] 51%|█████ | 349/681 [14:47<13:44, 2.48s/it] {'loss': 1.0975, 'grad_norm': 20.649566650390625, 'learning_rate': 2.845390887379706e-07, 'fcm_dpo/beta': 0.0018920442089438438, 'fcm_dpo/q_t': 0.40846750140190125, 'fcm_dpo/delta': -0.0041931793093681335, 'fcm_dpo/margin': 213.40219116210938, 'margin_dpo/margin_mean': 213.40220642089844, 'margin_dpo/margin_std': 298.5577392578125, 'logps/chosen': -338.0409851074219, 'logps/rejected': -590.9228515625, 'logps/ref_chosen': -58.025516510009766, 'logps/ref_rejected': -97.50515747070312, 'KL/chosen_KL_mean': -280.0154724121094, 'KL/rejected_KL_mean': -493.41766357421875, 'KL/mean': -386.7165832519531, 'KL/std': 241.07833862304688, 'logits/chosen': -0.43403786420822144, 'logits/rejected': -0.43399712443351746, 'epoch': 0.51} + 51%|█████ | 349/681 [14:47<13:44, 2.48s/it] 51%|█████▏ | 350/681 [14:50<14:04, 2.55s/it] {'loss': 1.1045, 'grad_norm': 27.677011489868164, 'learning_rate': 2.8326761550411346e-07, 'fcm_dpo/beta': 0.0018797710072249174, 'fcm_dpo/q_t': 0.4099721312522888, 'fcm_dpo/delta': 0.012750823050737381, 'fcm_dpo/margin': 206.09213256835938, 'margin_dpo/margin_mean': 206.09213256835938, 'margin_dpo/margin_std': 283.2620544433594, 'logps/chosen': -362.17919921875, 'logps/rejected': -593.8125, 'logps/ref_chosen': -64.33049011230469, 'logps/ref_rejected': -89.87164306640625, 'KL/chosen_KL_mean': -297.84869384765625, 'KL/rejected_KL_mean': -503.94085693359375, 'KL/mean': -400.894775390625, 'KL/std': 231.921875, 'logits/chosen': -0.4817023277282715, 'logits/rejected': -0.48322421312332153, 'epoch': 0.51} + 51%|█████▏ | 350/681 [14:50<14:04, 2.55s/it] 52%|█████▏ | 351/681 [14:52<14:05, 2.56s/it] {'loss': 1.0495, 'grad_norm': 27.421785354614258, 'learning_rate': 2.819952656376487e-07, 'fcm_dpo/beta': 0.0018625000957399607, 'fcm_dpo/q_t': 0.39376571774482727, 'fcm_dpo/delta': -0.07697418332099915, 'fcm_dpo/margin': 253.93798828125, 'margin_dpo/margin_mean': 253.93798828125, 'margin_dpo/margin_std': 320.8644104003906, 'logps/chosen': -342.8303527832031, 'logps/rejected': -637.66162109375, 'logps/ref_chosen': -60.6721305847168, 'logps/ref_rejected': -101.5654296875, 'KL/chosen_KL_mean': -282.1582336425781, 'KL/rejected_KL_mean': -536.09619140625, 'KL/mean': -409.1272277832031, 'KL/std': 275.96209716796875, 'logits/chosen': -0.4568382501602173, 'logits/rejected': -0.4523654282093048, 'epoch': 0.52} + 52%|█████▏ | 351/681 [14:53<14:05, 2.56s/it] 52%|█████▏ | 352/681 [14:55<14:04, 2.57s/it] {'loss': 1.1774, 'grad_norm': 38.70182800292969, 'learning_rate': 2.8072207266617854e-07, 'fcm_dpo/beta': 0.0018845018930733204, 'fcm_dpo/q_t': 0.43189874291419983, 'fcm_dpo/delta': 0.1046164482831955, 'fcm_dpo/margin': 158.46998596191406, 'margin_dpo/margin_mean': 158.46998596191406, 'margin_dpo/margin_std': 281.17779541015625, 'logps/chosen': -405.1116943359375, 'logps/rejected': -569.2802124023438, 'logps/ref_chosen': -70.9434585571289, 'logps/ref_rejected': -76.6419677734375, 'KL/chosen_KL_mean': -334.1682434082031, 'KL/rejected_KL_mean': -492.63824462890625, 'KL/mean': -413.40325927734375, 'KL/std': 255.41502380371094, 'logits/chosen': -0.4881801903247833, 'logits/rejected': -0.45606744289398193, 'epoch': 0.52} + 52%|█████▏ | 352/681 [14:55<14:04, 2.57s/it] 52%|█████▏ | 353/681 [14:58<13:58, 2.56s/it] {'loss': 1.1081, 'grad_norm': 27.271644592285156, 'learning_rate': 2.794480701395219e-07, 'fcm_dpo/beta': 0.0018944459734484553, 'fcm_dpo/q_t': 0.41237473487854004, 'fcm_dpo/delta': 0.015432950109243393, 'fcm_dpo/margin': 203.0735626220703, 'margin_dpo/margin_mean': 203.0735626220703, 'margin_dpo/margin_std': 283.2236328125, 'logps/chosen': -374.350341796875, 'logps/rejected': -599.3640747070312, 'logps/ref_chosen': -58.39533996582031, 'logps/ref_rejected': -80.33553314208984, 'KL/chosen_KL_mean': -315.9549865722656, 'KL/rejected_KL_mean': -519.028564453125, 'KL/mean': -417.49176025390625, 'KL/std': 250.93426513671875, 'logits/chosen': -0.5225635170936584, 'logits/rejected': -0.5105962157249451, 'epoch': 0.52} + 52%|█████▏ | 353/681 [14:58<13:58, 2.56s/it] 52%|█████▏ | 354/681 [15:00<14:08, 2.59s/it] {'loss': 1.0474, 'grad_norm': 26.834888458251953, 'learning_rate': 2.781732916288303e-07, 'fcm_dpo/beta': 0.0018932155799120665, 'fcm_dpo/q_t': 0.3988182246685028, 'fcm_dpo/delta': -0.0335673987865448, 'fcm_dpo/margin': 228.191650390625, 'margin_dpo/margin_mean': 228.191650390625, 'margin_dpo/margin_std': 236.67225646972656, 'logps/chosen': -324.29638671875, 'logps/rejected': -581.4425659179688, 'logps/ref_chosen': -59.80299377441406, 'logps/ref_rejected': -88.75750732421875, 'KL/chosen_KL_mean': -264.493408203125, 'KL/rejected_KL_mean': -492.68505859375, 'KL/mean': -378.5892333984375, 'KL/std': 236.89236450195312, 'logits/chosen': -0.48592621088027954, 'logits/rejected': -0.4727493226528168, 'epoch': 0.52} + 52%|█████▏ | 354/681 [15:00<14:08, 2.59s/it] 52%|█████▏ | 355/681 [15:03<13:47, 2.54s/it] {'loss': 1.056, 'grad_norm': 34.09590530395508, 'learning_rate': 2.7689777072570284e-07, 'fcm_dpo/beta': 0.0018815842922776937, 'fcm_dpo/q_t': 0.400523841381073, 'fcm_dpo/delta': -0.023417077958583832, 'fcm_dpo/margin': 224.40811157226562, 'margin_dpo/margin_mean': 224.4081268310547, 'margin_dpo/margin_std': 237.8057403564453, 'logps/chosen': -320.97601318359375, 'logps/rejected': -573.6617431640625, 'logps/ref_chosen': -54.12849807739258, 'logps/ref_rejected': -82.40606689453125, 'KL/chosen_KL_mean': -266.8475341796875, 'KL/rejected_KL_mean': -491.2556457519531, 'KL/mean': -379.05157470703125, 'KL/std': 240.4083251953125, 'logits/chosen': -0.5412899255752563, 'logits/rejected': -0.5298917293548584, 'epoch': 0.52} + 52%|█████▏ | 355/681 [15:03<13:47, 2.54s/it] 52%|█████▏ | 356/681 [15:05<14:03, 2.59s/it] {'loss': 1.241, 'grad_norm': 29.567127227783203, 'learning_rate': 2.7562154104130176e-07, 'fcm_dpo/beta': 0.001889348030090332, 'fcm_dpo/q_t': 0.4463768005371094, 'fcm_dpo/delta': 0.027527010068297386, 'fcm_dpo/margin': 125.88235473632812, 'margin_dpo/margin_mean': 125.88235473632812, 'margin_dpo/margin_std': 300.37933349609375, 'logps/chosen': -393.366943359375, 'logps/rejected': -530.4747314453125, 'logps/ref_chosen': -64.6738052368164, 'logps/ref_rejected': -75.89926147460938, 'KL/chosen_KL_mean': -328.6931457519531, 'KL/rejected_KL_mean': -454.5754699707031, 'KL/mean': -391.6343078613281, 'KL/std': 245.00680541992188, 'logits/chosen': -0.5134952068328857, 'logits/rejected': -0.49404820799827576, 'epoch': 0.52} + 52%|█████▏ | 356/681 [15:05<14:03, 2.59s/it] 52%|█████▏ | 357/681 [15:08<14:04, 2.61s/it] {'loss': 1.0964, 'grad_norm': 25.547868728637695, 'learning_rate': 2.7434463620546594e-07, 'fcm_dpo/beta': 0.0018927913624793291, 'fcm_dpo/q_t': 0.4112818241119385, 'fcm_dpo/delta': 0.021241577342152596, 'fcm_dpo/margin': 200.40087890625, 'margin_dpo/margin_mean': 200.40087890625, 'margin_dpo/margin_std': 248.13658142089844, 'logps/chosen': -358.19384765625, 'logps/rejected': -592.7100830078125, 'logps/ref_chosen': -52.725799560546875, 'logps/ref_rejected': -86.84115600585938, 'KL/chosen_KL_mean': -305.4680480957031, 'KL/rejected_KL_mean': -505.8689270019531, 'KL/mean': -405.6684875488281, 'KL/std': 243.4521484375, 'logits/chosen': -0.5083039999008179, 'logits/rejected': -0.49678516387939453, 'epoch': 0.52} + 52%|█████▏ | 357/681 [15:08<14:04, 2.61s/it] 53%|█████▎ | 358/681 [15:11<14:19, 2.66s/it] {'loss': 1.1142, 'grad_norm': 26.77370262145996, 'learning_rate': 2.730670898658255e-07, 'fcm_dpo/beta': 0.0019136819755658507, 'fcm_dpo/q_t': 0.41719043254852295, 'fcm_dpo/delta': 0.039984140545129776, 'fcm_dpo/margin': 188.8855743408203, 'margin_dpo/margin_mean': 188.8855743408203, 'margin_dpo/margin_std': 258.85284423828125, 'logps/chosen': -344.40167236328125, 'logps/rejected': -558.455078125, 'logps/ref_chosen': -63.20543670654297, 'logps/ref_rejected': -88.373291015625, 'KL/chosen_KL_mean': -281.19622802734375, 'KL/rejected_KL_mean': -470.081787109375, 'KL/mean': -375.6390380859375, 'KL/std': 238.79005432128906, 'logits/chosen': -0.49271106719970703, 'logits/rejected': -0.4746229648590088, 'epoch': 0.53} + 53%|█████▎ | 358/681 [15:11<14:19, 2.66s/it] 53%|█████▎ | 359/681 [15:13<14:07, 2.63s/it] {'loss': 1.0748, 'grad_norm': 35.96456527709961, 'learning_rate': 2.717889356869146e-07, 'fcm_dpo/beta': 0.0019082968356087804, 'fcm_dpo/q_t': 0.40221983194351196, 'fcm_dpo/delta': -0.030117180198431015, 'fcm_dpo/margin': 224.70556640625, 'margin_dpo/margin_mean': 224.70556640625, 'margin_dpo/margin_std': 287.6982421875, 'logps/chosen': -378.186279296875, 'logps/rejected': -628.6954345703125, 'logps/ref_chosen': -56.370216369628906, 'logps/ref_rejected': -82.17375183105469, 'KL/chosen_KL_mean': -321.8160400390625, 'KL/rejected_KL_mean': -546.5216674804688, 'KL/mean': -434.1688537597656, 'KL/std': 240.7170867919922, 'logits/chosen': -0.4789687991142273, 'logits/rejected': -0.4714996814727783, 'epoch': 0.53} + 53%|█████▎ | 359/681 [15:13<14:07, 2.63s/it] 53%|█████▎ | 360/681 [15:16<13:51, 2.59s/it] {'loss': 1.1367, 'grad_norm': 55.63818359375, 'learning_rate': 2.7051020734928443e-07, 'fcm_dpo/beta': 0.0019348189234733582, 'fcm_dpo/q_t': 0.42690205574035645, 'fcm_dpo/delta': 0.09300471842288971, 'fcm_dpo/margin': 160.10589599609375, 'margin_dpo/margin_mean': 160.10589599609375, 'margin_dpo/margin_std': 199.83489990234375, 'logps/chosen': -376.77264404296875, 'logps/rejected': -555.257080078125, 'logps/ref_chosen': -51.460384368896484, 'logps/ref_rejected': -69.83892059326172, 'KL/chosen_KL_mean': -325.312255859375, 'KL/rejected_KL_mean': -485.41815185546875, 'KL/mean': -405.36517333984375, 'KL/std': 198.17393493652344, 'logits/chosen': -0.4359634816646576, 'logits/rejected': -0.422908216714859, 'epoch': 0.53} + 53%|█████▎ | 360/681 [15:16<13:51, 2.59s/it] 53%|█████▎ | 361/681 [15:19<14:00, 2.63s/it] {'loss': 1.1513, 'grad_norm': 43.57426071166992, 'learning_rate': 2.6923093854861593e-07, 'fcm_dpo/beta': 0.0019699514377862215, 'fcm_dpo/q_t': 0.4253769516944885, 'fcm_dpo/delta': 0.07963744550943375, 'fcm_dpo/margin': 163.78407287597656, 'margin_dpo/margin_mean': 163.7840576171875, 'margin_dpo/margin_std': 258.46649169921875, 'logps/chosen': -392.43408203125, 'logps/rejected': -593.117919921875, 'logps/ref_chosen': -53.86951446533203, 'logps/ref_rejected': -90.7692642211914, 'KL/chosen_KL_mean': -338.5645751953125, 'KL/rejected_KL_mean': -502.3486328125, 'KL/mean': -420.45660400390625, 'KL/std': 235.536376953125, 'logits/chosen': -0.44554078578948975, 'logits/rejected': -0.4408929944038391, 'epoch': 0.53} + 53%|█████▎ | 361/681 [15:19<14:00, 2.63s/it] 53%|█████▎ | 362/681 [15:21<13:50, 2.60s/it] {'loss': 0.991, 'grad_norm': 25.4000301361084, 'learning_rate': 2.679511629948319e-07, 'fcm_dpo/beta': 0.0019274294609203935, 'fcm_dpo/q_t': 0.38001787662506104, 'fcm_dpo/delta': -0.1298113465309143, 'fcm_dpo/margin': 270.9095153808594, 'margin_dpo/margin_mean': 270.9095458984375, 'margin_dpo/margin_std': 266.97491455078125, 'logps/chosen': -352.044677734375, 'logps/rejected': -669.8970947265625, 'logps/ref_chosen': -58.639060974121094, 'logps/ref_rejected': -105.58195495605469, 'KL/chosen_KL_mean': -293.4056396484375, 'KL/rejected_KL_mean': -564.315185546875, 'KL/mean': -428.86041259765625, 'KL/std': 247.4333038330078, 'logits/chosen': -0.4862041473388672, 'logits/rejected': -0.4968222975730896, 'epoch': 0.53} + 53%|█████▎ | 362/681 [15:21<13:50, 2.60s/it] 53%|█████▎ | 363/681 [15:23<13:19, 2.51s/it] {'loss': 0.9916, 'grad_norm': 24.668289184570312, 'learning_rate': 2.6667091441120816e-07, 'fcm_dpo/beta': 0.0018918986897915602, 'fcm_dpo/q_t': 0.37962085008621216, 'fcm_dpo/delta': -0.12874022126197815, 'fcm_dpo/margin': 276.02301025390625, 'margin_dpo/margin_mean': 276.02301025390625, 'margin_dpo/margin_std': 272.3272705078125, 'logps/chosen': -308.30194091796875, 'logps/rejected': -614.4615478515625, 'logps/ref_chosen': -44.558380126953125, 'logps/ref_rejected': -74.69496154785156, 'KL/chosen_KL_mean': -263.7435607910156, 'KL/rejected_KL_mean': -539.7665405273438, 'KL/mean': -401.75506591796875, 'KL/std': 247.50381469726562, 'logits/chosen': -0.4323340654373169, 'logits/rejected': -0.4228121340274811, 'epoch': 0.53} + 53%|█████▎ | 363/681 [15:23<13:19, 2.51s/it] 53%|█████▎ | 364/681 [15:26<13:23, 2.53s/it] {'loss': 1.0996, 'grad_norm': 24.834049224853516, 'learning_rate': 2.6539022653348575e-07, 'fcm_dpo/beta': 0.0018847124883905053, 'fcm_dpo/q_t': 0.40976476669311523, 'fcm_dpo/delta': 0.010454859584569931, 'fcm_dpo/margin': 206.69784545898438, 'margin_dpo/margin_mean': 206.69784545898438, 'margin_dpo/margin_std': 276.881103515625, 'logps/chosen': -340.779052734375, 'logps/rejected': -589.97802734375, 'logps/ref_chosen': -48.894622802734375, 'logps/ref_rejected': -91.395751953125, 'KL/chosen_KL_mean': -291.88446044921875, 'KL/rejected_KL_mean': -498.5823059082031, 'KL/mean': -395.2333984375, 'KL/std': 239.70700073242188, 'logits/chosen': -0.4836190342903137, 'logits/rejected': -0.4936879873275757, 'epoch': 0.53} + 53%|█████▎ | 364/681 [15:26<13:23, 2.53s/it] 54%|█████▎ | 365/681 [15:29<13:27, 2.56s/it] {'loss': 1.0731, 'grad_norm': 22.168062210083008, 'learning_rate': 2.641091331089811e-07, 'fcm_dpo/beta': 0.0018760417588055134, 'fcm_dpo/q_t': 0.40619686245918274, 'fcm_dpo/delta': -0.009370389394462109, 'fcm_dpo/margin': 218.00540161132812, 'margin_dpo/margin_mean': 218.00540161132812, 'margin_dpo/margin_std': 258.5311279296875, 'logps/chosen': -331.12017822265625, 'logps/rejected': -590.3345336914062, 'logps/ref_chosen': -51.49274444580078, 'logps/ref_rejected': -92.70166778564453, 'KL/chosen_KL_mean': -279.62744140625, 'KL/rejected_KL_mean': -497.63287353515625, 'KL/mean': -388.6301574707031, 'KL/std': 249.4620361328125, 'logits/chosen': -0.4246390461921692, 'logits/rejected': -0.43436652421951294, 'epoch': 0.54} + 54%|█████▎ | 365/681 [15:29<13:27, 2.56s/it] 54%|█████▎ | 366/681 [15:31<13:26, 2.56s/it] {'loss': 1.086, 'grad_norm': 22.63542366027832, 'learning_rate': 2.6282766789569736e-07, 'fcm_dpo/beta': 0.0018647974357008934, 'fcm_dpo/q_t': 0.4064916968345642, 'fcm_dpo/delta': -0.006713632494211197, 'fcm_dpo/margin': 217.76443481445312, 'margin_dpo/margin_mean': 217.76443481445312, 'margin_dpo/margin_std': 280.0198669433594, 'logps/chosen': -301.8753662109375, 'logps/rejected': -558.2296142578125, 'logps/ref_chosen': -44.7205696105957, 'logps/ref_rejected': -83.31040954589844, 'KL/chosen_KL_mean': -257.15478515625, 'KL/rejected_KL_mean': -474.91925048828125, 'KL/mean': -366.0369873046875, 'KL/std': 234.462890625, 'logits/chosen': -0.4495304822921753, 'logits/rejected': -0.46502619981765747, 'epoch': 0.54} + 54%|█████▎ | 366/681 [15:31<13:26, 2.56s/it] 54%|█████▍ | 367/681 [15:34<13:41, 2.62s/it] {'loss': 1.1141, 'grad_norm': 18.776704788208008, 'learning_rate': 2.615458646614349e-07, 'fcm_dpo/beta': 0.0018905512988567352, 'fcm_dpo/q_t': 0.4182465672492981, 'fcm_dpo/delta': 0.05568384379148483, 'fcm_dpo/margin': 183.06765747070312, 'margin_dpo/margin_mean': 183.06765747070312, 'margin_dpo/margin_std': 226.84693908691406, 'logps/chosen': -323.12744140625, 'logps/rejected': -524.541015625, 'logps/ref_chosen': -58.405418395996094, 'logps/ref_rejected': -76.75132751464844, 'KL/chosen_KL_mean': -264.7220458984375, 'KL/rejected_KL_mean': -447.7897033691406, 'KL/mean': -356.255859375, 'KL/std': 209.75563049316406, 'logits/chosen': -0.4651241898536682, 'logits/rejected': -0.44852566719055176, 'epoch': 0.54} + 54%|█████▍ | 367/681 [15:34<13:41, 2.62s/it] 54%|█████▍ | 368/681 [15:37<13:36, 2.61s/it] {'loss': 0.9614, 'grad_norm': 33.4195671081543, 'learning_rate': 2.6026375718290083e-07, 'fcm_dpo/beta': 0.0018544028280302882, 'fcm_dpo/q_t': 0.373285174369812, 'fcm_dpo/delta': -0.14009898900985718, 'fcm_dpo/margin': 287.25018310546875, 'margin_dpo/margin_mean': 287.25018310546875, 'margin_dpo/margin_std': 218.70684814453125, 'logps/chosen': -296.36761474609375, 'logps/rejected': -637.7205810546875, 'logps/ref_chosen': -44.452518463134766, 'logps/ref_rejected': -98.55526733398438, 'KL/chosen_KL_mean': -251.9151153564453, 'KL/rejected_KL_mean': -539.165283203125, 'KL/mean': -395.54022216796875, 'KL/std': 242.84780883789062, 'logits/chosen': -0.4662426710128784, 'logits/rejected': -0.47398853302001953, 'epoch': 0.54} + 54%|█████▍ | 368/681 [15:37<13:36, 2.61s/it] 54%|█████▍ | 369/681 [15:39<13:39, 2.63s/it] {'loss': 1.1808, 'grad_norm': 27.64653968811035, 'learning_rate': 2.589813792448196e-07, 'fcm_dpo/beta': 0.0018784540006890893, 'fcm_dpo/q_t': 0.43329665064811707, 'fcm_dpo/delta': 0.11527148634195328, 'fcm_dpo/margin': 153.03839111328125, 'margin_dpo/margin_mean': 153.0383758544922, 'margin_dpo/margin_std': 270.01470947265625, 'logps/chosen': -396.97332763671875, 'logps/rejected': -569.926025390625, 'logps/ref_chosen': -71.38150024414062, 'logps/ref_rejected': -91.29582214355469, 'KL/chosen_KL_mean': -325.591796875, 'KL/rejected_KL_mean': -478.63018798828125, 'KL/mean': -402.11102294921875, 'KL/std': 241.74417114257812, 'logits/chosen': -0.44844913482666016, 'logits/rejected': -0.4270949065685272, 'epoch': 0.54} + 54%|█████▍ | 369/681 [15:39<13:39, 2.63s/it] 54%|█████▍ | 370/681 [15:42<13:45, 2.65s/it] {'loss': 1.1983, 'grad_norm': 27.10540199279785, 'learning_rate': 2.5769876463904263e-07, 'fcm_dpo/beta': 0.0019235580693930387, 'fcm_dpo/q_t': 0.43653106689453125, 'fcm_dpo/delta': 0.12600602209568024, 'fcm_dpo/margin': 144.05679321289062, 'margin_dpo/margin_mean': 144.05679321289062, 'margin_dpo/margin_std': 280.3094482421875, 'logps/chosen': -421.312744140625, 'logps/rejected': -591.0218505859375, 'logps/ref_chosen': -71.60749816894531, 'logps/ref_rejected': -97.25978088378906, 'KL/chosen_KL_mean': -349.7052307128906, 'KL/rejected_KL_mean': -493.76202392578125, 'KL/mean': -421.733642578125, 'KL/std': 252.10357666015625, 'logits/chosen': -0.4947051405906677, 'logits/rejected': -0.487566202878952, 'epoch': 0.54} + 54%|█████▍ | 370/681 [15:42<13:45, 2.65s/it] 54%|█████▍ | 371/681 [15:45<13:46, 2.67s/it] {'loss': 1.095, 'grad_norm': 26.90560531616211, 'learning_rate': 2.5641594716365744e-07, 'fcm_dpo/beta': 0.0019333376549184322, 'fcm_dpo/q_t': 0.40682950615882874, 'fcm_dpo/delta': -0.011104363948106766, 'fcm_dpo/margin': 212.34832763671875, 'margin_dpo/margin_mean': 212.34832763671875, 'margin_dpo/margin_std': 297.38665771484375, 'logps/chosen': -406.81439208984375, 'logps/rejected': -648.9203491210938, 'logps/ref_chosen': -69.41448974609375, 'logps/ref_rejected': -99.17217254638672, 'KL/chosen_KL_mean': -337.39990234375, 'KL/rejected_KL_mean': -549.7481689453125, 'KL/mean': -443.57403564453125, 'KL/std': 258.0762939453125, 'logits/chosen': -0.5080785751342773, 'logits/rejected': -0.4954741299152374, 'epoch': 0.54} + 54%|█████▍ | 371/681 [15:45<13:46, 2.67s/it] 55%|█████▍ | 372/681 [15:47<13:06, 2.55s/it] {'loss': 1.0443, 'grad_norm': 22.939546585083008, 'learning_rate': 2.551329606220976e-07, 'fcm_dpo/beta': 0.0018996518338099122, 'fcm_dpo/q_t': 0.3917329013347626, 'fcm_dpo/delta': -0.08990687876939774, 'fcm_dpo/margin': 255.53970336914062, 'margin_dpo/margin_mean': 255.53970336914062, 'margin_dpo/margin_std': 328.15814208984375, 'logps/chosen': -385.1048583984375, 'logps/rejected': -657.3660888671875, 'logps/ref_chosen': -61.8179931640625, 'logps/ref_rejected': -78.53948974609375, 'KL/chosen_KL_mean': -323.286865234375, 'KL/rejected_KL_mean': -578.8265991210938, 'KL/mean': -451.05670166015625, 'KL/std': 294.25408935546875, 'logits/chosen': -0.4645116329193115, 'logits/rejected': -0.444297730922699, 'epoch': 0.55} + 55%|█████▍ | 372/681 [15:47<13:06, 2.55s/it] 55%|█████▍ | 373/681 [15:49<12:59, 2.53s/it] {'loss': 1.0473, 'grad_norm': 27.45345115661621, 'learning_rate': 2.538498388222517e-07, 'fcm_dpo/beta': 0.001885814475826919, 'fcm_dpo/q_t': 0.393940806388855, 'fcm_dpo/delta': -0.060002297163009644, 'fcm_dpo/margin': 242.35189819335938, 'margin_dpo/margin_mean': 242.35189819335938, 'margin_dpo/margin_std': 272.34967041015625, 'logps/chosen': -418.4765319824219, 'logps/rejected': -682.5708618164062, 'logps/ref_chosen': -64.21713256835938, 'logps/ref_rejected': -85.95960998535156, 'KL/chosen_KL_mean': -354.2593994140625, 'KL/rejected_KL_mean': -596.6112060546875, 'KL/mean': -475.4353332519531, 'KL/std': 283.9608459472656, 'logits/chosen': -0.46065136790275574, 'logits/rejected': -0.438961923122406, 'epoch': 0.55} + 55%|█████▍ | 373/681 [15:49<12:59, 2.53s/it] 55%|█████▍ | 374/681 [15:52<13:20, 2.61s/it] {'loss': 1.1182, 'grad_norm': 24.44922637939453, 'learning_rate': 2.525666155755725e-07, 'fcm_dpo/beta': 0.0018586989026516676, 'fcm_dpo/q_t': 0.4113299250602722, 'fcm_dpo/delta': -0.0067335814237594604, 'fcm_dpo/margin': 218.37728881835938, 'margin_dpo/margin_mean': 218.37728881835938, 'margin_dpo/margin_std': 353.18353271484375, 'logps/chosen': -392.97943115234375, 'logps/rejected': -634.3466796875, 'logps/ref_chosen': -70.65018463134766, 'logps/ref_rejected': -93.64016723632812, 'KL/chosen_KL_mean': -322.3292236328125, 'KL/rejected_KL_mean': -540.70654296875, 'KL/mean': -431.51788330078125, 'KL/std': 311.81536865234375, 'logits/chosen': -0.524357795715332, 'logits/rejected': -0.5057187676429749, 'epoch': 0.55} + 55%|█████▍ | 374/681 [15:52<13:20, 2.61s/it] 55%|█████▌ | 375/681 [15:55<13:25, 2.63s/it] {'loss': 1.1099, 'grad_norm': 27.943613052368164, 'learning_rate': 2.512833246961859e-07, 'fcm_dpo/beta': 0.0018582877237349749, 'fcm_dpo/q_t': 0.4099903106689453, 'fcm_dpo/delta': 0.008078165352344513, 'fcm_dpo/margin': 210.68402099609375, 'margin_dpo/margin_mean': 210.68402099609375, 'margin_dpo/margin_std': 301.5238952636719, 'logps/chosen': -393.9966735839844, 'logps/rejected': -633.5387573242188, 'logps/ref_chosen': -60.080223083496094, 'logps/ref_rejected': -88.93830871582031, 'KL/chosen_KL_mean': -333.91644287109375, 'KL/rejected_KL_mean': -544.6004638671875, 'KL/mean': -439.2584228515625, 'KL/std': 251.31211853027344, 'logits/chosen': -0.4510612487792969, 'logits/rejected': -0.44956958293914795, 'epoch': 0.55} + 55%|█████▌ | 375/681 [15:55<13:25, 2.63s/it] 55%|█████▌ | 376/681 [15:58<13:27, 2.65s/it] {'loss': 1.0383, 'grad_norm': 23.84757423400879, 'learning_rate': 2.5e-07, 'fcm_dpo/beta': 0.001843743957579136, 'fcm_dpo/q_t': 0.3887389302253723, 'fcm_dpo/delta': -0.09088477492332458, 'fcm_dpo/margin': 263.78704833984375, 'margin_dpo/margin_mean': 263.7870788574219, 'margin_dpo/margin_std': 319.8635559082031, 'logps/chosen': -398.1259460449219, 'logps/rejected': -704.779296875, 'logps/ref_chosen': -62.660308837890625, 'logps/ref_rejected': -105.52660369873047, 'KL/chosen_KL_mean': -335.46563720703125, 'KL/rejected_KL_mean': -599.252685546875, 'KL/mean': -467.35919189453125, 'KL/std': 275.66827392578125, 'logits/chosen': -0.4562457203865051, 'logits/rejected': -0.446555495262146, 'epoch': 0.55} + 55%|█████▌ | 376/681 [15:58<13:27, 2.65s/it] 55%|█████▌ | 377/681 [16:00<13:11, 2.60s/it] {'loss': 1.0428, 'grad_norm': 21.212696075439453, 'learning_rate': 2.487166753038141e-07, 'fcm_dpo/beta': 0.0018218334298580885, 'fcm_dpo/q_t': 0.3930322229862213, 'fcm_dpo/delta': -0.0696791335940361, 'fcm_dpo/margin': 256.0513916015625, 'margin_dpo/margin_mean': 256.0513916015625, 'margin_dpo/margin_std': 300.54119873046875, 'logps/chosen': -388.82452392578125, 'logps/rejected': -689.1005249023438, 'logps/ref_chosen': -54.478736877441406, 'logps/ref_rejected': -98.70335388183594, 'KL/chosen_KL_mean': -334.3457946777344, 'KL/rejected_KL_mean': -590.397216796875, 'KL/mean': -462.3714599609375, 'KL/std': 288.103515625, 'logits/chosen': -0.40101295709609985, 'logits/rejected': -0.3998126685619354, 'epoch': 0.55} + 55%|█████▌ | 377/681 [16:00<13:11, 2.60s/it] 56%|█████▌ | 378/681 [16:02<12:27, 2.47s/it] {'loss': 1.025, 'grad_norm': 26.153120040893555, 'learning_rate': 2.4743338442442754e-07, 'fcm_dpo/beta': 0.001788057736121118, 'fcm_dpo/q_t': 0.38815170526504517, 'fcm_dpo/delta': -0.08244302868843079, 'fcm_dpo/margin': 267.56414794921875, 'margin_dpo/margin_mean': 267.56414794921875, 'margin_dpo/margin_std': 286.3600769042969, 'logps/chosen': -360.5497131347656, 'logps/rejected': -671.1402587890625, 'logps/ref_chosen': -45.02053451538086, 'logps/ref_rejected': -88.0469741821289, 'KL/chosen_KL_mean': -315.5291748046875, 'KL/rejected_KL_mean': -583.0933227539062, 'KL/mean': -449.3112487792969, 'KL/std': 265.04840087890625, 'logits/chosen': -0.42576664686203003, 'logits/rejected': -0.4415106773376465, 'epoch': 0.56} + 56%|█████▌ | 378/681 [16:02<12:27, 2.47s/it] 56%|█████▌ | 379/681 [16:05<12:23, 2.46s/it] {'loss': 1.0578, 'grad_norm': 28.71318244934082, 'learning_rate': 2.461501611777483e-07, 'fcm_dpo/beta': 0.0017578438855707645, 'fcm_dpo/q_t': 0.3962175250053406, 'fcm_dpo/delta': -0.05908029526472092, 'fcm_dpo/margin': 259.2121887207031, 'margin_dpo/margin_mean': 259.21221923828125, 'margin_dpo/margin_std': 324.53790283203125, 'logps/chosen': -409.1219177246094, 'logps/rejected': -729.4521484375, 'logps/ref_chosen': -53.182098388671875, 'logps/ref_rejected': -114.3001708984375, 'KL/chosen_KL_mean': -355.9398193359375, 'KL/rejected_KL_mean': -615.1519775390625, 'KL/mean': -485.5458984375, 'KL/std': 267.96209716796875, 'logits/chosen': -0.42304420471191406, 'logits/rejected': -0.44598186016082764, 'epoch': 0.56} + 56%|█████▌ | 379/681 [16:05<12:23, 2.46s/it] 56%|█████▌ | 380/681 [16:07<12:09, 2.42s/it] {'loss': 1.0264, 'grad_norm': 25.73267364501953, 'learning_rate': 2.4486703937790243e-07, 'fcm_dpo/beta': 0.0017373515293002129, 'fcm_dpo/q_t': 0.38500848412513733, 'fcm_dpo/delta': -0.09990786015987396, 'fcm_dpo/margin': 284.94622802734375, 'margin_dpo/margin_mean': 284.94622802734375, 'margin_dpo/margin_std': 328.1457824707031, 'logps/chosen': -388.29193115234375, 'logps/rejected': -726.0767822265625, 'logps/ref_chosen': -51.3530387878418, 'logps/ref_rejected': -104.19169616699219, 'KL/chosen_KL_mean': -336.93890380859375, 'KL/rejected_KL_mean': -621.8851318359375, 'KL/mean': -479.41204833984375, 'KL/std': 297.79949951171875, 'logits/chosen': -0.43626442551612854, 'logits/rejected': -0.4630964398384094, 'epoch': 0.56} + 56%|█████▌ | 380/681 [16:07<12:09, 2.42s/it] 56%|█████▌ | 381/681 [16:09<12:12, 2.44s/it] {'loss': 1.1478, 'grad_norm': 24.38262939453125, 'learning_rate': 2.435840528363426e-07, 'fcm_dpo/beta': 0.0017377103213220835, 'fcm_dpo/q_t': 0.42109525203704834, 'fcm_dpo/delta': 0.055430181324481964, 'fcm_dpo/margin': 199.41497802734375, 'margin_dpo/margin_mean': 199.41497802734375, 'margin_dpo/margin_std': 332.27398681640625, 'logps/chosen': -407.77587890625, 'logps/rejected': -628.607177734375, 'logps/ref_chosen': -57.80306625366211, 'logps/ref_rejected': -79.21940612792969, 'KL/chosen_KL_mean': -349.9728088378906, 'KL/rejected_KL_mean': -549.3878173828125, 'KL/mean': -449.6802978515625, 'KL/std': 246.59634399414062, 'logits/chosen': -0.4588872790336609, 'logits/rejected': -0.4429172873497009, 'epoch': 0.56} + 56%|█████▌ | 381/681 [16:10<12:12, 2.44s/it] 56%|█████▌ | 382/681 [16:12<12:23, 2.49s/it] {'loss': 1.0484, 'grad_norm': 26.342195510864258, 'learning_rate': 2.4230123536095745e-07, 'fcm_dpo/beta': 0.001735961064696312, 'fcm_dpo/q_t': 0.3991192877292633, 'fcm_dpo/delta': -0.031305499374866486, 'fcm_dpo/margin': 247.65719604492188, 'margin_dpo/margin_mean': 247.65719604492188, 'margin_dpo/margin_std': 255.80958557128906, 'logps/chosen': -394.7601623535156, 'logps/rejected': -687.107177734375, 'logps/ref_chosen': -66.02030181884766, 'logps/ref_rejected': -110.71016693115234, 'KL/chosen_KL_mean': -328.7398681640625, 'KL/rejected_KL_mean': -576.39697265625, 'KL/mean': -452.5684509277344, 'KL/std': 232.17242431640625, 'logits/chosen': -0.48217618465423584, 'logits/rejected': -0.48925304412841797, 'epoch': 0.56} + 56%|█████▌ | 382/681 [16:12<12:23, 2.49s/it] 56%|█████▌ | 383/681 [16:15<12:27, 2.51s/it] {'loss': 1.0938, 'grad_norm': 30.611806869506836, 'learning_rate': 2.4101862075518037e-07, 'fcm_dpo/beta': 0.0017293533310294151, 'fcm_dpo/q_t': 0.40462052822113037, 'fcm_dpo/delta': -0.015089768916368484, 'fcm_dpo/margin': 239.6513671875, 'margin_dpo/margin_mean': 239.65135192871094, 'margin_dpo/margin_std': 338.258544921875, 'logps/chosen': -388.0343017578125, 'logps/rejected': -671.0100708007812, 'logps/ref_chosen': -50.39148712158203, 'logps/ref_rejected': -93.71589660644531, 'KL/chosen_KL_mean': -337.642822265625, 'KL/rejected_KL_mean': -577.294189453125, 'KL/mean': -457.468505859375, 'KL/std': 262.3541564941406, 'logits/chosen': -0.4417022466659546, 'logits/rejected': -0.4511658549308777, 'epoch': 0.56} + 56%|█████▌ | 383/681 [16:15<12:27, 2.51s/it] 56%|█████▋ | 384/681 [16:17<12:29, 2.53s/it] {'loss': 1.1222, 'grad_norm': 24.98710060119629, 'learning_rate': 2.397362428170992e-07, 'fcm_dpo/beta': 0.0017538972897455096, 'fcm_dpo/q_t': 0.4214822053909302, 'fcm_dpo/delta': 0.06988409906625748, 'fcm_dpo/margin': 189.2479248046875, 'margin_dpo/margin_mean': 189.2479248046875, 'margin_dpo/margin_std': 231.77182006835938, 'logps/chosen': -404.26898193359375, 'logps/rejected': -627.231689453125, 'logps/ref_chosen': -52.046104431152344, 'logps/ref_rejected': -85.76089477539062, 'KL/chosen_KL_mean': -352.222900390625, 'KL/rejected_KL_mean': -541.4708251953125, 'KL/mean': -446.8468322753906, 'KL/std': 242.38162231445312, 'logits/chosen': -0.4990885853767395, 'logits/rejected': -0.4952540993690491, 'epoch': 0.56} + 56%|█████▋ | 384/681 [16:17<12:29, 2.53s/it] 57%|█████▋ | 385/681 [16:20<12:23, 2.51s/it] {'loss': 1.055, 'grad_norm': 29.25759506225586, 'learning_rate': 2.3845413533856514e-07, 'fcm_dpo/beta': 0.0017491495236754417, 'fcm_dpo/q_t': 0.40170639753341675, 'fcm_dpo/delta': -0.015889476984739304, 'fcm_dpo/margin': 237.3870086669922, 'margin_dpo/margin_mean': 237.38702392578125, 'margin_dpo/margin_std': 238.87646484375, 'logps/chosen': -382.8681945800781, 'logps/rejected': -632.531005859375, 'logps/ref_chosen': -65.55215454101562, 'logps/ref_rejected': -77.82792663574219, 'KL/chosen_KL_mean': -317.3160400390625, 'KL/rejected_KL_mean': -554.7030639648438, 'KL/mean': -436.0095520019531, 'KL/std': 214.3333740234375, 'logits/chosen': -0.5247458219528198, 'logits/rejected': -0.5023648738861084, 'epoch': 0.57} + 57%|█████▋ | 385/681 [16:20<12:23, 2.51s/it] 57%|█████▋ | 386/681 [16:22<12:30, 2.54s/it] {'loss': 1.0633, 'grad_norm': 26.947490692138672, 'learning_rate': 2.3717233210430254e-07, 'fcm_dpo/beta': 0.0017403860110789537, 'fcm_dpo/q_t': 0.3999601900577545, 'fcm_dpo/delta': -0.03472103923559189, 'fcm_dpo/margin': 248.92791748046875, 'margin_dpo/margin_mean': 248.9279022216797, 'margin_dpo/margin_std': 302.4674987792969, 'logps/chosen': -392.3741455078125, 'logps/rejected': -675.4075927734375, 'logps/ref_chosen': -58.22185516357422, 'logps/ref_rejected': -92.32742309570312, 'KL/chosen_KL_mean': -334.15228271484375, 'KL/rejected_KL_mean': -583.0802001953125, 'KL/mean': -458.6162109375, 'KL/std': 262.49871826171875, 'logits/chosen': -0.5123308300971985, 'logits/rejected': -0.5101590156555176, 'epoch': 0.57} + 57%|█████▋ | 386/681 [16:22<12:30, 2.54s/it] 57%|█████▋ | 387/681 [16:25<12:45, 2.60s/it] {'loss': 1.1027, 'grad_norm': 30.391345977783203, 'learning_rate': 2.3589086689101889e-07, 'fcm_dpo/beta': 0.0017379240598529577, 'fcm_dpo/q_t': 0.4142611622810364, 'fcm_dpo/delta': 0.036699328571558, 'fcm_dpo/margin': 209.68359375, 'margin_dpo/margin_mean': 209.68357849121094, 'margin_dpo/margin_std': 252.79678344726562, 'logps/chosen': -427.83062744140625, 'logps/rejected': -663.263916015625, 'logps/ref_chosen': -66.41944885253906, 'logps/ref_rejected': -92.16915893554688, 'KL/chosen_KL_mean': -361.41119384765625, 'KL/rejected_KL_mean': -571.0947265625, 'KL/mean': -466.2529602050781, 'KL/std': 245.76097106933594, 'logits/chosen': -0.5567930340766907, 'logits/rejected': -0.5412279367446899, 'epoch': 0.57} + 57%|█████▋ | 387/681 [16:25<12:45, 2.60s/it] 57%|█████▋ | 388/681 [16:27<12:20, 2.53s/it] {'loss': 1.03, 'grad_norm': 26.922496795654297, 'learning_rate': 2.3460977346651428e-07, 'fcm_dpo/beta': 0.0017218522261828184, 'fcm_dpo/q_t': 0.390036940574646, 'fcm_dpo/delta': -0.08878612518310547, 'fcm_dpo/margin': 281.1998291015625, 'margin_dpo/margin_mean': 281.1997985839844, 'margin_dpo/margin_std': 325.59906005859375, 'logps/chosen': -394.0858459472656, 'logps/rejected': -729.5892333984375, 'logps/ref_chosen': -50.129459381103516, 'logps/ref_rejected': -104.43305969238281, 'KL/chosen_KL_mean': -343.9563903808594, 'KL/rejected_KL_mean': -625.1561889648438, 'KL/mean': -484.5562744140625, 'KL/std': 287.8593444824219, 'logits/chosen': -0.46857941150665283, 'logits/rejected': -0.48115378618240356, 'epoch': 0.57} + 57%|█████▋ | 388/681 [16:27<12:20, 2.53s/it] 57%|█████▋ | 389/681 [16:30<12:07, 2.49s/it] {'loss': 1.0799, 'grad_norm': 24.15456771850586, 'learning_rate': 2.3332908558879177e-07, 'fcm_dpo/beta': 0.0017109981272369623, 'fcm_dpo/q_t': 0.4042537808418274, 'fcm_dpo/delta': -0.01739252358675003, 'fcm_dpo/margin': 243.48245239257812, 'margin_dpo/margin_mean': 243.48245239257812, 'margin_dpo/margin_std': 314.66058349609375, 'logps/chosen': -443.9896240234375, 'logps/rejected': -707.47998046875, 'logps/ref_chosen': -57.906593322753906, 'logps/ref_rejected': -77.91454315185547, 'KL/chosen_KL_mean': -386.0830383300781, 'KL/rejected_KL_mean': -629.5654296875, 'KL/mean': -507.82427978515625, 'KL/std': 286.28631591796875, 'logits/chosen': -0.5262615084648132, 'logits/rejected': -0.5186604261398315, 'epoch': 0.57} + 57%|█████▋ | 389/681 [16:30<12:07, 2.49s/it] 57%|█████▋ | 390/681 [16:32<12:06, 2.50s/it] {'loss': 1.1069, 'grad_norm': 26.528804779052734, 'learning_rate': 2.320488370051681e-07, 'fcm_dpo/beta': 0.0017028467264026403, 'fcm_dpo/q_t': 0.4092911183834076, 'fcm_dpo/delta': -0.011735277250409126, 'fcm_dpo/margin': 241.33126831054688, 'margin_dpo/margin_mean': 241.33126831054688, 'margin_dpo/margin_std': 371.08599853515625, 'logps/chosen': -433.7308349609375, 'logps/rejected': -711.3643798828125, 'logps/ref_chosen': -49.22591781616211, 'logps/ref_rejected': -85.5281982421875, 'KL/chosen_KL_mean': -384.50494384765625, 'KL/rejected_KL_mean': -625.836181640625, 'KL/mean': -505.17059326171875, 'KL/std': 288.66546630859375, 'logits/chosen': -0.46930596232414246, 'logits/rejected': -0.46219387650489807, 'epoch': 0.57} + 57%|█████▋ | 390/681 [16:32<12:06, 2.50s/it] 57%|█████▋ | 391/681 [16:35<12:01, 2.49s/it] {'loss': 1.2173, 'grad_norm': 45.76322555541992, 'learning_rate': 2.3076906145138405e-07, 'fcm_dpo/beta': 0.0017502898117527366, 'fcm_dpo/q_t': 0.4439963400363922, 'fcm_dpo/delta': 0.15936514735221863, 'fcm_dpo/margin': 139.651611328125, 'margin_dpo/margin_mean': 139.65162658691406, 'margin_dpo/margin_std': 283.3598937988281, 'logps/chosen': -451.00726318359375, 'logps/rejected': -613.0674438476562, 'logps/ref_chosen': -64.32965087890625, 'logps/ref_rejected': -86.73820495605469, 'KL/chosen_KL_mean': -386.6776123046875, 'KL/rejected_KL_mean': -526.3292236328125, 'KL/mean': -456.50341796875, 'KL/std': 271.3560791015625, 'logits/chosen': -0.5254815220832825, 'logits/rejected': -0.5183066725730896, 'epoch': 0.57} + 57%|█████▋ | 391/681 [16:35<12:01, 2.49s/it] 58%|█████▊ | 392/681 [16:37<12:15, 2.54s/it] {'loss': 1.0049, 'grad_norm': 23.80723762512207, 'learning_rate': 2.294897926507156e-07, 'fcm_dpo/beta': 0.00174234458245337, 'fcm_dpo/q_t': 0.38457435369491577, 'fcm_dpo/delta': -0.10435783863067627, 'fcm_dpo/margin': 286.5428771972656, 'margin_dpo/margin_mean': 286.5428771972656, 'margin_dpo/margin_std': 283.0867004394531, 'logps/chosen': -379.7579345703125, 'logps/rejected': -715.1427001953125, 'logps/ref_chosen': -53.50397872924805, 'logps/ref_rejected': -102.34584045410156, 'KL/chosen_KL_mean': -326.25396728515625, 'KL/rejected_KL_mean': -612.796875, 'KL/mean': -469.5254211425781, 'KL/std': 288.09954833984375, 'logits/chosen': -0.4814883768558502, 'logits/rejected': -0.4757598340511322, 'epoch': 0.58} + 58%|█████▊ | 392/681 [16:37<12:15, 2.54s/it] 58%|█████▊ | 393/681 [16:40<12:00, 2.50s/it] {'loss': 1.1214, 'grad_norm': 21.790613174438477, 'learning_rate': 2.2821106431308543e-07, 'fcm_dpo/beta': 0.0017277842853218317, 'fcm_dpo/q_t': 0.41501516103744507, 'fcm_dpo/delta': 0.014872867614030838, 'fcm_dpo/margin': 223.2145233154297, 'margin_dpo/margin_mean': 223.21453857421875, 'margin_dpo/margin_std': 357.96539306640625, 'logps/chosen': -371.00787353515625, 'logps/rejected': -619.7174072265625, 'logps/ref_chosen': -46.473915100097656, 'logps/ref_rejected': -71.96885681152344, 'KL/chosen_KL_mean': -324.5339660644531, 'KL/rejected_KL_mean': -547.74853515625, 'KL/mean': -436.1412353515625, 'KL/std': 283.82720947265625, 'logits/chosen': -0.46388766169548035, 'logits/rejected': -0.46215295791625977, 'epoch': 0.58} + 58%|█████▊ | 393/681 [16:40<12:00, 2.50s/it] 58%|█████▊ | 394/681 [16:42<12:07, 2.54s/it] {'loss': 1.0907, 'grad_norm': 26.26580810546875, 'learning_rate': 2.2693291013417452e-07, 'fcm_dpo/beta': 0.001729074981994927, 'fcm_dpo/q_t': 0.4082695245742798, 'fcm_dpo/delta': -0.0038064131513237953, 'fcm_dpo/margin': 233.43197631835938, 'margin_dpo/margin_mean': 233.43197631835938, 'margin_dpo/margin_std': 314.2247314453125, 'logps/chosen': -423.50616455078125, 'logps/rejected': -694.8492431640625, 'logps/ref_chosen': -52.91154861450195, 'logps/ref_rejected': -90.8226318359375, 'KL/chosen_KL_mean': -370.5946044921875, 'KL/rejected_KL_mean': -604.026611328125, 'KL/mean': -487.31060791015625, 'KL/std': 303.3579406738281, 'logits/chosen': -0.4947393238544464, 'logits/rejected': -0.4953378438949585, 'epoch': 0.58} + 58%|█████▊ | 394/681 [16:42<12:07, 2.54s/it] 58%|█████▊ | 395/681 [16:45<11:49, 2.48s/it] {'loss': 1.0783, 'grad_norm': 25.020362854003906, 'learning_rate': 2.2565536379453404e-07, 'fcm_dpo/beta': 0.001716281520202756, 'fcm_dpo/q_t': 0.4020352363586426, 'fcm_dpo/delta': -0.03543686866760254, 'fcm_dpo/margin': 252.64047241210938, 'margin_dpo/margin_mean': 252.64048767089844, 'margin_dpo/margin_std': 341.7522888183594, 'logps/chosen': -430.51708984375, 'logps/rejected': -704.39404296875, 'logps/ref_chosen': -62.546112060546875, 'logps/ref_rejected': -83.78262329101562, 'KL/chosen_KL_mean': -367.9709777832031, 'KL/rejected_KL_mean': -620.6114501953125, 'KL/mean': -494.29119873046875, 'KL/std': 292.8189697265625, 'logits/chosen': -0.5321957468986511, 'logits/rejected': -0.5300949811935425, 'epoch': 0.58} + 58%|█████▊ | 395/681 [16:45<11:49, 2.48s/it] 58%|█████▊ | 396/681 [16:47<11:52, 2.50s/it] {'loss': 1.0883, 'grad_norm': 26.507614135742188, 'learning_rate': 2.2437845895869825e-07, 'fcm_dpo/beta': 0.001719313906505704, 'fcm_dpo/q_t': 0.4104015827178955, 'fcm_dpo/delta': 0.016418248414993286, 'fcm_dpo/margin': 223.4409942626953, 'margin_dpo/margin_mean': 223.44097900390625, 'margin_dpo/margin_std': 268.4827880859375, 'logps/chosen': -439.5453186035156, 'logps/rejected': -682.6370239257812, 'logps/ref_chosen': -68.99594116210938, 'logps/ref_rejected': -88.64665985107422, 'KL/chosen_KL_mean': -370.54937744140625, 'KL/rejected_KL_mean': -593.9903564453125, 'KL/mean': -482.26983642578125, 'KL/std': 286.0380554199219, 'logits/chosen': -0.5050040483474731, 'logits/rejected': -0.4854010343551636, 'epoch': 0.58} + 58%|█████▊ | 396/681 [16:47<11:52, 2.50s/it] 58%|█████▊ | 397/681 [16:50<11:48, 2.49s/it] {'loss': 1.0113, 'grad_norm': 32.973846435546875, 'learning_rate': 2.2310222927429716e-07, 'fcm_dpo/beta': 0.0016906873788684607, 'fcm_dpo/q_t': 0.38490188121795654, 'fcm_dpo/delta': -0.10405933111906052, 'fcm_dpo/margin': 294.6309509277344, 'margin_dpo/margin_mean': 294.6309509277344, 'margin_dpo/margin_std': 307.0301513671875, 'logps/chosen': -412.1397705078125, 'logps/rejected': -748.6097412109375, 'logps/ref_chosen': -61.27716827392578, 'logps/ref_rejected': -103.11612701416016, 'KL/chosen_KL_mean': -350.86260986328125, 'KL/rejected_KL_mean': -645.4935302734375, 'KL/mean': -498.1780700683594, 'KL/std': 290.6457214355469, 'logits/chosen': -0.49156516790390015, 'logits/rejected': -0.4970093369483948, 'epoch': 0.58} + 58%|█████▊ | 397/681 [16:50<11:48, 2.49s/it] 58%|█████▊ | 398/681 [16:52<11:32, 2.45s/it] {'loss': 1.0636, 'grad_norm': 23.229272842407227, 'learning_rate': 2.2182670837116972e-07, 'fcm_dpo/beta': 0.001674711937084794, 'fcm_dpo/q_t': 0.3986341953277588, 'fcm_dpo/delta': -0.053648628294467926, 'fcm_dpo/margin': 269.42486572265625, 'margin_dpo/margin_mean': 269.42486572265625, 'margin_dpo/margin_std': 354.4512634277344, 'logps/chosen': -444.88018798828125, 'logps/rejected': -754.6771240234375, 'logps/ref_chosen': -68.15155029296875, 'logps/ref_rejected': -108.52360534667969, 'KL/chosen_KL_mean': -376.7286376953125, 'KL/rejected_KL_mean': -646.1535034179688, 'KL/mean': -511.4410400390625, 'KL/std': 304.7323303222656, 'logits/chosen': -0.5308432579040527, 'logits/rejected': -0.5298266410827637, 'epoch': 0.58} + 58%|█████▊ | 398/681 [16:52<11:32, 2.45s/it] 59%|█████▊ | 399/681 [16:55<11:32, 2.46s/it] {'loss': 1.1002, 'grad_norm': 31.19171142578125, 'learning_rate': 2.2055192986047804e-07, 'fcm_dpo/beta': 0.0016672208439558744, 'fcm_dpo/q_t': 0.40930691361427307, 'fcm_dpo/delta': 0.004600860178470612, 'fcm_dpo/margin': 237.18785095214844, 'margin_dpo/margin_mean': 237.1878662109375, 'margin_dpo/margin_std': 328.7167663574219, 'logps/chosen': -387.6163635253906, 'logps/rejected': -641.8800048828125, 'logps/ref_chosen': -60.889801025390625, 'logps/ref_rejected': -77.965576171875, 'KL/chosen_KL_mean': -326.7265625, 'KL/rejected_KL_mean': -563.9144287109375, 'KL/mean': -445.32049560546875, 'KL/std': 262.29473876953125, 'logits/chosen': -0.4892912209033966, 'logits/rejected': -0.45055025815963745, 'epoch': 0.59} + 59%|█████▊ | 399/681 [16:55<11:32, 2.46s/it] 59%|█████▊ | 400/681 [16:57<11:35, 2.47s/it] {'loss': 0.9701, 'grad_norm': 22.955949783325195, 'learning_rate': 2.192779273338215e-07, 'fcm_dpo/beta': 0.001628828700631857, 'fcm_dpo/q_t': 0.3711463212966919, 'fcm_dpo/delta': -0.1600421667098999, 'fcm_dpo/margin': 338.1106872558594, 'margin_dpo/margin_mean': 338.11065673828125, 'margin_dpo/margin_std': 314.16839599609375, 'logps/chosen': -380.385498046875, 'logps/rejected': -760.105224609375, 'logps/ref_chosen': -63.64359664916992, 'logps/ref_rejected': -105.252685546875, 'KL/chosen_KL_mean': -316.741943359375, 'KL/rejected_KL_mean': -654.8525390625, 'KL/mean': -485.7972412109375, 'KL/std': 280.26068115234375, 'logits/chosen': -0.5029030442237854, 'logits/rejected': -0.4994921386241913, 'epoch': 0.59} + 59%|█████▊ | 400/681 [16:57<11:35, 2.47s/it] 59%|█████▉ | 401/681 [17:00<11:40, 2.50s/it] {'loss': 1.1922, 'grad_norm': 27.67872428894043, 'learning_rate': 2.1800473436235136e-07, 'fcm_dpo/beta': 0.001636154600419104, 'fcm_dpo/q_t': 0.43033739924430847, 'fcm_dpo/delta': 0.0874527096748352, 'fcm_dpo/margin': 192.76087951660156, 'margin_dpo/margin_mean': 192.7608642578125, 'margin_dpo/margin_std': 390.67706298828125, 'logps/chosen': -422.0484619140625, 'logps/rejected': -641.4387817382812, 'logps/ref_chosen': -57.16303253173828, 'logps/ref_rejected': -83.79249572753906, 'KL/chosen_KL_mean': -364.88543701171875, 'KL/rejected_KL_mean': -557.6463012695312, 'KL/mean': -461.265869140625, 'KL/std': 291.4111022949219, 'logits/chosen': -0.499002069234848, 'logits/rejected': -0.49258700013160706, 'epoch': 0.59} + 59%|█████▉ | 401/681 [17:00<11:40, 2.50s/it] 59%|█████▉ | 402/681 [17:02<11:20, 2.44s/it] {'loss': 0.9581, 'grad_norm': 34.74311065673828, 'learning_rate': 2.1673238449588665e-07, 'fcm_dpo/beta': 0.0016060995403677225, 'fcm_dpo/q_t': 0.3695389926433563, 'fcm_dpo/delta': -0.17291411757469177, 'fcm_dpo/margin': 350.63134765625, 'margin_dpo/margin_mean': 350.63134765625, 'margin_dpo/margin_std': 317.71551513671875, 'logps/chosen': -326.92205810546875, 'logps/rejected': -707.8590698242188, 'logps/ref_chosen': -50.74037170410156, 'logps/ref_rejected': -81.0460433959961, 'KL/chosen_KL_mean': -276.1816711425781, 'KL/rejected_KL_mean': -626.81298828125, 'KL/mean': -451.49737548828125, 'KL/std': 308.62689208984375, 'logits/chosen': -0.4849190413951874, 'logits/rejected': -0.4742359220981598, 'epoch': 0.59} + 59%|█████▉ | 402/681 [17:02<11:20, 2.44s/it] 59%|█████▉ | 403/681 [17:04<11:15, 2.43s/it] {'loss': 1.0603, 'grad_norm': 23.439414978027344, 'learning_rate': 2.154609112620295e-07, 'fcm_dpo/beta': 0.001585017773322761, 'fcm_dpo/q_t': 0.4014202356338501, 'fcm_dpo/delta': -0.026479586958885193, 'fcm_dpo/margin': 268.3475341796875, 'margin_dpo/margin_mean': 268.3475341796875, 'margin_dpo/margin_std': 303.0990295410156, 'logps/chosen': -359.87139892578125, 'logps/rejected': -658.3381958007812, 'logps/ref_chosen': -47.14731216430664, 'logps/ref_rejected': -77.2666015625, 'KL/chosen_KL_mean': -312.72406005859375, 'KL/rejected_KL_mean': -581.0715942382812, 'KL/mean': -446.8978271484375, 'KL/std': 288.34051513671875, 'logits/chosen': -0.49934089183807373, 'logits/rejected': -0.50015789270401, 'epoch': 0.59} + 59%|█████▉ | 403/681 [17:04<11:15, 2.43s/it] 59%|█████▉ | 404/681 [17:07<11:18, 2.45s/it] {'loss': 1.0909, 'grad_norm': 29.329235076904297, 'learning_rate': 2.1419034816528218e-07, 'fcm_dpo/beta': 0.00157838873565197, 'fcm_dpo/q_t': 0.40540170669555664, 'fcm_dpo/delta': -0.016118429601192474, 'fcm_dpo/margin': 263.1905517578125, 'margin_dpo/margin_mean': 263.1905517578125, 'margin_dpo/margin_std': 365.6813659667969, 'logps/chosen': -394.7882080078125, 'logps/rejected': -687.2584228515625, 'logps/ref_chosen': -47.875274658203125, 'logps/ref_rejected': -77.15499877929688, 'KL/chosen_KL_mean': -346.9129333496094, 'KL/rejected_KL_mean': -610.1033935546875, 'KL/mean': -478.5081787109375, 'KL/std': 282.465087890625, 'logits/chosen': -0.4767064154148102, 'logits/rejected': -0.46850764751434326, 'epoch': 0.59} + 59%|█████▉ | 404/681 [17:07<11:18, 2.45s/it] 59%|█████▉ | 405/681 [17:09<11:04, 2.41s/it] {'loss': 1.1641, 'grad_norm': 30.571292877197266, 'learning_rate': 2.129207286861638e-07, 'fcm_dpo/beta': 0.0015723207034170628, 'fcm_dpo/q_t': 0.423758327960968, 'fcm_dpo/delta': -0.039775051176548004, 'fcm_dpo/margin': 215.46173095703125, 'margin_dpo/margin_mean': 215.4617462158203, 'margin_dpo/margin_std': 380.0777587890625, 'logps/chosen': -453.65576171875, 'logps/rejected': -691.141357421875, 'logps/ref_chosen': -65.16290283203125, 'logps/ref_rejected': -87.18678283691406, 'KL/chosen_KL_mean': -388.49285888671875, 'KL/rejected_KL_mean': -603.95458984375, 'KL/mean': -496.2237243652344, 'KL/std': 306.67413330078125, 'logits/chosen': -0.45147573947906494, 'logits/rejected': -0.441570520401001, 'epoch': 0.59} + 59%|█████▉ | 405/681 [17:09<11:04, 2.41s/it] 60%|█████▉ | 406/681 [17:12<10:58, 2.40s/it] {'loss': 1.0554, 'grad_norm': 23.295684814453125, 'learning_rate': 2.1165208628032861e-07, 'fcm_dpo/beta': 0.0015619369223713875, 'fcm_dpo/q_t': 0.39848363399505615, 'fcm_dpo/delta': -0.044593267142772675, 'fcm_dpo/margin': 283.2893981933594, 'margin_dpo/margin_mean': 283.28936767578125, 'margin_dpo/margin_std': 333.28466796875, 'logps/chosen': -394.34521484375, 'logps/rejected': -719.972412109375, 'logps/ref_chosen': -49.740814208984375, 'logps/ref_rejected': -92.07862854003906, 'KL/chosen_KL_mean': -344.60443115234375, 'KL/rejected_KL_mean': -627.893798828125, 'KL/mean': -486.24908447265625, 'KL/std': 301.5284423828125, 'logits/chosen': -0.5039137005805969, 'logits/rejected': -0.5129928588867188, 'epoch': 0.6} + 60%|█████▉ | 406/681 [17:12<10:58, 2.40s/it] 60%|█████▉ | 407/681 [17:14<11:03, 2.42s/it] {'loss': 1.1985, 'grad_norm': 48.98335647583008, 'learning_rate': 2.1038445437768375e-07, 'fcm_dpo/beta': 0.0015546645736321807, 'fcm_dpo/q_t': 0.436930388212204, 'fcm_dpo/delta': 0.016098780557513237, 'fcm_dpo/margin': 171.37547302246094, 'margin_dpo/margin_mean': 171.37548828125, 'margin_dpo/margin_std': 317.56884765625, 'logps/chosen': -426.3728332519531, 'logps/rejected': -618.9296875, 'logps/ref_chosen': -56.33069610595703, 'logps/ref_rejected': -77.51209259033203, 'KL/chosen_KL_mean': -370.0421142578125, 'KL/rejected_KL_mean': -541.4176025390625, 'KL/mean': -455.7298889160156, 'KL/std': 237.25067138671875, 'logits/chosen': -0.4963209331035614, 'logits/rejected': -0.47049441933631897, 'epoch': 0.6} + 60%|█████▉ | 407/681 [17:14<11:03, 2.42s/it] 60%|█████▉ | 408/681 [17:17<11:21, 2.50s/it] {'loss': 1.1376, 'grad_norm': 24.81488037109375, 'learning_rate': 2.0911786638150872e-07, 'fcm_dpo/beta': 0.0015771770849823952, 'fcm_dpo/q_t': 0.4258885979652405, 'fcm_dpo/delta': 0.08839617669582367, 'fcm_dpo/margin': 199.35397338867188, 'margin_dpo/margin_mean': 199.35397338867188, 'margin_dpo/margin_std': 261.7845764160156, 'logps/chosen': -444.5955810546875, 'logps/rejected': -664.2572021484375, 'logps/ref_chosen': -69.789306640625, 'logps/ref_rejected': -90.09693908691406, 'KL/chosen_KL_mean': -374.8062744140625, 'KL/rejected_KL_mean': -574.1602783203125, 'KL/mean': -474.4832458496094, 'KL/std': 233.34344482421875, 'logits/chosen': -0.484347403049469, 'logits/rejected': -0.4580131769180298, 'epoch': 0.6} + 60%|█████▉ | 408/681 [17:17<11:21, 2.50s/it] 60%|██████ | 409/681 [17:19<11:33, 2.55s/it] {'loss': 1.1435, 'grad_norm': 33.778438568115234, 'learning_rate': 2.0785235566757517e-07, 'fcm_dpo/beta': 0.0016081533394753933, 'fcm_dpo/q_t': 0.42619985342025757, 'fcm_dpo/delta': 0.0854191780090332, 'fcm_dpo/margin': 197.1689453125, 'margin_dpo/margin_mean': 197.1689453125, 'margin_dpo/margin_std': 282.058349609375, 'logps/chosen': -430.7615966796875, 'logps/rejected': -645.5173950195312, 'logps/ref_chosen': -67.31744384765625, 'logps/ref_rejected': -84.904296875, 'KL/chosen_KL_mean': -363.4441833496094, 'KL/rejected_KL_mean': -560.6130981445312, 'KL/mean': -462.02862548828125, 'KL/std': 254.19630432128906, 'logits/chosen': -0.4777407944202423, 'logits/rejected': -0.46434783935546875, 'epoch': 0.6} + 60%|██████ | 409/681 [17:19<11:33, 2.55s/it] 60%|██████ | 410/681 [17:22<11:37, 2.57s/it] {'loss': 1.0996, 'grad_norm': 26.49384880065918, 'learning_rate': 2.065879555832674e-07, 'fcm_dpo/beta': 0.0016180926468223333, 'fcm_dpo/q_t': 0.41249266266822815, 'fcm_dpo/delta': 0.030641639605164528, 'fcm_dpo/margin': 228.97573852539062, 'margin_dpo/margin_mean': 228.97573852539062, 'margin_dpo/margin_std': 282.3933410644531, 'logps/chosen': -390.8802185058594, 'logps/rejected': -651.589599609375, 'logps/ref_chosen': -51.465354919433594, 'logps/ref_rejected': -83.198974609375, 'KL/chosen_KL_mean': -339.41485595703125, 'KL/rejected_KL_mean': -568.390625, 'KL/mean': -453.9027404785156, 'KL/std': 249.15707397460938, 'logits/chosen': -0.5207273960113525, 'logits/rejected': -0.5231969952583313, 'epoch': 0.6} + 60%|██████ | 410/681 [17:22<11:37, 2.57s/it] 60%|██████ | 411/681 [17:24<11:21, 2.52s/it] {'loss': 1.117, 'grad_norm': 34.77162170410156, 'learning_rate': 2.0532469944670343e-07, 'fcm_dpo/beta': 0.0016009939135983586, 'fcm_dpo/q_t': 0.41523507237434387, 'fcm_dpo/delta': -0.06567565351724625, 'fcm_dpo/margin': 230.64710998535156, 'margin_dpo/margin_mean': 230.6470947265625, 'margin_dpo/margin_std': 321.73370361328125, 'logps/chosen': -414.19842529296875, 'logps/rejected': -673.2332153320312, 'logps/ref_chosen': -52.30727005004883, 'logps/ref_rejected': -80.69495391845703, 'KL/chosen_KL_mean': -361.8911437988281, 'KL/rejected_KL_mean': -592.5382080078125, 'KL/mean': -477.2146911621094, 'KL/std': 280.512939453125, 'logits/chosen': -0.4923670291900635, 'logits/rejected': -0.5041638612747192, 'epoch': 0.6} + 60%|██████ | 411/681 [17:24<11:21, 2.52s/it] 60%|██████ | 412/681 [17:27<11:10, 2.49s/it] {'loss': 1.0948, 'grad_norm': 34.43694305419922, 'learning_rate': 2.0406262054585738e-07, 'fcm_dpo/beta': 0.0016049096593633294, 'fcm_dpo/q_t': 0.40966540575027466, 'fcm_dpo/delta': 0.010263003408908844, 'fcm_dpo/margin': 243.07411193847656, 'margin_dpo/margin_mean': 243.0740966796875, 'margin_dpo/margin_std': 316.7934875488281, 'logps/chosen': -416.3619689941406, 'logps/rejected': -706.352783203125, 'logps/ref_chosen': -53.144126892089844, 'logps/ref_rejected': -100.0608139038086, 'KL/chosen_KL_mean': -363.21783447265625, 'KL/rejected_KL_mean': -606.2919311523438, 'KL/mean': -484.7549133300781, 'KL/std': 272.1863098144531, 'logits/chosen': -0.5590307712554932, 'logits/rejected': -0.5907352566719055, 'epoch': 0.6} + 60%|██████ | 412/681 [17:27<11:10, 2.49s/it] 61%|██████ | 413/681 [17:30<11:21, 2.54s/it] {'loss': 1.0935, 'grad_norm': 25.042572021484375, 'learning_rate': 2.0280175213768205e-07, 'fcm_dpo/beta': 0.0016117544146254659, 'fcm_dpo/q_t': 0.40934064984321594, 'fcm_dpo/delta': 0.01580866426229477, 'fcm_dpo/margin': 238.6603240966797, 'margin_dpo/margin_mean': 238.66033935546875, 'margin_dpo/margin_std': 299.20660400390625, 'logps/chosen': -449.2993469238281, 'logps/rejected': -725.8511352539062, 'logps/ref_chosen': -61.58196258544922, 'logps/ref_rejected': -99.47340393066406, 'KL/chosen_KL_mean': -387.7173767089844, 'KL/rejected_KL_mean': -626.3777465820312, 'KL/mean': -507.04754638671875, 'KL/std': 278.0577392578125, 'logits/chosen': -0.5094854235649109, 'logits/rejected': -0.5156064033508301, 'epoch': 0.61} + 61%|██████ | 413/681 [17:30<11:21, 2.54s/it] 61%|██████ | 414/681 [17:32<11:19, 2.54s/it] {'loss': 1.0665, 'grad_norm': 28.529882431030273, 'learning_rate': 2.0154212744723247e-07, 'fcm_dpo/beta': 0.0016139191575348377, 'fcm_dpo/q_t': 0.4009360074996948, 'fcm_dpo/delta': -0.025633584707975388, 'fcm_dpo/margin': 262.623779296875, 'margin_dpo/margin_mean': 262.6237487792969, 'margin_dpo/margin_std': 301.7000732421875, 'logps/chosen': -400.208984375, 'logps/rejected': -703.8477783203125, 'logps/ref_chosen': -46.63148498535156, 'logps/ref_rejected': -87.64653015136719, 'KL/chosen_KL_mean': -353.5774841308594, 'KL/rejected_KL_mean': -616.2012329101562, 'KL/mean': -484.88934326171875, 'KL/std': 261.0632019042969, 'logits/chosen': -0.43805867433547974, 'logits/rejected': -0.43269163370132446, 'epoch': 0.61} + 61%|██████ | 414/681 [17:32<11:19, 2.54s/it] 61%|██████ | 415/681 [17:35<11:32, 2.60s/it] {'loss': 1.1494, 'grad_norm': 25.62877655029297, 'learning_rate': 2.002837796667909e-07, 'fcm_dpo/beta': 0.0016152863390743732, 'fcm_dpo/q_t': 0.42490124702453613, 'fcm_dpo/delta': 0.08179127424955368, 'fcm_dpo/margin': 198.6478271484375, 'margin_dpo/margin_mean': 198.6478271484375, 'margin_dpo/margin_std': 303.8634033203125, 'logps/chosen': -477.1468505859375, 'logps/rejected': -697.6539306640625, 'logps/ref_chosen': -78.6182861328125, 'logps/ref_rejected': -100.47752380371094, 'KL/chosen_KL_mean': -398.528564453125, 'KL/rejected_KL_mean': -597.1763916015625, 'KL/mean': -497.85247802734375, 'KL/std': 269.6009826660156, 'logits/chosen': -0.5635542869567871, 'logits/rejected': -0.5637483596801758, 'epoch': 0.61} + 61%|██████ | 415/681 [17:35<11:32, 2.60s/it] 61%|██████ | 416/681 [17:37<11:25, 2.59s/it] {'loss': 0.9918, 'grad_norm': 45.315086364746094, 'learning_rate': 1.990267419549914e-07, 'fcm_dpo/beta': 0.0016041703056544065, 'fcm_dpo/q_t': 0.38077855110168457, 'fcm_dpo/delta': -0.11606433987617493, 'fcm_dpo/margin': 318.10662841796875, 'margin_dpo/margin_mean': 318.10662841796875, 'margin_dpo/margin_std': 293.16387939453125, 'logps/chosen': -424.39837646484375, 'logps/rejected': -774.7945556640625, 'logps/ref_chosen': -58.27912521362305, 'logps/ref_rejected': -90.56871795654297, 'KL/chosen_KL_mean': -366.1192321777344, 'KL/rejected_KL_mean': -684.225830078125, 'KL/mean': -525.1725463867188, 'KL/std': 304.578369140625, 'logits/chosen': -0.523003101348877, 'logits/rejected': -0.5284410715103149, 'epoch': 0.61} + 61%|██████ | 416/681 [17:37<11:25, 2.59s/it] 61%|██████ | 417/681 [17:40<11:05, 2.52s/it] {'loss': 1.0588, 'grad_norm': 28.700593948364258, 'learning_rate': 1.9777104743594686e-07, 'fcm_dpo/beta': 0.0015893441159278154, 'fcm_dpo/q_t': 0.4028276801109314, 'fcm_dpo/delta': -0.013450254686176777, 'fcm_dpo/margin': 259.77496337890625, 'margin_dpo/margin_mean': 259.77496337890625, 'margin_dpo/margin_std': 269.56451416015625, 'logps/chosen': -413.9339599609375, 'logps/rejected': -691.6620483398438, 'logps/ref_chosen': -50.1987190246582, 'logps/ref_rejected': -68.15184020996094, 'KL/chosen_KL_mean': -363.7352600097656, 'KL/rejected_KL_mean': -623.51025390625, 'KL/mean': -493.62274169921875, 'KL/std': 269.7994384765625, 'logits/chosen': -0.5118107795715332, 'logits/rejected': -0.49247753620147705, 'epoch': 0.61} + 61%|██████ | 417/681 [17:40<11:05, 2.52s/it] 61%|██████▏ | 418/681 [17:42<11:06, 2.54s/it] {'loss': 1.0794, 'grad_norm': 25.165157318115234, 'learning_rate': 1.965167291983757e-07, 'fcm_dpo/beta': 0.0015889217611402273, 'fcm_dpo/q_t': 0.4020264744758606, 'fcm_dpo/delta': -0.039183445274829865, 'fcm_dpo/margin': 274.78253173828125, 'margin_dpo/margin_mean': 274.78253173828125, 'margin_dpo/margin_std': 366.9202575683594, 'logps/chosen': -472.77923583984375, 'logps/rejected': -770.2747802734375, 'logps/ref_chosen': -81.97846984863281, 'logps/ref_rejected': -104.69148254394531, 'KL/chosen_KL_mean': -390.80078125, 'KL/rejected_KL_mean': -665.583251953125, 'KL/mean': -528.1920166015625, 'KL/std': 311.567626953125, 'logits/chosen': -0.6080072522163391, 'logits/rejected': -0.5904369950294495, 'epoch': 0.61} + 61%|██████▏ | 418/681 [17:42<11:06, 2.54s/it] 62%|██████▏ | 419/681 [17:45<11:06, 2.54s/it] {'loss': 1.0359, 'grad_norm': 31.140954971313477, 'learning_rate': 1.9526382029472988e-07, 'fcm_dpo/beta': 0.0015577776357531548, 'fcm_dpo/q_t': 0.39097434282302856, 'fcm_dpo/delta': -0.07237845659255981, 'fcm_dpo/margin': 301.0840759277344, 'margin_dpo/margin_mean': 301.0841064453125, 'margin_dpo/margin_std': 336.95245361328125, 'logps/chosen': -418.7801208496094, 'logps/rejected': -758.4986572265625, 'logps/ref_chosen': -52.948646545410156, 'logps/ref_rejected': -91.58309936523438, 'KL/chosen_KL_mean': -365.83148193359375, 'KL/rejected_KL_mean': -666.91552734375, 'KL/mean': -516.37353515625, 'KL/std': 287.1883544921875, 'logits/chosen': -0.5190507173538208, 'logits/rejected': -0.5203031897544861, 'epoch': 0.62} + 62%|██████▏ | 419/681 [17:45<11:06, 2.54s/it] 62%|██████▏ | 420/681 [17:47<11:01, 2.54s/it] {'loss': 1.2225, 'grad_norm': 58.83283996582031, 'learning_rate': 1.9401235374032425e-07, 'fcm_dpo/beta': 0.0015820781700313091, 'fcm_dpo/q_t': 0.4385032057762146, 'fcm_dpo/delta': 0.12642702460289001, 'fcm_dpo/margin': 175.04342651367188, 'margin_dpo/margin_mean': 175.04344177246094, 'margin_dpo/margin_std': 401.68768310546875, 'logps/chosen': -542.5789794921875, 'logps/rejected': -709.17236328125, 'logps/ref_chosen': -77.7699203491211, 'logps/ref_rejected': -69.31985473632812, 'KL/chosen_KL_mean': -464.80908203125, 'KL/rejected_KL_mean': -639.8525390625, 'KL/mean': -552.330810546875, 'KL/std': 300.29180908203125, 'logits/chosen': -0.5799360275268555, 'logits/rejected': -0.5508887767791748, 'epoch': 0.62} + 62%|██████▏ | 420/681 [17:47<11:01, 2.54s/it] 62%|██████▏ | 421/681 [17:50<11:14, 2.59s/it] {'loss': 1.1441, 'grad_norm': 25.49981117248535, 'learning_rate': 1.9276236251246653e-07, 'fcm_dpo/beta': 0.0016132977325469255, 'fcm_dpo/q_t': 0.4212290644645691, 'fcm_dpo/delta': 0.06797365099191666, 'fcm_dpo/margin': 206.85389709472656, 'margin_dpo/margin_mean': 206.85391235351562, 'margin_dpo/margin_std': 313.36297607421875, 'logps/chosen': -432.689208984375, 'logps/rejected': -675.0587158203125, 'logps/ref_chosen': -53.765865325927734, 'logps/ref_rejected': -89.28144836425781, 'KL/chosen_KL_mean': -378.92333984375, 'KL/rejected_KL_mean': -585.7772216796875, 'KL/mean': -482.3503112792969, 'KL/std': 293.73455810546875, 'logits/chosen': -0.5703746676445007, 'logits/rejected': -0.5595937371253967, 'epoch': 0.62} + 62%|██████▏ | 421/681 [17:50<11:14, 2.59s/it] 62%|██████▏ | 422/681 [17:53<11:29, 2.66s/it] {'loss': 1.1069, 'grad_norm': 32.47233963012695, 'learning_rate': 1.9151387954958792e-07, 'fcm_dpo/beta': 0.001614258624613285, 'fcm_dpo/q_t': 0.4094482660293579, 'fcm_dpo/delta': 0.01060008816421032, 'fcm_dpo/margin': 241.48162841796875, 'margin_dpo/margin_mean': 241.4816436767578, 'margin_dpo/margin_std': 345.95001220703125, 'logps/chosen': -496.50665283203125, 'logps/rejected': -757.218017578125, 'logps/ref_chosen': -68.6337661743164, 'logps/ref_rejected': -87.86351013183594, 'KL/chosen_KL_mean': -427.87286376953125, 'KL/rejected_KL_mean': -669.3544921875, 'KL/mean': -548.6137084960938, 'KL/std': 294.9280090332031, 'logits/chosen': -0.5928431749343872, 'logits/rejected': -0.5967549681663513, 'epoch': 0.62} + 62%|██████▏ | 422/681 [17:53<11:29, 2.66s/it] 62%|██████▏ | 423/681 [17:55<11:06, 2.58s/it] {'loss': 1.0539, 'grad_norm': 29.974559783935547, 'learning_rate': 1.902669377503756e-07, 'fcm_dpo/beta': 0.001606134930625558, 'fcm_dpo/q_t': 0.39851221442222595, 'fcm_dpo/delta': -0.038889989256858826, 'fcm_dpo/margin': 272.19378662109375, 'margin_dpo/margin_mean': 272.19378662109375, 'margin_dpo/margin_std': 310.67779541015625, 'logps/chosen': -453.657958984375, 'logps/rejected': -757.16796875, 'logps/ref_chosen': -54.99030303955078, 'logps/ref_rejected': -86.30654907226562, 'KL/chosen_KL_mean': -398.66766357421875, 'KL/rejected_KL_mean': -670.8614501953125, 'KL/mean': -534.7645263671875, 'KL/std': 283.50732421875, 'logits/chosen': -0.5618699789047241, 'logits/rejected': -0.5707763433456421, 'epoch': 0.62} + 62%|██████▏ | 423/681 [17:55<11:06, 2.58s/it] 62%|██████▏ | 424/681 [17:58<11:14, 2.63s/it] {'loss': 1.0959, 'grad_norm': 31.341785430908203, 'learning_rate': 1.890215699729057e-07, 'fcm_dpo/beta': 0.0015977869043126702, 'fcm_dpo/q_t': 0.41002586483955383, 'fcm_dpo/delta': 0.005315911024808884, 'fcm_dpo/margin': 246.94143676757812, 'margin_dpo/margin_mean': 246.94146728515625, 'margin_dpo/margin_std': 331.605712890625, 'logps/chosen': -418.4310607910156, 'logps/rejected': -675.839599609375, 'logps/ref_chosen': -56.01192092895508, 'logps/ref_rejected': -66.47896575927734, 'KL/chosen_KL_mean': -362.41912841796875, 'KL/rejected_KL_mean': -609.360595703125, 'KL/mean': -485.88983154296875, 'KL/std': 279.98773193359375, 'logits/chosen': -0.5942381620407104, 'logits/rejected': -0.574604332447052, 'epoch': 0.62} + 62%|██████▏ | 424/681 [17:58<11:14, 2.63s/it] 62%|██████▏ | 425/681 [18:01<11:03, 2.59s/it] {'loss': 1.1678, 'grad_norm': 32.349361419677734, 'learning_rate': 1.8777780903377732e-07, 'fcm_dpo/beta': 0.001631318125873804, 'fcm_dpo/q_t': 0.4265892803668976, 'fcm_dpo/delta': 0.09219174087047577, 'fcm_dpo/margin': 190.22943115234375, 'margin_dpo/margin_mean': 190.22943115234375, 'margin_dpo/margin_std': 325.59716796875, 'logps/chosen': -446.402099609375, 'logps/rejected': -685.68798828125, 'logps/ref_chosen': -46.86899948120117, 'logps/ref_rejected': -95.92545318603516, 'KL/chosen_KL_mean': -399.5330810546875, 'KL/rejected_KL_mean': -589.7625732421875, 'KL/mean': -494.6478271484375, 'KL/std': 262.13092041015625, 'logits/chosen': -0.5598398447036743, 'logits/rejected': -0.5601568818092346, 'epoch': 0.62} + 62%|██████▏ | 425/681 [18:01<11:03, 2.59s/it] 63%|██████▎ | 426/681 [18:03<11:06, 2.61s/it] {'loss': 1.0895, 'grad_norm': 29.509531021118164, 'learning_rate': 1.8653568770724803e-07, 'fcm_dpo/beta': 0.0016432944685220718, 'fcm_dpo/q_t': 0.4079374670982361, 'fcm_dpo/delta': 0.005093574523925781, 'fcm_dpo/margin': 240.1967315673828, 'margin_dpo/margin_mean': 240.19671630859375, 'margin_dpo/margin_std': 298.68304443359375, 'logps/chosen': -444.57330322265625, 'logps/rejected': -689.4530639648438, 'logps/ref_chosen': -76.58354187011719, 'logps/ref_rejected': -81.26658630371094, 'KL/chosen_KL_mean': -367.98974609375, 'KL/rejected_KL_mean': -608.1864624023438, 'KL/mean': -488.088134765625, 'KL/std': 268.52386474609375, 'logits/chosen': -0.6483026742935181, 'logits/rejected': -0.6253814697265625, 'epoch': 0.63} + 63%|██████▎ | 426/681 [18:03<11:06, 2.61s/it] 63%|██████▎ | 427/681 [18:06<11:03, 2.61s/it] {'loss': 1.1632, 'grad_norm': 24.951610565185547, 'learning_rate': 1.8529523872436977e-07, 'fcm_dpo/beta': 0.00165902404114604, 'fcm_dpo/q_t': 0.4308916926383972, 'fcm_dpo/delta': 0.10449196398258209, 'fcm_dpo/margin': 180.07781982421875, 'margin_dpo/margin_mean': 180.07781982421875, 'margin_dpo/margin_std': 284.20269775390625, 'logps/chosen': -411.4693603515625, 'logps/rejected': -605.25927734375, 'logps/ref_chosen': -64.8538818359375, 'logps/ref_rejected': -78.5660171508789, 'KL/chosen_KL_mean': -346.615478515625, 'KL/rejected_KL_mean': -526.6932373046875, 'KL/mean': -436.65435791015625, 'KL/std': 234.79974365234375, 'logits/chosen': -0.5918477177619934, 'logits/rejected': -0.5726908445358276, 'epoch': 0.63} + 63%|██████▎ | 427/681 [18:06<11:03, 2.61s/it] 63%|██████▎ | 428/681 [18:09<11:05, 2.63s/it] {'loss': 1.0964, 'grad_norm': 30.45539665222168, 'learning_rate': 1.8405649477212697e-07, 'fcm_dpo/beta': 0.0016601982060819864, 'fcm_dpo/q_t': 0.4037541151046753, 'fcm_dpo/delta': -0.02208590693771839, 'fcm_dpo/margin': 253.5104217529297, 'margin_dpo/margin_mean': 253.5104217529297, 'margin_dpo/margin_std': 367.505859375, 'logps/chosen': -480.3148498535156, 'logps/rejected': -774.470458984375, 'logps/ref_chosen': -62.63666534423828, 'logps/ref_rejected': -103.28181457519531, 'KL/chosen_KL_mean': -417.6781921386719, 'KL/rejected_KL_mean': -671.1885986328125, 'KL/mean': -544.4334106445312, 'KL/std': 306.112060546875, 'logits/chosen': -0.5910390615463257, 'logits/rejected': -0.5962928533554077, 'epoch': 0.63} + 63%|██████▎ | 428/681 [18:09<11:05, 2.63s/it] 63%|██████▎ | 429/681 [18:11<11:01, 2.62s/it] {'loss': 1.172, 'grad_norm': 33.337589263916016, 'learning_rate': 1.828194884925749e-07, 'fcm_dpo/beta': 0.0016591004095971584, 'fcm_dpo/q_t': 0.426498144865036, 'fcm_dpo/delta': -0.028070662170648575, 'fcm_dpo/margin': 192.28273010253906, 'margin_dpo/margin_mean': 192.28273010253906, 'margin_dpo/margin_std': 336.96649169921875, 'logps/chosen': -504.73358154296875, 'logps/rejected': -707.5772094726562, 'logps/ref_chosen': -81.23401641845703, 'logps/ref_rejected': -91.79493713378906, 'KL/chosen_KL_mean': -423.49957275390625, 'KL/rejected_KL_mean': -615.7822875976562, 'KL/mean': -519.6409301757812, 'KL/std': 274.84283447265625, 'logits/chosen': -0.5892548561096191, 'logits/rejected': -0.5679141283035278, 'epoch': 0.63} + 63%|██████▎ | 429/681 [18:11<11:01, 2.62s/it] 63%|██████▎ | 430/681 [18:14<11:06, 2.66s/it] {'loss': 1.129, 'grad_norm': 27.670103073120117, 'learning_rate': 1.8158425248197928e-07, 'fcm_dpo/beta': 0.0016736264806240797, 'fcm_dpo/q_t': 0.42213696241378784, 'fcm_dpo/delta': 0.06572603434324265, 'fcm_dpo/margin': 200.95074462890625, 'margin_dpo/margin_mean': 200.95074462890625, 'margin_dpo/margin_std': 278.2074890136719, 'logps/chosen': -403.9998779296875, 'logps/rejected': -648.453125, 'logps/ref_chosen': -60.920326232910156, 'logps/ref_rejected': -104.42280578613281, 'KL/chosen_KL_mean': -343.0795593261719, 'KL/rejected_KL_mean': -544.0302734375, 'KL/mean': -443.554931640625, 'KL/std': 249.0330047607422, 'logits/chosen': -0.5829579830169678, 'logits/rejected': -0.5811977386474609, 'epoch': 0.63} + 63%|██████▎ | 430/681 [18:14<11:06, 2.66s/it] 63%|██████▎ | 431/681 [18:17<11:05, 2.66s/it] {'loss': 1.0172, 'grad_norm': 23.38682746887207, 'learning_rate': 1.8035081928995788e-07, 'fcm_dpo/beta': 0.001651083119213581, 'fcm_dpo/q_t': 0.3859713673591614, 'fcm_dpo/delta': -0.09401773661375046, 'fcm_dpo/margin': 296.34014892578125, 'margin_dpo/margin_mean': 296.3401794433594, 'margin_dpo/margin_std': 303.3402404785156, 'logps/chosen': -378.001708984375, 'logps/rejected': -709.8333740234375, 'logps/ref_chosen': -57.34874725341797, 'logps/ref_rejected': -92.84022521972656, 'KL/chosen_KL_mean': -320.6529541015625, 'KL/rejected_KL_mean': -616.9931640625, 'KL/mean': -468.82305908203125, 'KL/std': 276.59454345703125, 'logits/chosen': -0.5685824751853943, 'logits/rejected': -0.5741355419158936, 'epoch': 0.63} + 63%|██████▎ | 431/681 [18:17<11:05, 2.66s/it] 63%|██████▎ | 432/681 [18:19<11:08, 2.69s/it] {'loss': 1.0337, 'grad_norm': 41.92903518676758, 'learning_rate': 1.791192214186223e-07, 'fcm_dpo/beta': 0.0016406788490712643, 'fcm_dpo/q_t': 0.3939441442489624, 'fcm_dpo/delta': -0.0591546930372715, 'fcm_dpo/margin': 277.9825439453125, 'margin_dpo/margin_mean': 277.9825134277344, 'margin_dpo/margin_std': 276.3160400390625, 'logps/chosen': -388.929443359375, 'logps/rejected': -694.4166870117188, 'logps/ref_chosen': -71.07479095458984, 'logps/ref_rejected': -98.57952880859375, 'KL/chosen_KL_mean': -317.8546447753906, 'KL/rejected_KL_mean': -595.837158203125, 'KL/mean': -456.84588623046875, 'KL/std': 269.2180480957031, 'logits/chosen': -0.531327486038208, 'logits/rejected': -0.520300030708313, 'epoch': 0.63} + 63%|██████▎ | 432/681 [18:19<11:08, 2.69s/it] 64%|██████▎ | 433/681 [18:22<11:00, 2.66s/it] {'loss': 1.1672, 'grad_norm': 35.29652404785156, 'learning_rate': 1.7788949132172193e-07, 'fcm_dpo/beta': 0.0016517346957698464, 'fcm_dpo/q_t': 0.4271540939807892, 'fcm_dpo/delta': 0.09607505798339844, 'fcm_dpo/margin': 185.67514038085938, 'margin_dpo/margin_mean': 185.67514038085938, 'margin_dpo/margin_std': 311.87078857421875, 'logps/chosen': -461.48541259765625, 'logps/rejected': -684.8382568359375, 'logps/ref_chosen': -58.273193359375, 'logps/ref_rejected': -95.95089721679688, 'KL/chosen_KL_mean': -403.21221923828125, 'KL/rejected_KL_mean': -588.8873291015625, 'KL/mean': -496.0498046875, 'KL/std': 261.9140625, 'logits/chosen': -0.5547606945037842, 'logits/rejected': -0.541266679763794, 'epoch': 0.64} + 64%|██████▎ | 433/681 [18:22<11:00, 2.66s/it] 64%|██████▎ | 434/681 [18:24<10:51, 2.64s/it] {'loss': 1.1177, 'grad_norm': 25.378862380981445, 'learning_rate': 1.7666166140378853e-07, 'fcm_dpo/beta': 0.0016591593157500029, 'fcm_dpo/q_t': 0.4180990159511566, 'fcm_dpo/delta': 0.02848285809159279, 'fcm_dpo/margin': 224.49386596679688, 'margin_dpo/margin_mean': 224.49386596679688, 'margin_dpo/margin_std': 339.36627197265625, 'logps/chosen': -405.965087890625, 'logps/rejected': -646.98388671875, 'logps/ref_chosen': -61.97370147705078, 'logps/ref_rejected': -78.49861145019531, 'KL/chosen_KL_mean': -343.99139404296875, 'KL/rejected_KL_mean': -568.4853515625, 'KL/mean': -456.23834228515625, 'KL/std': 267.9556579589844, 'logits/chosen': -0.5715805292129517, 'logits/rejected': -0.5707394480705261, 'epoch': 0.64} + 64%|██████▎ | 434/681 [18:25<10:51, 2.64s/it] 64%|██████▍ | 435/681 [18:27<10:20, 2.52s/it] {'loss': 1.0788, 'grad_norm': 25.661197662353516, 'learning_rate': 1.7543576401928218e-07, 'fcm_dpo/beta': 0.00166351068764925, 'fcm_dpo/q_t': 0.40512967109680176, 'fcm_dpo/delta': -0.007664802018553019, 'fcm_dpo/margin': 244.86380004882812, 'margin_dpo/margin_mean': 244.86380004882812, 'margin_dpo/margin_std': 291.82879638671875, 'logps/chosen': -362.57965087890625, 'logps/rejected': -643.50830078125, 'logps/ref_chosen': -51.502052307128906, 'logps/ref_rejected': -87.56689453125, 'KL/chosen_KL_mean': -311.0776062011719, 'KL/rejected_KL_mean': -555.94140625, 'KL/mean': -433.509521484375, 'KL/std': 261.578857421875, 'logits/chosen': -0.5694348812103271, 'logits/rejected': -0.559348464012146, 'epoch': 0.64} + 64%|██████▍ | 435/681 [18:27<10:20, 2.52s/it] 64%|██████▍ | 436/681 [18:29<10:26, 2.56s/it] {'loss': 1.1115, 'grad_norm': 39.46367263793945, 'learning_rate': 1.742118314717391e-07, 'fcm_dpo/beta': 0.001671030418947339, 'fcm_dpo/q_t': 0.41665488481521606, 'fcm_dpo/delta': 0.04468690603971481, 'fcm_dpo/margin': 213.5735321044922, 'margin_dpo/margin_mean': 213.57354736328125, 'margin_dpo/margin_std': 275.3209533691406, 'logps/chosen': -398.3883972167969, 'logps/rejected': -623.2860107421875, 'logps/ref_chosen': -71.40371704101562, 'logps/ref_rejected': -82.72775268554688, 'KL/chosen_KL_mean': -326.98468017578125, 'KL/rejected_KL_mean': -540.5582275390625, 'KL/mean': -433.77142333984375, 'KL/std': 234.86660766601562, 'logits/chosen': -0.5771512985229492, 'logits/rejected': -0.5507988929748535, 'epoch': 0.64} + 64%|██████▍ | 436/681 [18:29<10:26, 2.56s/it] 64%|██████▍ | 437/681 [18:32<10:35, 2.60s/it] {'loss': 1.0967, 'grad_norm': 25.351360321044922, 'learning_rate': 1.7298989601292036e-07, 'fcm_dpo/beta': 0.0016848563682287931, 'fcm_dpo/q_t': 0.41250330209732056, 'fcm_dpo/delta': 0.028988715261220932, 'fcm_dpo/margin': 220.84902954101562, 'margin_dpo/margin_mean': 220.84902954101562, 'margin_dpo/margin_std': 267.39385986328125, 'logps/chosen': -396.5240478515625, 'logps/rejected': -634.67236328125, 'logps/ref_chosen': -64.7442626953125, 'logps/ref_rejected': -82.04356384277344, 'KL/chosen_KL_mean': -331.77978515625, 'KL/rejected_KL_mean': -552.6287841796875, 'KL/mean': -442.20428466796875, 'KL/std': 225.34506225585938, 'logits/chosen': -0.5720341205596924, 'logits/rejected': -0.5498570203781128, 'epoch': 0.64} + 64%|██████▍ | 437/681 [18:32<10:35, 2.60s/it] 64%|██████▍ | 438/681 [18:34<10:18, 2.55s/it] {'loss': 1.0567, 'grad_norm': 33.649723052978516, 'learning_rate': 1.7176998984196144e-07, 'fcm_dpo/beta': 0.0016751789953559637, 'fcm_dpo/q_t': 0.3999551236629486, 'fcm_dpo/delta': -0.027896108105778694, 'fcm_dpo/margin': 254.47047424316406, 'margin_dpo/margin_mean': 254.470458984375, 'margin_dpo/margin_std': 276.5482177734375, 'logps/chosen': -400.93927001953125, 'logps/rejected': -679.4678955078125, 'logps/ref_chosen': -59.0186653137207, 'logps/ref_rejected': -83.07682800292969, 'KL/chosen_KL_mean': -341.92059326171875, 'KL/rejected_KL_mean': -596.3910522460938, 'KL/mean': -469.1558532714844, 'KL/std': 260.28424072265625, 'logits/chosen': -0.5674476623535156, 'logits/rejected': -0.5483890771865845, 'epoch': 0.64} + 64%|██████▍ | 438/681 [18:35<10:18, 2.55s/it] 64%|██████▍ | 439/681 [18:37<10:07, 2.51s/it] {'loss': 1.1355, 'grad_norm': 28.397993087768555, 'learning_rate': 1.7055214510452458e-07, 'fcm_dpo/beta': 0.0016563256504014134, 'fcm_dpo/q_t': 0.4195774793624878, 'fcm_dpo/delta': -0.06954063475131989, 'fcm_dpo/margin': 209.87954711914062, 'margin_dpo/margin_mean': 209.87953186035156, 'margin_dpo/margin_std': 304.0859375, 'logps/chosen': -429.1225280761719, 'logps/rejected': -669.2034912109375, 'logps/ref_chosen': -53.78407669067383, 'logps/ref_rejected': -83.98545837402344, 'KL/chosen_KL_mean': -375.33843994140625, 'KL/rejected_KL_mean': -585.218017578125, 'KL/mean': -480.2781982421875, 'KL/std': 268.84649658203125, 'logits/chosen': -0.6017969846725464, 'logits/rejected': -0.6068276166915894, 'epoch': 0.64} + 64%|██████▍ | 439/681 [18:37<10:07, 2.51s/it] 65%|██████▍ | 440/681 [18:39<09:54, 2.47s/it] {'loss': 1.1007, 'grad_norm': 36.218482971191406, 'learning_rate': 1.6933639389195134e-07, 'fcm_dpo/beta': 0.0016622185939922929, 'fcm_dpo/q_t': 0.41194236278533936, 'fcm_dpo/delta': 0.005979446694254875, 'fcm_dpo/margin': 237.09423828125, 'margin_dpo/margin_mean': 237.09423828125, 'margin_dpo/margin_std': 334.8287658691406, 'logps/chosen': -482.576904296875, 'logps/rejected': -737.6021728515625, 'logps/ref_chosen': -78.56671905517578, 'logps/ref_rejected': -96.49775695800781, 'KL/chosen_KL_mean': -404.01019287109375, 'KL/rejected_KL_mean': -641.1043701171875, 'KL/mean': -522.5572509765625, 'KL/std': 323.501708984375, 'logits/chosen': -0.6418617367744446, 'logits/rejected': -0.638819694519043, 'epoch': 0.65} + 65%|██████▍ | 440/681 [18:39<09:54, 2.47s/it] 65%|██████▍ | 441/681 [18:42<10:04, 2.52s/it] {'loss': 1.1303, 'grad_norm': 46.04979705810547, 'learning_rate': 1.681227682404166e-07, 'fcm_dpo/beta': 0.001664304407313466, 'fcm_dpo/q_t': 0.41193264722824097, 'fcm_dpo/delta': -0.0037491731345653534, 'fcm_dpo/margin': 242.307861328125, 'margin_dpo/margin_mean': 242.307861328125, 'margin_dpo/margin_std': 407.91796875, 'logps/chosen': -523.1796264648438, 'logps/rejected': -801.1338500976562, 'logps/ref_chosen': -60.824440002441406, 'logps/ref_rejected': -96.47080993652344, 'KL/chosen_KL_mean': -462.35516357421875, 'KL/rejected_KL_mean': -704.6630859375, 'KL/mean': -583.5091552734375, 'KL/std': 340.33734130859375, 'logits/chosen': -0.6510436534881592, 'logits/rejected': -0.6406994462013245, 'epoch': 0.65} + 65%|██████▍ | 441/681 [18:42<10:04, 2.52s/it] 65%|██████▍ | 442/681 [18:44<09:51, 2.47s/it] {'loss': 1.0536, 'grad_norm': 35.095680236816406, 'learning_rate': 1.669113001300851e-07, 'fcm_dpo/beta': 0.0016490614507347345, 'fcm_dpo/q_t': 0.3943568170070648, 'fcm_dpo/delta': -0.07931334525346756, 'fcm_dpo/margin': 288.12567138671875, 'margin_dpo/margin_mean': 288.12567138671875, 'margin_dpo/margin_std': 366.7057800292969, 'logps/chosen': -441.97821044921875, 'logps/rejected': -759.6319580078125, 'logps/ref_chosen': -47.01121520996094, 'logps/ref_rejected': -76.53926086425781, 'KL/chosen_KL_mean': -394.96697998046875, 'KL/rejected_KL_mean': -683.0927124023438, 'KL/mean': -539.0298461914062, 'KL/std': 329.10321044921875, 'logits/chosen': -0.6585125923156738, 'logits/rejected': -0.6561766862869263, 'epoch': 0.65} + 65%|██████▍ | 442/681 [18:44<09:51, 2.47s/it] 65%|██████▌ | 443/681 [18:47<09:56, 2.51s/it] {'loss': 1.2126, 'grad_norm': 37.560585021972656, 'learning_rate': 1.6570202148426815e-07, 'fcm_dpo/beta': 0.0016302757430821657, 'fcm_dpo/q_t': 0.4339308440685272, 'fcm_dpo/delta': -0.0021791704930365086, 'fcm_dpo/margin': 181.2572021484375, 'margin_dpo/margin_mean': 181.2572021484375, 'margin_dpo/margin_std': 397.4133605957031, 'logps/chosen': -528.1087646484375, 'logps/rejected': -724.7728271484375, 'logps/ref_chosen': -71.27301788330078, 'logps/ref_rejected': -86.679931640625, 'KL/chosen_KL_mean': -456.835693359375, 'KL/rejected_KL_mean': -638.0928955078125, 'KL/mean': -547.46435546875, 'KL/std': 329.8770751953125, 'logits/chosen': -0.6085466146469116, 'logits/rejected': -0.5840749740600586, 'epoch': 0.65} + 65%|██████▌ | 443/681 [18:47<09:56, 2.51s/it] 65%|██████▌ | 444/681 [18:49<09:53, 2.50s/it] {'loss': 1.0554, 'grad_norm': 26.796432495117188, 'learning_rate': 1.6449496416858282e-07, 'fcm_dpo/beta': 0.0016081281937658787, 'fcm_dpo/q_t': 0.39475017786026, 'fcm_dpo/delta': -0.07197729498147964, 'fcm_dpo/margin': 291.16546630859375, 'margin_dpo/margin_mean': 291.16546630859375, 'margin_dpo/margin_std': 379.3155517578125, 'logps/chosen': -501.4560546875, 'logps/rejected': -832.6627197265625, 'logps/ref_chosen': -57.213706970214844, 'logps/ref_rejected': -97.25489807128906, 'KL/chosen_KL_mean': -444.24237060546875, 'KL/rejected_KL_mean': -735.4078369140625, 'KL/mean': -589.8250732421875, 'KL/std': 349.052978515625, 'logits/chosen': -0.619565486907959, 'logits/rejected': -0.6288525462150574, 'epoch': 0.65} + 65%|██████▌ | 444/681 [18:49<09:53, 2.50s/it] 65%|██████▌ | 445/681 [18:52<10:13, 2.60s/it] {'loss': 1.0649, 'grad_norm': 27.847251892089844, 'learning_rate': 1.6329015999011182e-07, 'fcm_dpo/beta': 0.0015993316192179918, 'fcm_dpo/q_t': 0.3995450437068939, 'fcm_dpo/delta': -0.04321688041090965, 'fcm_dpo/margin': 275.9429931640625, 'margin_dpo/margin_mean': 275.9429931640625, 'margin_dpo/margin_std': 342.8664245605469, 'logps/chosen': -465.8297119140625, 'logps/rejected': -767.1555786132812, 'logps/ref_chosen': -67.29979705810547, 'logps/ref_rejected': -92.68267059326172, 'KL/chosen_KL_mean': -398.5299072265625, 'KL/rejected_KL_mean': -674.472900390625, 'KL/mean': -536.5014038085938, 'KL/std': 279.18701171875, 'logits/chosen': -0.6301474571228027, 'logits/rejected': -0.6212267279624939, 'epoch': 0.65} + 65%|██████▌ | 445/681 [18:52<10:13, 2.60s/it] 65%|██████▌ | 446/681 [18:55<10:11, 2.60s/it] {'loss': 1.0286, 'grad_norm': 30.288881301879883, 'learning_rate': 1.6208764069656578e-07, 'fcm_dpo/beta': 0.0015849031042307615, 'fcm_dpo/q_t': 0.3897179961204529, 'fcm_dpo/delta': -0.07573074102401733, 'fcm_dpo/margin': 297.735595703125, 'margin_dpo/margin_mean': 297.735595703125, 'margin_dpo/margin_std': 308.36199951171875, 'logps/chosen': -422.6663818359375, 'logps/rejected': -762.567626953125, 'logps/ref_chosen': -59.098487854003906, 'logps/ref_rejected': -101.26419067382812, 'KL/chosen_KL_mean': -363.5679016113281, 'KL/rejected_KL_mean': -661.303466796875, 'KL/mean': -512.4356689453125, 'KL/std': 304.2501220703125, 'logits/chosen': -0.655229926109314, 'logits/rejected': -0.6672055721282959, 'epoch': 0.65} + 65%|██████▌ | 446/681 [18:55<10:11, 2.60s/it] 66%|██████▌ | 447/681 [18:57<09:52, 2.53s/it] {'loss': 1.0369, 'grad_norm': 26.699710845947266, 'learning_rate': 1.608874379754465e-07, 'fcm_dpo/beta': 0.001544747268781066, 'fcm_dpo/q_t': 0.3913511037826538, 'fcm_dpo/delta': -0.08775018155574799, 'fcm_dpo/margin': 312.77081298828125, 'margin_dpo/margin_mean': 312.7708435058594, 'margin_dpo/margin_std': 385.6178283691406, 'logps/chosen': -419.28912353515625, 'logps/rejected': -774.6793212890625, 'logps/ref_chosen': -56.07533264160156, 'logps/ref_rejected': -98.69475555419922, 'KL/chosen_KL_mean': -363.2137756347656, 'KL/rejected_KL_mean': -675.984619140625, 'KL/mean': -519.5991821289062, 'KL/std': 346.8345947265625, 'logits/chosen': -0.7068610191345215, 'logits/rejected': -0.7222627401351929, 'epoch': 0.66} + 66%|██████▌ | 447/681 [18:57<09:52, 2.53s/it] 66%|██████▌ | 448/681 [19:00<09:52, 2.54s/it] {'loss': 1.0467, 'grad_norm': 35.73704528808594, 'learning_rate': 1.5968958345321177e-07, 'fcm_dpo/beta': 0.0015353120397776365, 'fcm_dpo/q_t': 0.395630419254303, 'fcm_dpo/delta': -0.05634545907378197, 'fcm_dpo/margin': 295.49896240234375, 'margin_dpo/margin_mean': 295.4989929199219, 'margin_dpo/margin_std': 336.44561767578125, 'logps/chosen': -466.2862243652344, 'logps/rejected': -804.0460205078125, 'logps/ref_chosen': -60.00384521484375, 'logps/ref_rejected': -102.26465606689453, 'KL/chosen_KL_mean': -406.2823791503906, 'KL/rejected_KL_mean': -701.7813720703125, 'KL/mean': -554.0318603515625, 'KL/std': 298.70819091796875, 'logits/chosen': -0.6082560420036316, 'logits/rejected': -0.6136020421981812, 'epoch': 0.66} + 66%|██████▌ | 448/681 [19:00<09:52, 2.54s/it] 66%|██████▌ | 449/681 [19:02<09:48, 2.53s/it] {'loss': 1.0796, 'grad_norm': 28.19297981262207, 'learning_rate': 1.584941086944423e-07, 'fcm_dpo/beta': 0.0015101665630936623, 'fcm_dpo/q_t': 0.40061530470848083, 'fcm_dpo/delta': -0.052754104137420654, 'fcm_dpo/margin': 298.13922119140625, 'margin_dpo/margin_mean': 298.13922119140625, 'margin_dpo/margin_std': 435.361572265625, 'logps/chosen': -480.90472412109375, 'logps/rejected': -800.1142578125, 'logps/ref_chosen': -67.52661895751953, 'logps/ref_rejected': -88.59690856933594, 'KL/chosen_KL_mean': -413.37811279296875, 'KL/rejected_KL_mean': -711.517333984375, 'KL/mean': -562.44775390625, 'KL/std': 366.28466796875, 'logits/chosen': -0.6445102095603943, 'logits/rejected': -0.6385193467140198, 'epoch': 0.66} + 66%|██████▌ | 449/681 [19:02<09:48, 2.53s/it] 66%|██████▌ | 450/681 [19:05<09:46, 2.54s/it] {'loss': 1.0118, 'grad_norm': 47.2825813293457, 'learning_rate': 1.573010452010098e-07, 'fcm_dpo/beta': 0.0014909481396898627, 'fcm_dpo/q_t': 0.386138379573822, 'fcm_dpo/delta': -0.08969271928071976, 'fcm_dpo/margin': 325.58074951171875, 'margin_dpo/margin_mean': 325.58074951171875, 'margin_dpo/margin_std': 320.9717102050781, 'logps/chosen': -395.791015625, 'logps/rejected': -767.0185546875, 'logps/ref_chosen': -57.10811996459961, 'logps/ref_rejected': -102.75494384765625, 'KL/chosen_KL_mean': -338.682861328125, 'KL/rejected_KL_mean': -664.263671875, 'KL/mean': -501.4732666015625, 'KL/std': 325.22833251953125, 'logits/chosen': -0.6737087965011597, 'logits/rejected': -0.685724675655365, 'epoch': 0.66} + 66%|██████▌ | 450/681 [19:05<09:46, 2.54s/it] 66%|██████▌ | 451/681 [19:07<09:22, 2.45s/it] {'loss': 1.1623, 'grad_norm': 33.79815673828125, 'learning_rate': 1.5611042441124687e-07, 'fcm_dpo/beta': 0.0014971659984439611, 'fcm_dpo/q_t': 0.41749513149261475, 'fcm_dpo/delta': 0.051308851689100266, 'fcm_dpo/margin': 233.9193878173828, 'margin_dpo/margin_mean': 233.91940307617188, 'margin_dpo/margin_std': 428.302490234375, 'logps/chosen': -512.8397827148438, 'logps/rejected': -761.2197265625, 'logps/ref_chosen': -58.46883010864258, 'logps/ref_rejected': -72.92941284179688, 'KL/chosen_KL_mean': -454.3709716796875, 'KL/rejected_KL_mean': -688.2903442382812, 'KL/mean': -571.3306274414062, 'KL/std': 374.0126647949219, 'logits/chosen': -0.7189067602157593, 'logits/rejected': -0.6965080499649048, 'epoch': 0.66} + 66%|██████▌ | 451/681 [19:07<09:22, 2.45s/it] 66%|██████▋ | 452/681 [19:09<09:19, 2.44s/it] {'loss': 1.0559, 'grad_norm': 25.158477783203125, 'learning_rate': 1.549222776991186e-07, 'fcm_dpo/beta': 0.0014898786321282387, 'fcm_dpo/q_t': 0.40081116557121277, 'fcm_dpo/delta': -0.021011171862483025, 'fcm_dpo/margin': 281.9278869628906, 'margin_dpo/margin_mean': 281.9278564453125, 'margin_dpo/margin_std': 295.3541564941406, 'logps/chosen': -371.70745849609375, 'logps/rejected': -701.0162353515625, 'logps/ref_chosen': -50.39055252075195, 'logps/ref_rejected': -97.77142333984375, 'KL/chosen_KL_mean': -321.31689453125, 'KL/rejected_KL_mean': -603.2447509765625, 'KL/mean': -462.2808532714844, 'KL/std': 290.2576904296875, 'logits/chosen': -0.6077337265014648, 'logits/rejected': -0.6246554851531982, 'epoch': 0.66} + 66%|██████▋ | 452/681 [19:10<09:19, 2.44s/it] 67%|██████▋ | 453/681 [19:12<09:07, 2.40s/it] {'loss': 1.0931, 'grad_norm': 26.13146209716797, 'learning_rate': 1.5373663637339584e-07, 'fcm_dpo/beta': 0.0014873708132654428, 'fcm_dpo/q_t': 0.40999874472618103, 'fcm_dpo/delta': 0.0034573376178741455, 'fcm_dpo/margin': 266.5997314453125, 'margin_dpo/margin_mean': 266.5997619628906, 'margin_dpo/margin_std': 356.6203918457031, 'logps/chosen': -432.453125, 'logps/rejected': -723.54541015625, 'logps/ref_chosen': -57.71485137939453, 'logps/ref_rejected': -82.20741271972656, 'KL/chosen_KL_mean': -374.7382507324219, 'KL/rejected_KL_mean': -641.3380126953125, 'KL/mean': -508.03814697265625, 'KL/std': 293.4873046875, 'logits/chosen': -0.6614656448364258, 'logits/rejected': -0.6468169689178467, 'epoch': 0.67} + 67%|██████▋ | 453/681 [19:12<09:07, 2.40s/it] 67%|██████▋ | 454/681 [19:14<09:10, 2.43s/it] {'loss': 1.0611, 'grad_norm': 28.11908721923828, 'learning_rate': 1.5255353167683017e-07, 'fcm_dpo/beta': 0.0014775395393371582, 'fcm_dpo/q_t': 0.39838463068008423, 'fcm_dpo/delta': -0.047248564660549164, 'fcm_dpo/margin': 301.10711669921875, 'margin_dpo/margin_mean': 301.10711669921875, 'margin_dpo/margin_std': 380.62457275390625, 'logps/chosen': -511.26422119140625, 'logps/rejected': -836.37646484375, 'logps/ref_chosen': -60.945648193359375, 'logps/ref_rejected': -84.95079040527344, 'KL/chosen_KL_mean': -450.31854248046875, 'KL/rejected_KL_mean': -751.4256591796875, 'KL/mean': -600.8720703125, 'KL/std': 340.4393005371094, 'logits/chosen': -0.697075605392456, 'logits/rejected': -0.6863827109336853, 'epoch': 0.67} + 67%|██████▋ | 454/681 [19:14<09:10, 2.43s/it] 67%|██████▋ | 455/681 [19:17<09:04, 2.41s/it] {'loss': 1.0253, 'grad_norm': 37.92613983154297, 'learning_rate': 1.5137299478533064e-07, 'fcm_dpo/beta': 0.0014633602695539594, 'fcm_dpo/q_t': 0.38579294085502625, 'fcm_dpo/delta': -0.11007063835859299, 'fcm_dpo/margin': 344.6717224121094, 'margin_dpo/margin_mean': 344.6717224121094, 'margin_dpo/margin_std': 396.2593078613281, 'logps/chosen': -429.02349853515625, 'logps/rejected': -844.1099853515625, 'logps/ref_chosen': -44.88671112060547, 'logps/ref_rejected': -115.30147552490234, 'KL/chosen_KL_mean': -384.13677978515625, 'KL/rejected_KL_mean': -728.8084716796875, 'KL/mean': -556.47265625, 'KL/std': 345.8653869628906, 'logits/chosen': -0.6768559217453003, 'logits/rejected': -0.6982386708259583, 'epoch': 0.67} + 67%|██████▋ | 455/681 [19:17<09:04, 2.41s/it] 67%|██████▋ | 456/681 [19:19<09:14, 2.47s/it] {'loss': 1.015, 'grad_norm': 26.16177749633789, 'learning_rate': 1.5019505680714232e-07, 'fcm_dpo/beta': 0.0014285333454608917, 'fcm_dpo/q_t': 0.3888140320777893, 'fcm_dpo/delta': -0.08655368536710739, 'fcm_dpo/margin': 337.71661376953125, 'margin_dpo/margin_mean': 337.71661376953125, 'margin_dpo/margin_std': 344.4359130859375, 'logps/chosen': -464.1832275390625, 'logps/rejected': -850.0808715820312, 'logps/ref_chosen': -57.036781311035156, 'logps/ref_rejected': -105.21784210205078, 'KL/chosen_KL_mean': -407.1464538574219, 'KL/rejected_KL_mean': -744.863037109375, 'KL/mean': -576.0047607421875, 'KL/std': 354.4407043457031, 'logits/chosen': -0.6761212348937988, 'logits/rejected': -0.6974040865898132, 'epoch': 0.67} + 67%|██████▋ | 456/681 [19:19<09:14, 2.47s/it] 67%|██████▋ | 457/681 [19:22<09:27, 2.54s/it] {'loss': 1.0173, 'grad_norm': 28.89864158630371, 'learning_rate': 1.4901974878202627e-07, 'fcm_dpo/beta': 0.0014012358151376247, 'fcm_dpo/q_t': 0.3888044059276581, 'fcm_dpo/delta': -0.07646898925304413, 'fcm_dpo/margin': 337.23883056640625, 'margin_dpo/margin_mean': 337.23883056640625, 'margin_dpo/margin_std': 322.967529296875, 'logps/chosen': -447.197998046875, 'logps/rejected': -815.3038330078125, 'logps/ref_chosen': -54.24253845214844, 'logps/ref_rejected': -85.10956573486328, 'KL/chosen_KL_mean': -392.9554443359375, 'KL/rejected_KL_mean': -730.1942138671875, 'KL/mean': -561.5748291015625, 'KL/std': 337.5885009765625, 'logits/chosen': -0.6724662780761719, 'logits/rejected': -0.674906849861145, 'epoch': 0.67} + 67%|██████▋ | 457/681 [19:22<09:27, 2.54s/it] 67%|██████▋ | 458/681 [19:24<09:23, 2.53s/it] {'loss': 1.0629, 'grad_norm': 22.595535278320312, 'learning_rate': 1.4784710168044212e-07, 'fcm_dpo/beta': 0.0013883748324587941, 'fcm_dpo/q_t': 0.40227359533309937, 'fcm_dpo/delta': -0.02694622240960598, 'fcm_dpo/margin': 306.4158935546875, 'margin_dpo/margin_mean': 306.4158935546875, 'margin_dpo/margin_std': 356.400634765625, 'logps/chosen': -454.5318908691406, 'logps/rejected': -803.22216796875, 'logps/ref_chosen': -55.40888214111328, 'logps/ref_rejected': -97.68325805664062, 'KL/chosen_KL_mean': -399.1230163574219, 'KL/rejected_KL_mean': -705.5389404296875, 'KL/mean': -552.3309326171875, 'KL/std': 313.0858154296875, 'logits/chosen': -0.6672220826148987, 'logits/rejected': -0.6631453633308411, 'epoch': 0.67} + 67%|██████▋ | 458/681 [19:24<09:23, 2.53s/it] 67%|██████▋ | 459/681 [19:27<09:22, 2.54s/it] {'loss': 1.0531, 'grad_norm': 31.071313858032227, 'learning_rate': 1.466771464027316e-07, 'fcm_dpo/beta': 0.0013751968508586287, 'fcm_dpo/q_t': 0.39526090025901794, 'fcm_dpo/delta': -0.05920097231864929, 'fcm_dpo/margin': 331.68365478515625, 'margin_dpo/margin_mean': 331.6836853027344, 'margin_dpo/margin_std': 400.57806396484375, 'logps/chosen': -489.1966247558594, 'logps/rejected': -860.4913940429688, 'logps/ref_chosen': -46.55748748779297, 'logps/ref_rejected': -86.16854095458984, 'KL/chosen_KL_mean': -442.6391296386719, 'KL/rejected_KL_mean': -774.3228759765625, 'KL/mean': -608.4810180664062, 'KL/std': 361.2178955078125, 'logits/chosen': -0.6531388759613037, 'logits/rejected': -0.6709892749786377, 'epoch': 0.67} + 67%|██████▋ | 459/681 [19:27<09:22, 2.54s/it] 68%|██████▊ | 460/681 [19:30<09:29, 2.58s/it] {'loss': 1.022, 'grad_norm': 46.59115982055664, 'learning_rate': 1.4550991377830423e-07, 'fcm_dpo/beta': 0.0013584838015958667, 'fcm_dpo/q_t': 0.38839712738990784, 'fcm_dpo/delta': -0.08933592587709427, 'fcm_dpo/margin': 357.0634765625, 'margin_dpo/margin_mean': 357.0635070800781, 'margin_dpo/margin_std': 387.8919677734375, 'logps/chosen': -536.249267578125, 'logps/rejected': -945.7972412109375, 'logps/ref_chosen': -51.63489532470703, 'logps/ref_rejected': -104.11935424804688, 'KL/chosen_KL_mean': -484.61431884765625, 'KL/rejected_KL_mean': -841.6778564453125, 'KL/mean': -663.1461181640625, 'KL/std': 356.38165283203125, 'logits/chosen': -0.7092480063438416, 'logits/rejected': -0.7396787405014038, 'epoch': 0.68} + 68%|██████▊ | 460/681 [19:30<09:29, 2.58s/it] 68%|██████▊ | 461/681 [19:32<09:27, 2.58s/it] {'loss': 1.1072, 'grad_norm': 28.806053161621094, 'learning_rate': 1.4434543456482518e-07, 'fcm_dpo/beta': 0.0013587003340944648, 'fcm_dpo/q_t': 0.4131912589073181, 'fcm_dpo/delta': 0.017041990533471107, 'fcm_dpo/margin': 282.1562805175781, 'margin_dpo/margin_mean': 282.15625, 'margin_dpo/margin_std': 399.5581359863281, 'logps/chosen': -575.4599609375, 'logps/rejected': -888.9111328125, 'logps/ref_chosen': -55.18195724487305, 'logps/ref_rejected': -86.47689819335938, 'KL/chosen_KL_mean': -520.2779541015625, 'KL/rejected_KL_mean': -802.4342651367188, 'KL/mean': -661.3561401367188, 'KL/std': 363.57623291015625, 'logits/chosen': -0.7396203279495239, 'logits/rejected': -0.7533408999443054, 'epoch': 0.68} + 68%|██████▊ | 461/681 [19:32<09:27, 2.58s/it] 68%|██████▊ | 462/681 [19:35<09:17, 2.55s/it] {'loss': 1.1717, 'grad_norm': 41.23543930053711, 'learning_rate': 1.4318373944740484e-07, 'fcm_dpo/beta': 0.001372592058032751, 'fcm_dpo/q_t': 0.4292982220649719, 'fcm_dpo/delta': 0.08686043322086334, 'fcm_dpo/margin': 230.16461181640625, 'margin_dpo/margin_mean': 230.1645965576172, 'margin_dpo/margin_std': 417.91949462890625, 'logps/chosen': -615.3565673828125, 'logps/rejected': -854.4342041015625, 'logps/ref_chosen': -69.92803192138672, 'logps/ref_rejected': -78.84111022949219, 'KL/chosen_KL_mean': -545.4285888671875, 'KL/rejected_KL_mean': -775.5931396484375, 'KL/mean': -660.5108642578125, 'KL/std': 371.68798828125, 'logits/chosen': -0.8299468755722046, 'logits/rejected': -0.8264528512954712, 'epoch': 0.68} + 68%|██████▊ | 462/681 [19:35<09:17, 2.55s/it] 68%|██████▊ | 463/681 [19:37<09:07, 2.51s/it] {'loss': 1.0942, 'grad_norm': 37.95475387573242, 'learning_rate': 1.4202485903778976e-07, 'fcm_dpo/beta': 0.0013804540503770113, 'fcm_dpo/q_t': 0.4052172899246216, 'fcm_dpo/delta': -0.025950342416763306, 'fcm_dpo/margin': 307.53863525390625, 'margin_dpo/margin_mean': 307.53863525390625, 'margin_dpo/margin_std': 443.52276611328125, 'logps/chosen': -603.603271484375, 'logps/rejected': -944.8925170898438, 'logps/ref_chosen': -55.27437210083008, 'logps/ref_rejected': -89.02497863769531, 'KL/chosen_KL_mean': -548.3289184570312, 'KL/rejected_KL_mean': -855.8675537109375, 'KL/mean': -702.0982055664062, 'KL/std': 388.29705810546875, 'logits/chosen': -0.8180972337722778, 'logits/rejected': -0.8314469456672668, 'epoch': 0.68} + 68%|██████▊ | 463/681 [19:37<09:07, 2.51s/it] 68%|██████▊ | 464/681 [19:39<08:49, 2.44s/it] {'loss': 0.9447, 'grad_norm': 43.35410690307617, 'learning_rate': 1.4086882387355658e-07, 'fcm_dpo/beta': 0.0013158408692106605, 'fcm_dpo/q_t': 0.359811931848526, 'fcm_dpo/delta': -0.23714584112167358, 'fcm_dpo/margin': 471.21575927734375, 'margin_dpo/margin_mean': 471.21575927734375, 'margin_dpo/margin_std': 474.4074401855469, 'logps/chosen': -595.7581176757812, 'logps/rejected': -1118.551025390625, 'logps/ref_chosen': -50.91230010986328, 'logps/ref_rejected': -102.4893798828125, 'KL/chosen_KL_mean': -544.8458251953125, 'KL/rejected_KL_mean': -1016.0616455078125, 'KL/mean': -780.4537353515625, 'KL/std': 460.05206298828125, 'logits/chosen': -0.7928054332733154, 'logits/rejected': -0.8558509945869446, 'epoch': 0.68} + 68%|██████▊ | 464/681 [19:39<08:49, 2.44s/it] 68%|██████▊ | 465/681 [19:42<08:54, 2.48s/it] {'loss': 1.0584, 'grad_norm': 57.58442306518555, 'learning_rate': 1.3971566441730714e-07, 'fcm_dpo/beta': 0.0012953910045325756, 'fcm_dpo/q_t': 0.38684460520744324, 'fcm_dpo/delta': -0.09291453659534454, 'fcm_dpo/margin': 377.061279296875, 'margin_dpo/margin_mean': 377.061279296875, 'margin_dpo/margin_std': 511.11419677734375, 'logps/chosen': -634.7177734375, 'logps/rejected': -1065.608154296875, 'logps/ref_chosen': -60.116851806640625, 'logps/ref_rejected': -113.94602966308594, 'KL/chosen_KL_mean': -574.6008911132812, 'KL/rejected_KL_mean': -951.6621704101562, 'KL/mean': -763.1315307617188, 'KL/std': 477.3629150390625, 'logits/chosen': -0.7876610159873962, 'logits/rejected': -0.807873547077179, 'epoch': 0.68} + 68%|██████▊ | 465/681 [19:42<08:54, 2.48s/it] 68%|██████▊ | 466/681 [19:45<09:02, 2.52s/it] {'loss': 1.0906, 'grad_norm': 33.484703063964844, 'learning_rate': 1.3856541105586545e-07, 'fcm_dpo/beta': 0.0012718967627733946, 'fcm_dpo/q_t': 0.4008026123046875, 'fcm_dpo/delta': -0.03685159608721733, 'fcm_dpo/margin': 341.7809753417969, 'margin_dpo/margin_mean': 341.7809753417969, 'margin_dpo/margin_std': 492.47955322265625, 'logps/chosen': -666.3096923828125, 'logps/rejected': -1045.4852294921875, 'logps/ref_chosen': -52.920921325683594, 'logps/ref_rejected': -90.3154296875, 'KL/chosen_KL_mean': -613.3887939453125, 'KL/rejected_KL_mean': -955.1697998046875, 'KL/mean': -784.2792358398438, 'KL/std': 442.28924560546875, 'logits/chosen': -0.8349906206130981, 'logits/rejected': -0.843805193901062, 'epoch': 0.68} + 68%|██████▊ | 466/681 [19:45<09:02, 2.52s/it] 69%|██████▊ | 467/681 [19:47<08:56, 2.51s/it] {'loss': 1.1488, 'grad_norm': 52.08469009399414, 'learning_rate': 1.3741809409947729e-07, 'fcm_dpo/beta': 0.00125328847207129, 'fcm_dpo/q_t': 0.4034256041049957, 'fcm_dpo/delta': -0.05458660423755646, 'fcm_dpo/margin': 359.0608825683594, 'margin_dpo/margin_mean': 359.0609130859375, 'margin_dpo/margin_std': 667.78173828125, 'logps/chosen': -853.5452880859375, 'logps/rejected': -1236.7506103515625, 'logps/ref_chosen': -78.7158203125, 'logps/ref_rejected': -102.86019897460938, 'KL/chosen_KL_mean': -774.8294677734375, 'KL/rejected_KL_mean': -1133.890380859375, 'KL/mean': -954.3599243164062, 'KL/std': 575.3023681640625, 'logits/chosen': -0.9275529384613037, 'logits/rejected': -0.9244056940078735, 'epoch': 0.69} + 69%|██████▊ | 467/681 [19:47<08:56, 2.51s/it] 69%|██████▊ | 468/681 [19:50<09:02, 2.55s/it] {'loss': 1.0164, 'grad_norm': 44.7249641418457, 'learning_rate': 1.362737437810114e-07, 'fcm_dpo/beta': 0.0012304207775741816, 'fcm_dpo/q_t': 0.3800439238548279, 'fcm_dpo/delta': -0.16281697154045105, 'fcm_dpo/margin': 450.0086364746094, 'margin_dpo/margin_mean': 450.0086364746094, 'margin_dpo/margin_std': 588.6818237304688, 'logps/chosen': -677.3187255859375, 'logps/rejected': -1158.4208984375, 'logps/ref_chosen': -69.93536376953125, 'logps/ref_rejected': -101.02880859375, 'KL/chosen_KL_mean': -607.3834228515625, 'KL/rejected_KL_mean': -1057.39208984375, 'KL/mean': -832.3876953125, 'KL/std': 518.711669921875, 'logits/chosen': -0.9139019846916199, 'logits/rejected': -0.9288034439086914, 'epoch': 0.69} + 69%|██████▊ | 468/681 [19:50<09:02, 2.55s/it] 69%|██████▉ | 469/681 [19:53<09:17, 2.63s/it] {'loss': 1.0044, 'grad_norm': 36.414100646972656, 'learning_rate': 1.351323902551631e-07, 'fcm_dpo/beta': 0.0011876230128109455, 'fcm_dpo/q_t': 0.3780639171600342, 'fcm_dpo/delta': -0.14456316828727722, 'fcm_dpo/margin': 450.6948547363281, 'margin_dpo/margin_mean': 450.69488525390625, 'margin_dpo/margin_std': 503.34381103515625, 'logps/chosen': -709.1287231445312, 'logps/rejected': -1196.4853515625, 'logps/ref_chosen': -68.12469482421875, 'logps/ref_rejected': -104.78640747070312, 'KL/chosen_KL_mean': -641.0040283203125, 'KL/rejected_KL_mean': -1091.698974609375, 'KL/mean': -866.3514404296875, 'KL/std': 456.729736328125, 'logits/chosen': -0.9149258136749268, 'logits/rejected': -0.9327446222305298, 'epoch': 0.69} + 69%|██████▉ | 469/681 [19:53<09:17, 2.63s/it] 69%|██████▉ | 470/681 [19:55<09:09, 2.60s/it] {'loss': 1.0628, 'grad_norm': 25.32660484313965, 'learning_rate': 1.339940635976592e-07, 'fcm_dpo/beta': 0.0011768193216994405, 'fcm_dpo/q_t': 0.3930772542953491, 'fcm_dpo/delta': -0.07272230088710785, 'fcm_dpo/margin': 398.8236083984375, 'margin_dpo/margin_mean': 398.8235778808594, 'margin_dpo/margin_std': 535.6801147460938, 'logps/chosen': -599.1129150390625, 'logps/rejected': -1036.847412109375, 'logps/ref_chosen': -43.791927337646484, 'logps/ref_rejected': -82.70285034179688, 'KL/chosen_KL_mean': -555.3209838867188, 'KL/rejected_KL_mean': -954.1446533203125, 'KL/mean': -754.7327880859375, 'KL/std': 484.958984375, 'logits/chosen': -0.8891603946685791, 'logits/rejected': -0.9033347368240356, 'epoch': 0.69} + 69%|██████▉ | 470/681 [19:55<09:09, 2.60s/it] 69%|██████▉ | 471/681 [19:57<08:48, 2.52s/it] {'loss': 1.131, 'grad_norm': 45.39756393432617, 'learning_rate': 1.3285879380446563e-07, 'fcm_dpo/beta': 0.0011662011966109276, 'fcm_dpo/q_t': 0.416721373796463, 'fcm_dpo/delta': 0.02322380244731903, 'fcm_dpo/margin': 323.3678283691406, 'margin_dpo/margin_mean': 323.36785888671875, 'margin_dpo/margin_std': 523.2651977539062, 'logps/chosen': -756.7708129882812, 'logps/rejected': -1100.40966796875, 'logps/ref_chosen': -63.33952331542969, 'logps/ref_rejected': -83.61048126220703, 'KL/chosen_KL_mean': -693.4312744140625, 'KL/rejected_KL_mean': -1016.7991943359375, 'KL/mean': -855.115234375, 'KL/std': 482.7373352050781, 'logits/chosen': -0.9871773719787598, 'logits/rejected': -0.9965918064117432, 'epoch': 0.69} + 69%|██████▉ | 471/681 [19:57<08:48, 2.52s/it] 69%|██████▉ | 472/681 [20:00<09:01, 2.59s/it] {'loss': 1.0948, 'grad_norm': 32.389835357666016, 'learning_rate': 1.317266107909975e-07, 'fcm_dpo/beta': 0.001157897524535656, 'fcm_dpo/q_t': 0.40090325474739075, 'fcm_dpo/delta': -0.07049451023340225, 'fcm_dpo/margin': 403.0553894042969, 'margin_dpo/margin_mean': 403.055419921875, 'margin_dpo/margin_std': 634.1861572265625, 'logps/chosen': -768.666748046875, 'logps/rejected': -1205.2652587890625, 'logps/ref_chosen': -83.66610717773438, 'logps/ref_rejected': -117.20919799804688, 'KL/chosen_KL_mean': -685.0006103515625, 'KL/rejected_KL_mean': -1088.0560302734375, 'KL/mean': -886.5283203125, 'KL/std': 587.74755859375, 'logits/chosen': -0.9715889096260071, 'logits/rejected': -0.9529412388801575, 'epoch': 0.69} + 69%|██████▉ | 472/681 [20:00<09:01, 2.59s/it] 69%|██████▉ | 473/681 [20:03<09:06, 2.63s/it] {'loss': 1.3594, 'grad_norm': 114.76943969726562, 'learning_rate': 1.3059754439133002e-07, 'fcm_dpo/beta': 0.001172641757875681, 'fcm_dpo/q_t': 0.45356637239456177, 'fcm_dpo/delta': 0.06300715357065201, 'fcm_dpo/margin': 184.01409912109375, 'margin_dpo/margin_mean': 184.0141143798828, 'margin_dpo/margin_std': 777.1190185546875, 'logps/chosen': -874.7928466796875, 'logps/rejected': -1076.45654296875, 'logps/ref_chosen': -63.49696731567383, 'logps/ref_rejected': -81.14657592773438, 'KL/chosen_KL_mean': -811.2958984375, 'KL/rejected_KL_mean': -995.3099365234375, 'KL/mean': -903.3029174804688, 'KL/std': 595.69921875, 'logits/chosen': -0.9685148596763611, 'logits/rejected': -0.9405593872070312, 'epoch': 0.69} + 69%|██████▉ | 473/681 [20:03<09:06, 2.63s/it] 70%|██████▉ | 474/681 [20:06<09:08, 2.65s/it] {'loss': 1.1578, 'grad_norm': 41.28981399536133, 'learning_rate': 1.2947162435741277e-07, 'fcm_dpo/beta': 0.0011639699805527925, 'fcm_dpo/q_t': 0.4137033224105835, 'fcm_dpo/delta': -0.07450275868177414, 'fcm_dpo/margin': 319.40814208984375, 'margin_dpo/margin_mean': 319.4081726074219, 'margin_dpo/margin_std': 566.008544921875, 'logps/chosen': -702.8743896484375, 'logps/rejected': -1059.7509765625, 'logps/ref_chosen': -52.6119384765625, 'logps/ref_rejected': -90.08041381835938, 'KL/chosen_KL_mean': -650.262451171875, 'KL/rejected_KL_mean': -969.6705322265625, 'KL/mean': -809.9664916992188, 'KL/std': 507.17437744140625, 'logits/chosen': -0.8996328115463257, 'logits/rejected': -0.9031381607055664, 'epoch': 0.7} + 70%|██████▉ | 474/681 [20:06<09:08, 2.65s/it] 70%|██████▉ | 475/681 [20:08<08:54, 2.60s/it] {'loss': 1.0215, 'grad_norm': 38.87031936645508, 'learning_rate': 1.2834888035828596e-07, 'fcm_dpo/beta': 0.0011392869055271149, 'fcm_dpo/q_t': 0.3890076279640198, 'fcm_dpo/delta': -0.08241432905197144, 'fcm_dpo/margin': 419.8091735839844, 'margin_dpo/margin_mean': 419.8092041015625, 'margin_dpo/margin_std': 437.57635498046875, 'logps/chosen': -512.83154296875, 'logps/rejected': -980.2085571289062, 'logps/ref_chosen': -42.49519348144531, 'logps/ref_rejected': -90.06294250488281, 'KL/chosen_KL_mean': -470.3363342285156, 'KL/rejected_KL_mean': -890.1456298828125, 'KL/mean': -680.240966796875, 'KL/std': 411.4319763183594, 'logits/chosen': -0.930426836013794, 'logits/rejected': -0.9583991765975952, 'epoch': 0.7} + 70%|██████▉ | 475/681 [20:08<08:54, 2.60s/it] 70%|██████▉ | 476/681 [20:11<08:50, 2.59s/it] {'loss': 1.1033, 'grad_norm': 55.413394927978516, 'learning_rate': 1.2722934197929802e-07, 'fcm_dpo/beta': 0.001139120664447546, 'fcm_dpo/q_t': 0.4132460355758667, 'fcm_dpo/delta': 0.019377058371901512, 'fcm_dpo/margin': 334.79010009765625, 'margin_dpo/margin_mean': 334.79010009765625, 'margin_dpo/margin_std': 459.30084228515625, 'logps/chosen': -626.66943359375, 'logps/rejected': -992.2203979492188, 'logps/ref_chosen': -42.94938278198242, 'logps/ref_rejected': -73.71023559570312, 'KL/chosen_KL_mean': -583.7200317382812, 'KL/rejected_KL_mean': -918.5101318359375, 'KL/mean': -751.1150512695312, 'KL/std': 458.8353576660156, 'logits/chosen': -0.9340738654136658, 'logits/rejected': -0.9469287991523743, 'epoch': 0.7} + 70%|██████▉ | 476/681 [20:11<08:50, 2.59s/it] 70%|███████ | 477/681 [20:13<08:46, 2.58s/it] {'loss': 1.1429, 'grad_norm': 35.95357131958008, 'learning_rate': 1.2611303872132631e-07, 'fcm_dpo/beta': 0.0011485903523862362, 'fcm_dpo/q_t': 0.41430675983428955, 'fcm_dpo/delta': 0.023847589269280434, 'fcm_dpo/margin': 328.0302734375, 'margin_dpo/margin_mean': 328.0302429199219, 'margin_dpo/margin_std': 566.1884155273438, 'logps/chosen': -697.1861572265625, 'logps/rejected': -1030.5811767578125, 'logps/ref_chosen': -70.77261352539062, 'logps/ref_rejected': -76.13737487792969, 'KL/chosen_KL_mean': -626.4135131835938, 'KL/rejected_KL_mean': -954.4437255859375, 'KL/mean': -790.4285888671875, 'KL/std': 489.424560546875, 'logits/chosen': -0.9906863570213318, 'logits/rejected': -0.9593477845191956, 'epoch': 0.7} + 70%|███████ | 477/681 [20:13<08:46, 2.58s/it] 70%|███████ | 478/681 [20:16<08:49, 2.61s/it] {'loss': 1.0683, 'grad_norm': 40.23908615112305, 'learning_rate': 1.2500000000000005e-07, 'fcm_dpo/beta': 0.0011402592062950134, 'fcm_dpo/q_t': 0.39800071716308594, 'fcm_dpo/delta': -0.05179014056921005, 'fcm_dpo/margin': 394.17974853515625, 'margin_dpo/margin_mean': 394.1797790527344, 'margin_dpo/margin_std': 522.7603759765625, 'logps/chosen': -553.0662841796875, 'logps/rejected': -991.1675415039062, 'logps/ref_chosen': -41.440513610839844, 'logps/ref_rejected': -85.36196899414062, 'KL/chosen_KL_mean': -511.62579345703125, 'KL/rejected_KL_mean': -905.8055419921875, 'KL/mean': -708.7156982421875, 'KL/std': 440.7845458984375, 'logits/chosen': -0.8462599515914917, 'logits/rejected': -0.871573805809021, 'epoch': 0.7} + 70%|███████ | 478/681 [20:16<08:49, 2.61s/it] 70%|███████ | 479/681 [20:18<08:36, 2.56s/it] {'loss': 1.1171, 'grad_norm': 29.177635192871094, 'learning_rate': 1.2389025514492456e-07, 'fcm_dpo/beta': 0.0011373090092092752, 'fcm_dpo/q_t': 0.40709632635116577, 'fcm_dpo/delta': -0.030031614005565643, 'fcm_dpo/margin': 376.3300476074219, 'margin_dpo/margin_mean': 376.3300476074219, 'margin_dpo/margin_std': 624.73388671875, 'logps/chosen': -711.8945922851562, 'logps/rejected': -1129.43310546875, 'logps/ref_chosen': -53.907920837402344, 'logps/ref_rejected': -95.1163330078125, 'KL/chosen_KL_mean': -657.9866943359375, 'KL/rejected_KL_mean': -1034.316650390625, 'KL/mean': -846.1517333984375, 'KL/std': 541.3948974609375, 'logits/chosen': -0.9058327674865723, 'logits/rejected': -0.9377299547195435, 'epoch': 0.7} + 70%|███████ | 479/681 [20:18<08:36, 2.56s/it] 70%|███████ | 480/681 [20:21<08:24, 2.51s/it] {'loss': 1.188, 'grad_norm': 56.077796936035156, 'learning_rate': 1.227838333989088e-07, 'fcm_dpo/beta': 0.0011228574439883232, 'fcm_dpo/q_t': 0.4276391863822937, 'fcm_dpo/delta': -0.042888376861810684, 'fcm_dpo/margin': 288.78948974609375, 'margin_dpo/margin_mean': 288.78948974609375, 'margin_dpo/margin_std': 554.4617309570312, 'logps/chosen': -862.9566650390625, 'logps/rejected': -1175.9959716796875, 'logps/ref_chosen': -58.682701110839844, 'logps/ref_rejected': -82.93248748779297, 'KL/chosen_KL_mean': -804.2739868164062, 'KL/rejected_KL_mean': -1093.0634765625, 'KL/mean': -948.6687622070312, 'KL/std': 507.27008056640625, 'logits/chosen': -0.9502737522125244, 'logits/rejected': -0.9440046548843384, 'epoch': 0.7} + 70%|███████ | 480/681 [20:21<08:24, 2.51s/it] 71%|███████ | 481/681 [20:23<08:21, 2.51s/it] {'loss': 1.0354, 'grad_norm': 37.83536148071289, 'learning_rate': 1.2168076391719489e-07, 'fcm_dpo/beta': 0.0011024028062820435, 'fcm_dpo/q_t': 0.38820528984069824, 'fcm_dpo/delta': -0.10436421632766724, 'fcm_dpo/margin': 452.87890625, 'margin_dpo/margin_mean': 452.87890625, 'margin_dpo/margin_std': 566.91455078125, 'logps/chosen': -727.95703125, 'logps/rejected': -1218.2919921875, 'logps/ref_chosen': -54.964271545410156, 'logps/ref_rejected': -92.42044067382812, 'KL/chosen_KL_mean': -672.9927368164062, 'KL/rejected_KL_mean': -1125.87158203125, 'KL/mean': -899.4321899414062, 'KL/std': 524.7504272460938, 'logits/chosen': -0.9408276081085205, 'logits/rejected': -0.9681203365325928, 'epoch': 0.71} + 71%|███████ | 481/681 [20:23<08:21, 2.51s/it] 71%|███████ | 482/681 [20:26<08:35, 2.59s/it] {'loss': 1.2751, 'grad_norm': 56.61158752441406, 'learning_rate': 1.2058107576668938e-07, 'fcm_dpo/beta': 0.0011032463517040014, 'fcm_dpo/q_t': 0.44445592164993286, 'fcm_dpo/delta': 0.07522930204868317, 'fcm_dpo/margin': 214.26512145996094, 'margin_dpo/margin_mean': 214.26513671875, 'margin_dpo/margin_std': 631.7608642578125, 'logps/chosen': -805.560791015625, 'logps/rejected': -1039.862060546875, 'logps/ref_chosen': -67.553466796875, 'logps/ref_rejected': -87.58953857421875, 'KL/chosen_KL_mean': -738.0073852539062, 'KL/rejected_KL_mean': -952.2725219726562, 'KL/mean': -845.139892578125, 'KL/std': 542.2655029296875, 'logits/chosen': -0.8482377529144287, 'logits/rejected': -0.8390638828277588, 'epoch': 0.71} + 71%|███████ | 482/681 [20:26<08:35, 2.59s/it] 71%|███████ | 483/681 [20:29<08:34, 2.60s/it] {'loss': 1.0221, 'grad_norm': 34.9405517578125, 'learning_rate': 1.194847979251979e-07, 'fcm_dpo/beta': 0.0010884404182434082, 'fcm_dpo/q_t': 0.38443121314048767, 'fcm_dpo/delta': -0.1278223842382431, 'fcm_dpo/margin': 478.5213928222656, 'margin_dpo/margin_mean': 478.5213623046875, 'margin_dpo/margin_std': 590.3174438476562, 'logps/chosen': -703.4990844726562, 'logps/rejected': -1214.4775390625, 'logps/ref_chosen': -63.32981872558594, 'logps/ref_rejected': -95.78697204589844, 'KL/chosen_KL_mean': -640.1693115234375, 'KL/rejected_KL_mean': -1118.690673828125, 'KL/mean': -879.429931640625, 'KL/std': 530.899658203125, 'logits/chosen': -0.9293410778045654, 'logits/rejected': -0.9389553070068359, 'epoch': 0.71} + 71%|███████ | 483/681 [20:29<08:34, 2.60s/it] 71%|███████ | 484/681 [20:31<08:16, 2.52s/it] {'loss': 1.0454, 'grad_norm': 53.70915985107422, 'learning_rate': 1.1839195928066101e-07, 'fcm_dpo/beta': 0.001076672924682498, 'fcm_dpo/q_t': 0.39384713768959045, 'fcm_dpo/delta': -0.0724029541015625, 'fcm_dpo/margin': 435.53656005859375, 'margin_dpo/margin_mean': 435.53656005859375, 'margin_dpo/margin_std': 524.6602783203125, 'logps/chosen': -607.956298828125, 'logps/rejected': -1068.7261962890625, 'logps/ref_chosen': -59.13812255859375, 'logps/ref_rejected': -84.37144470214844, 'KL/chosen_KL_mean': -548.8181762695312, 'KL/rejected_KL_mean': -984.354736328125, 'KL/mean': -766.58642578125, 'KL/std': 516.6387939453125, 'logits/chosen': -0.9350720643997192, 'logits/rejected': -0.9612249135971069, 'epoch': 0.71} + 71%|███████ | 484/681 [20:31<08:16, 2.52s/it] 71%|███████ | 485/681 [20:33<08:10, 2.50s/it] {'loss': 1.0858, 'grad_norm': 35.44499588012695, 'learning_rate': 1.1730258863039347e-07, 'fcm_dpo/beta': 0.001064480864442885, 'fcm_dpo/q_t': 0.4025202989578247, 'fcm_dpo/delta': -0.039454929530620575, 'fcm_dpo/margin': 411.17791748046875, 'margin_dpo/margin_mean': 411.17791748046875, 'margin_dpo/margin_std': 591.315185546875, 'logps/chosen': -635.2353515625, 'logps/rejected': -1090.927734375, 'logps/ref_chosen': -58.849571228027344, 'logps/ref_rejected': -103.36408233642578, 'KL/chosen_KL_mean': -576.3857421875, 'KL/rejected_KL_mean': -987.5636596679688, 'KL/mean': -781.9747314453125, 'KL/std': 519.3931884765625, 'logits/chosen': -0.8758097887039185, 'logits/rejected': -0.8995819091796875, 'epoch': 0.71} + 71%|███████ | 485/681 [20:33<08:10, 2.50s/it] 71%|███████▏ | 486/681 [20:35<07:42, 2.37s/it] {'loss': 1.0796, 'grad_norm': 38.644596099853516, 'learning_rate': 1.1621671468032493e-07, 'fcm_dpo/beta': 0.0010440791957080364, 'fcm_dpo/q_t': 0.39278823137283325, 'fcm_dpo/delta': -0.09158313274383545, 'fcm_dpo/margin': 466.44757080078125, 'margin_dpo/margin_mean': 466.4476013183594, 'margin_dpo/margin_std': 711.1044311523438, 'logps/chosen': -719.041015625, 'logps/rejected': -1222.368408203125, 'logps/ref_chosen': -55.25966262817383, 'logps/ref_rejected': -92.13936614990234, 'KL/chosen_KL_mean': -663.7813720703125, 'KL/rejected_KL_mean': -1130.22900390625, 'KL/mean': -897.005126953125, 'KL/std': 587.8470458984375, 'logits/chosen': -0.9534709453582764, 'logits/rejected': -0.9716538786888123, 'epoch': 0.71} + 71%|███████▏ | 486/681 [20:35<07:42, 2.37s/it] 72%|███████▏ | 487/681 [20:38<07:56, 2.46s/it] {'loss': 1.1294, 'grad_norm': 34.07633590698242, 'learning_rate': 1.1513436604424378e-07, 'fcm_dpo/beta': 0.001049531390890479, 'fcm_dpo/q_t': 0.4152664542198181, 'fcm_dpo/delta': 0.03742973506450653, 'fcm_dpo/margin': 346.44439697265625, 'margin_dpo/margin_mean': 346.44439697265625, 'margin_dpo/margin_std': 535.1236572265625, 'logps/chosen': -742.1241455078125, 'logps/rejected': -1127.924072265625, 'logps/ref_chosen': -53.06330871582031, 'logps/ref_rejected': -92.41883087158203, 'KL/chosen_KL_mean': -689.0608520507812, 'KL/rejected_KL_mean': -1035.505126953125, 'KL/mean': -862.2830200195312, 'KL/std': 522.194580078125, 'logits/chosen': -0.9346251487731934, 'logits/rejected': -0.9410355091094971, 'epoch': 0.72} + 72%|███████▏ | 487/681 [20:38<07:56, 2.46s/it] 72%|███████▏ | 488/681 [20:41<08:04, 2.51s/it] {'loss': 1.0911, 'grad_norm': 30.069625854492188, 'learning_rate': 1.1405557124304335e-07, 'fcm_dpo/beta': 0.0010536068584769964, 'fcm_dpo/q_t': 0.4102671444416046, 'fcm_dpo/delta': 0.012613944709300995, 'fcm_dpo/margin': 367.89251708984375, 'margin_dpo/margin_mean': 367.89251708984375, 'margin_dpo/margin_std': 458.40264892578125, 'logps/chosen': -614.334228515625, 'logps/rejected': -1014.0052490234375, 'logps/ref_chosen': -52.22815704345703, 'logps/ref_rejected': -84.00656127929688, 'KL/chosen_KL_mean': -562.1060791015625, 'KL/rejected_KL_mean': -929.9986572265625, 'KL/mean': -746.0523681640625, 'KL/std': 474.5586853027344, 'logits/chosen': -0.8993455767631531, 'logits/rejected': -0.9078420400619507, 'epoch': 0.72} + 72%|███████▏ | 488/681 [20:41<08:04, 2.51s/it] 72%|███████▏ | 489/681 [20:43<07:58, 2.49s/it] {'loss': 1.1131, 'grad_norm': 27.20409393310547, 'learning_rate': 1.1298035870396985e-07, 'fcm_dpo/beta': 0.0010581349488347769, 'fcm_dpo/q_t': 0.41536301374435425, 'fcm_dpo/delta': 0.02230164408683777, 'fcm_dpo/margin': 357.4197082519531, 'margin_dpo/margin_mean': 357.419677734375, 'margin_dpo/margin_std': 521.9287719726562, 'logps/chosen': -571.2440185546875, 'logps/rejected': -952.0721435546875, 'logps/ref_chosen': -55.989627838134766, 'logps/ref_rejected': -79.39812469482422, 'KL/chosen_KL_mean': -515.2543334960938, 'KL/rejected_KL_mean': -872.674072265625, 'KL/mean': -693.9641723632812, 'KL/std': 473.46533203125, 'logits/chosen': -0.8895210027694702, 'logits/rejected': -0.8903396725654602, 'epoch': 0.72} + 72%|███████▏ | 489/681 [20:43<07:58, 2.49s/it] 72%|███████▏ | 490/681 [20:46<08:09, 2.57s/it] {'loss': 1.1447, 'grad_norm': 36.384334564208984, 'learning_rate': 1.1190875675987355e-07, 'fcm_dpo/beta': 0.001059696776792407, 'fcm_dpo/q_t': 0.4139998257160187, 'fcm_dpo/delta': 0.011018646880984306, 'fcm_dpo/margin': 367.3455505371094, 'margin_dpo/margin_mean': 367.3455505371094, 'margin_dpo/margin_std': 653.3365478515625, 'logps/chosen': -681.4434814453125, 'logps/rejected': -1106.831787109375, 'logps/ref_chosen': -52.36639404296875, 'logps/ref_rejected': -110.4090576171875, 'KL/chosen_KL_mean': -629.0771484375, 'KL/rejected_KL_mean': -996.4226684570312, 'KL/mean': -812.7498779296875, 'KL/std': 568.397705078125, 'logits/chosen': -0.8977552652359009, 'logits/rejected': -0.9355182647705078, 'epoch': 0.72} + 72%|███████▏ | 490/681 [20:46<08:09, 2.57s/it] 72%|███████▏ | 491/681 [20:48<08:01, 2.54s/it] {'loss': 1.1925, 'grad_norm': 30.11342430114746, 'learning_rate': 1.1084079364846241e-07, 'fcm_dpo/beta': 0.0010794580448418856, 'fcm_dpo/q_t': 0.4374847710132599, 'fcm_dpo/delta': 0.1289866715669632, 'fcm_dpo/margin': 254.20654296875, 'margin_dpo/margin_mean': 254.20654296875, 'margin_dpo/margin_std': 475.316162109375, 'logps/chosen': -629.2527465820312, 'logps/rejected': -896.6157836914062, 'logps/ref_chosen': -60.11626434326172, 'logps/ref_rejected': -73.27278900146484, 'KL/chosen_KL_mean': -569.136474609375, 'KL/rejected_KL_mean': -823.343017578125, 'KL/mean': -696.23974609375, 'KL/std': 484.46240234375, 'logits/chosen': -0.8976389169692993, 'logits/rejected': -0.8920071125030518, 'epoch': 0.72} + 72%|███████▏ | 491/681 [20:48<08:01, 2.54s/it] 72%|███████▏ | 492/681 [20:51<08:03, 2.56s/it] {'loss': 1.2187, 'grad_norm': 42.94180679321289, 'learning_rate': 1.097764975115576e-07, 'fcm_dpo/beta': 0.0011044761631637812, 'fcm_dpo/q_t': 0.4389011859893799, 'fcm_dpo/delta': 0.1298675835132599, 'fcm_dpo/margin': 247.95816040039062, 'margin_dpo/margin_mean': 247.95819091796875, 'margin_dpo/margin_std': 550.007080078125, 'logps/chosen': -639.7259521484375, 'logps/rejected': -906.349609375, 'logps/ref_chosen': -53.994178771972656, 'logps/ref_rejected': -72.65962219238281, 'KL/chosen_KL_mean': -585.7317504882812, 'KL/rejected_KL_mean': -833.68994140625, 'KL/mean': -709.7108154296875, 'KL/std': 479.278076171875, 'logits/chosen': -0.9602404832839966, 'logits/rejected': -0.9433440566062927, 'epoch': 0.72} + 72%|███████▏ | 492/681 [20:51<08:03, 2.56s/it] 72%|███████▏ | 493/681 [20:54<08:09, 2.60s/it] {'loss': 1.1813, 'grad_norm': 33.16301727294922, 'learning_rate': 1.0871589639435203e-07, 'fcm_dpo/beta': 0.0011116546811535954, 'fcm_dpo/q_t': 0.42805489897727966, 'fcm_dpo/delta': -0.010466049425303936, 'fcm_dpo/margin': 277.23876953125, 'margin_dpo/margin_mean': 277.2387390136719, 'margin_dpo/margin_std': 518.248046875, 'logps/chosen': -695.2529296875, 'logps/rejected': -984.3175048828125, 'logps/ref_chosen': -75.49723815917969, 'logps/ref_rejected': -87.32301330566406, 'KL/chosen_KL_mean': -619.7557373046875, 'KL/rejected_KL_mean': -896.9945068359375, 'KL/mean': -758.3751220703125, 'KL/std': 526.8731689453125, 'logits/chosen': -0.9904724359512329, 'logits/rejected': -0.9639154076576233, 'epoch': 0.72} + 72%|███████▏ | 493/681 [20:54<08:09, 2.60s/it] 73%|███████▎ | 494/681 [20:56<08:06, 2.60s/it] {'loss': 1.0279, 'grad_norm': 45.52617263793945, 'learning_rate': 1.0765901824467166e-07, 'fcm_dpo/beta': 0.0010987753048539162, 'fcm_dpo/q_t': 0.3895169794559479, 'fcm_dpo/delta': -0.08314534276723862, 'fcm_dpo/margin': 436.12835693359375, 'margin_dpo/margin_mean': 436.12835693359375, 'margin_dpo/margin_std': 479.73455810546875, 'logps/chosen': -526.5089111328125, 'logps/rejected': -1007.369384765625, 'logps/ref_chosen': -41.35926818847656, 'logps/ref_rejected': -86.09136962890625, 'KL/chosen_KL_mean': -485.149658203125, 'KL/rejected_KL_mean': -921.278076171875, 'KL/mean': -703.2138671875, 'KL/std': 474.62786865234375, 'logits/chosen': -0.8565849661827087, 'logits/rejected': -0.8936357498168945, 'epoch': 0.73} + 73%|███████▎ | 494/681 [20:56<08:06, 2.60s/it] 73%|███████▎ | 495/681 [20:59<08:18, 2.68s/it] {'loss': 1.0985, 'grad_norm': 32.46592330932617, 'learning_rate': 1.0660589091223854e-07, 'fcm_dpo/beta': 0.0010912488214671612, 'fcm_dpo/q_t': 0.40759721398353577, 'fcm_dpo/delta': -0.015377325937151909, 'fcm_dpo/margin': 380.0621337890625, 'margin_dpo/margin_mean': 380.0621337890625, 'margin_dpo/margin_std': 567.2548217773438, 'logps/chosen': -609.1558227539062, 'logps/rejected': -1017.1073608398438, 'logps/ref_chosen': -63.53507995605469, 'logps/ref_rejected': -91.42443084716797, 'KL/chosen_KL_mean': -545.6207275390625, 'KL/rejected_KL_mean': -925.6829833984375, 'KL/mean': -735.65185546875, 'KL/std': 484.06903076171875, 'logits/chosen': -0.9482539892196655, 'logits/rejected': -0.9554197192192078, 'epoch': 0.73} + 73%|███████▎ | 495/681 [20:59<08:18, 2.68s/it] 73%|███████▎ | 496/681 [21:02<08:15, 2.68s/it] {'loss': 1.2282, 'grad_norm': 76.83142852783203, 'learning_rate': 1.0555654214793722e-07, 'fcm_dpo/beta': 0.0011189571814611554, 'fcm_dpo/q_t': 0.44650715589523315, 'fcm_dpo/delta': 0.1738756000995636, 'fcm_dpo/margin': 205.62347412109375, 'margin_dpo/margin_mean': 205.62347412109375, 'margin_dpo/margin_std': 438.72454833984375, 'logps/chosen': -751.309814453125, 'logps/rejected': -968.6707153320312, 'logps/ref_chosen': -72.5919189453125, 'logps/ref_rejected': -84.32933807373047, 'KL/chosen_KL_mean': -678.7178955078125, 'KL/rejected_KL_mean': -884.3413696289062, 'KL/mean': -781.5296020507812, 'KL/std': 392.3121337890625, 'logits/chosen': -0.9424889087677002, 'logits/rejected': -0.9166613817214966, 'epoch': 0.73} + 73%|███████▎ | 496/681 [21:02<08:15, 2.68s/it] 73%|███████▎ | 497/681 [21:04<08:08, 2.66s/it] {'loss': 1.2322, 'grad_norm': 40.14469528198242, 'learning_rate': 1.0451099960308374e-07, 'fcm_dpo/beta': 0.0011342904763296247, 'fcm_dpo/q_t': 0.4462537467479706, 'fcm_dpo/delta': 0.02543473243713379, 'fcm_dpo/margin': 205.537109375, 'margin_dpo/margin_mean': 205.537109375, 'margin_dpo/margin_std': 450.22613525390625, 'logps/chosen': -690.3705444335938, 'logps/rejected': -913.60205078125, 'logps/ref_chosen': -58.59397506713867, 'logps/ref_rejected': -76.28836822509766, 'KL/chosen_KL_mean': -631.7765502929688, 'KL/rejected_KL_mean': -837.3136596679688, 'KL/mean': -734.545166015625, 'KL/std': 477.0352478027344, 'logits/chosen': -0.9075003266334534, 'logits/rejected': -0.8960117101669312, 'epoch': 0.73} + 73%|███████▎ | 497/681 [21:04<08:08, 2.66s/it] 73%|███████▎ | 498/681 [21:07<08:11, 2.68s/it] {'loss': 1.1291, 'grad_norm': 27.699867248535156, 'learning_rate': 1.0346929082869641e-07, 'fcm_dpo/beta': 0.0011364180827513337, 'fcm_dpo/q_t': 0.4120427668094635, 'fcm_dpo/delta': 0.014480667188763618, 'fcm_dpo/margin': 339.73480224609375, 'margin_dpo/margin_mean': 339.73480224609375, 'margin_dpo/margin_std': 556.2047119140625, 'logps/chosen': -642.84716796875, 'logps/rejected': -995.3343505859375, 'logps/ref_chosen': -71.20565795898438, 'logps/ref_rejected': -83.95803833007812, 'KL/chosen_KL_mean': -571.6414794921875, 'KL/rejected_KL_mean': -911.3762817382812, 'KL/mean': -741.5089111328125, 'KL/std': 496.2296142578125, 'logits/chosen': -0.8944777250289917, 'logits/rejected': -0.8845921754837036, 'epoch': 0.73} + 73%|███████▎ | 498/681 [21:07<08:11, 2.68s/it] 73%|███████▎ | 499/681 [21:10<07:59, 2.63s/it] {'loss': 1.0675, 'grad_norm': 35.038902282714844, 'learning_rate': 1.0243144327477013e-07, 'fcm_dpo/beta': 0.0011275302385911345, 'fcm_dpo/q_t': 0.39803507924079895, 'fcm_dpo/delta': -0.04935740679502487, 'fcm_dpo/margin': 396.4712829589844, 'margin_dpo/margin_mean': 396.47125244140625, 'margin_dpo/margin_std': 522.6738891601562, 'logps/chosen': -553.5039672851562, 'logps/rejected': -999.7987060546875, 'logps/ref_chosen': -51.25519561767578, 'logps/ref_rejected': -101.07870483398438, 'KL/chosen_KL_mean': -502.248779296875, 'KL/rejected_KL_mean': -898.719970703125, 'KL/mean': -700.484375, 'KL/std': 475.0845947265625, 'logits/chosen': -0.9036816954612732, 'logits/rejected': -0.9378571510314941, 'epoch': 0.73} + 73%|███████▎ | 499/681 [21:10<07:59, 2.63s/it] 73%|███████▎ | 500/681 [21:12<07:46, 2.58s/it] {'loss': 1.1223, 'grad_norm': 33.4288215637207, 'learning_rate': 1.0139748428955333e-07, 'fcm_dpo/beta': 0.0011279778555035591, 'fcm_dpo/q_t': 0.4120955467224121, 'fcm_dpo/delta': 0.01636883243918419, 'fcm_dpo/margin': 340.65838623046875, 'margin_dpo/margin_mean': 340.65838623046875, 'margin_dpo/margin_std': 526.3189697265625, 'logps/chosen': -667.15625, 'logps/rejected': -1044.721435546875, 'logps/ref_chosen': -57.027442932128906, 'logps/ref_rejected': -93.93421173095703, 'KL/chosen_KL_mean': -610.1287841796875, 'KL/rejected_KL_mean': -950.7872314453125, 'KL/mean': -780.4580078125, 'KL/std': 427.2696228027344, 'logits/chosen': -0.9120993614196777, 'logits/rejected': -0.9445118308067322, 'epoch': 0.73} + 73%|███████▎ | 500/681 [21:12<07:46, 2.58s/it] 74%|███████▎ | 501/681 [21:15<07:41, 2.56s/it] {'loss': 1.1026, 'grad_norm': 29.30938720703125, 'learning_rate': 1.0036744111882672e-07, 'fcm_dpo/beta': 0.001129691954702139, 'fcm_dpo/q_t': 0.40666812658309937, 'fcm_dpo/delta': -0.017550457268953323, 'fcm_dpo/margin': 368.912841796875, 'margin_dpo/margin_mean': 368.912841796875, 'margin_dpo/margin_std': 553.5332641601562, 'logps/chosen': -579.1849365234375, 'logps/rejected': -973.8949584960938, 'logps/ref_chosen': -54.359527587890625, 'logps/ref_rejected': -80.15670013427734, 'KL/chosen_KL_mean': -524.825439453125, 'KL/rejected_KL_mean': -893.73828125, 'KL/mean': -709.2818603515625, 'KL/std': 459.21185302734375, 'logits/chosen': -0.8794831037521362, 'logits/rejected': -0.8698313236236572, 'epoch': 0.74} + 74%|███████▎ | 501/681 [21:15<07:41, 2.56s/it] 74%|███████▎ | 502/681 [21:17<07:40, 2.57s/it] {'loss': 1.071, 'grad_norm': 29.00743865966797, 'learning_rate': 9.934134090518592e-08, 'fcm_dpo/beta': 0.0011267581721767783, 'fcm_dpo/q_t': 0.4055173695087433, 'fcm_dpo/delta': -0.007630977779626846, 'fcm_dpo/margin': 361.4590759277344, 'margin_dpo/margin_mean': 361.4591064453125, 'margin_dpo/margin_std': 415.29315185546875, 'logps/chosen': -543.0424194335938, 'logps/rejected': -919.8497924804688, 'logps/ref_chosen': -67.60050964355469, 'logps/ref_rejected': -82.94876098632812, 'KL/chosen_KL_mean': -475.44189453125, 'KL/rejected_KL_mean': -836.9010009765625, 'KL/mean': -656.1715087890625, 'KL/std': 405.69573974609375, 'logits/chosen': -0.7667361497879028, 'logits/rejected': -0.7510417699813843, 'epoch': 0.74} + 74%|███████▎ | 502/681 [21:17<07:40, 2.57s/it] 74%|███████▍ | 503/681 [21:20<07:46, 2.62s/it] {'loss': 1.1014, 'grad_norm': 27.742767333984375, 'learning_rate': 9.831921068732571e-08, 'fcm_dpo/beta': 0.001126825693063438, 'fcm_dpo/q_t': 0.41448622941970825, 'fcm_dpo/delta': 0.026751546189188957, 'fcm_dpo/margin': 332.1096496582031, 'margin_dpo/margin_mean': 332.10968017578125, 'margin_dpo/margin_std': 432.3536376953125, 'logps/chosen': -536.665283203125, 'logps/rejected': -896.2020263671875, 'logps/ref_chosen': -55.078407287597656, 'logps/ref_rejected': -82.50544738769531, 'KL/chosen_KL_mean': -481.5869140625, 'KL/rejected_KL_mean': -813.696533203125, 'KL/mean': -647.6417236328125, 'KL/std': 403.74560546875, 'logits/chosen': -0.794667661190033, 'logits/rejected': -0.7808655500411987, 'epoch': 0.74} + 74%|███████▍ | 503/681 [21:20<07:46, 2.62s/it] 74%|███████▍ | 504/681 [21:23<07:41, 2.61s/it] {'loss': 1.0754, 'grad_norm': 32.015926361083984, 'learning_rate': 9.730107739932805e-08, 'fcm_dpo/beta': 0.0011236823629587889, 'fcm_dpo/q_t': 0.39904850721359253, 'fcm_dpo/delta': -0.050532855093479156, 'fcm_dpo/margin': 398.95245361328125, 'margin_dpo/margin_mean': 398.95245361328125, 'margin_dpo/margin_std': 538.3695678710938, 'logps/chosen': -598.259033203125, 'logps/rejected': -1041.0078125, 'logps/ref_chosen': -59.96575164794922, 'logps/ref_rejected': -103.76212310791016, 'KL/chosen_KL_mean': -538.2932739257812, 'KL/rejected_KL_mean': -937.2457275390625, 'KL/mean': -737.7694702148438, 'KL/std': 478.7033996582031, 'logits/chosen': -0.8786238431930542, 'logits/rejected': -0.9033294320106506, 'epoch': 0.74} + 74%|███████▍ | 504/681 [21:23<07:41, 2.61s/it] 74%|███████▍ | 505/681 [21:25<07:37, 2.60s/it] {'loss': 1.2118, 'grad_norm': 34.641334533691406, 'learning_rate': 9.628696786995188e-08, 'fcm_dpo/beta': 0.0011460301466286182, 'fcm_dpo/q_t': 0.4422228932380676, 'fcm_dpo/delta': 0.1525171399116516, 'fcm_dpo/margin': 219.1453094482422, 'margin_dpo/margin_mean': 219.14532470703125, 'margin_dpo/margin_std': 433.828125, 'logps/chosen': -676.8646240234375, 'logps/rejected': -908.4404296875, 'logps/ref_chosen': -76.1549072265625, 'logps/ref_rejected': -88.58537292480469, 'KL/chosen_KL_mean': -600.709716796875, 'KL/rejected_KL_mean': -819.8551025390625, 'KL/mean': -710.2824096679688, 'KL/std': 460.1452331542969, 'logits/chosen': -0.8766049742698669, 'logits/rejected': -0.8530220985412598, 'epoch': 0.74} + 74%|███████▍ | 505/681 [21:25<07:37, 2.60s/it] 74%|███████▍ | 506/681 [21:28<07:24, 2.54s/it] {'loss': 1.0883, 'grad_norm': 36.54334259033203, 'learning_rate': 9.527690882192635e-08, 'fcm_dpo/beta': 0.0011465998832136393, 'fcm_dpo/q_t': 0.4050843119621277, 'fcm_dpo/delta': -0.015700122341513634, 'fcm_dpo/margin': 361.67413330078125, 'margin_dpo/margin_mean': 361.67413330078125, 'margin_dpo/margin_std': 490.28369140625, 'logps/chosen': -539.34423828125, 'logps/rejected': -930.472900390625, 'logps/ref_chosen': -48.96050262451172, 'logps/ref_rejected': -78.41505432128906, 'KL/chosen_KL_mean': -490.38372802734375, 'KL/rejected_KL_mean': -852.057861328125, 'KL/mean': -671.2208251953125, 'KL/std': 451.6136169433594, 'logits/chosen': -0.8800439834594727, 'logits/rejected': -0.8960914611816406, 'epoch': 0.74} + 74%|███████▍ | 506/681 [21:28<07:24, 2.54s/it] 74%|███████▍ | 507/681 [21:30<07:26, 2.57s/it] {'loss': 1.162, 'grad_norm': 33.112762451171875, 'learning_rate': 9.427092687124691e-08, 'fcm_dpo/beta': 0.0011556025128811598, 'fcm_dpo/q_t': 0.42233383655548096, 'fcm_dpo/delta': 0.038512568920850754, 'fcm_dpo/margin': 314.0374755859375, 'margin_dpo/margin_mean': 314.0374755859375, 'margin_dpo/margin_std': 601.7278442382812, 'logps/chosen': -651.0076293945312, 'logps/rejected': -993.616455078125, 'logps/ref_chosen': -66.80149841308594, 'logps/ref_rejected': -95.37289428710938, 'KL/chosen_KL_mean': -584.2061767578125, 'KL/rejected_KL_mean': -898.2435913085938, 'KL/mean': -741.224853515625, 'KL/std': 549.9444580078125, 'logits/chosen': -0.9117947816848755, 'logits/rejected': -0.918329119682312, 'epoch': 0.74} + 74%|███████▍ | 507/681 [21:30<07:26, 2.57s/it] 75%|███████▍ | 508/681 [21:33<07:34, 2.62s/it] {'loss': 1.2157, 'grad_norm': 40.246891021728516, 'learning_rate': 9.326904852647344e-08, 'fcm_dpo/beta': 0.0011775526218116283, 'fcm_dpo/q_t': 0.43183645606040955, 'fcm_dpo/delta': 0.08841653168201447, 'fcm_dpo/margin': 266.6474609375, 'margin_dpo/margin_mean': 266.6474609375, 'margin_dpo/margin_std': 611.8427734375, 'logps/chosen': -699.3486328125, 'logps/rejected': -990.3201904296875, 'logps/ref_chosen': -71.303466796875, 'logps/ref_rejected': -95.6275405883789, 'KL/chosen_KL_mean': -628.045166015625, 'KL/rejected_KL_mean': -894.692626953125, 'KL/mean': -761.368896484375, 'KL/std': 525.7574462890625, 'logits/chosen': -0.8933985233306885, 'logits/rejected': -0.8938655853271484, 'epoch': 0.75} + 75%|███████▍ | 508/681 [21:33<07:34, 2.62s/it] 75%|███████▍ | 509/681 [21:36<07:31, 2.63s/it] {'loss': 1.1367, 'grad_norm': 31.764202117919922, 'learning_rate': 9.227130018803195e-08, 'fcm_dpo/beta': 0.0011943180579692125, 'fcm_dpo/q_t': 0.4205209016799927, 'fcm_dpo/delta': 0.051631003618240356, 'fcm_dpo/margin': 292.65716552734375, 'margin_dpo/margin_mean': 292.65716552734375, 'margin_dpo/margin_std': 450.46405029296875, 'logps/chosen': -525.8379516601562, 'logps/rejected': -837.9326171875, 'logps/ref_chosen': -63.81895065307617, 'logps/ref_rejected': -83.25643920898438, 'KL/chosen_KL_mean': -462.01898193359375, 'KL/rejected_KL_mean': -754.6761474609375, 'KL/mean': -608.3475952148438, 'KL/std': 369.74627685546875, 'logits/chosen': -0.8035761117935181, 'logits/rejected': -0.7988163232803345, 'epoch': 0.75} + 75%|███████▍ | 509/681 [21:36<07:31, 2.63s/it] 75%|███████▍ | 510/681 [21:38<07:32, 2.65s/it] {'loss': 1.039, 'grad_norm': 38.86001205444336, 'learning_rate': 9.127770814751932e-08, 'fcm_dpo/beta': 0.0011832050513476133, 'fcm_dpo/q_t': 0.39339399337768555, 'fcm_dpo/delta': -0.060402024537324905, 'fcm_dpo/margin': 386.8134460449219, 'margin_dpo/margin_mean': 386.8134765625, 'margin_dpo/margin_std': 423.6184997558594, 'logps/chosen': -611.544921875, 'logps/rejected': -1049.2451171875, 'logps/ref_chosen': -51.878448486328125, 'logps/ref_rejected': -102.7651596069336, 'KL/chosen_KL_mean': -559.66650390625, 'KL/rejected_KL_mean': -946.47998046875, 'KL/mean': -753.0732421875, 'KL/std': 431.13751220703125, 'logits/chosen': -0.7946321964263916, 'logits/rejected': -0.8168176412582397, 'epoch': 0.75} + 75%|███████▍ | 510/681 [21:38<07:32, 2.65s/it] 75%|███████▌ | 511/681 [21:41<07:26, 2.63s/it] {'loss': 1.1564, 'grad_norm': 41.20817565917969, 'learning_rate': 9.028829858700973e-08, 'fcm_dpo/beta': 0.0011853575706481934, 'fcm_dpo/q_t': 0.4183180034160614, 'fcm_dpo/delta': 0.03986484557390213, 'fcm_dpo/margin': 305.0107727050781, 'margin_dpo/margin_mean': 305.0107421875, 'margin_dpo/margin_std': 559.034423828125, 'logps/chosen': -585.9109497070312, 'logps/rejected': -923.5404052734375, 'logps/ref_chosen': -60.23811721801758, 'logps/ref_rejected': -92.85676574707031, 'KL/chosen_KL_mean': -525.6728515625, 'KL/rejected_KL_mean': -830.68359375, 'KL/mean': -678.17822265625, 'KL/std': 464.34112548828125, 'logits/chosen': -0.9006566405296326, 'logits/rejected': -0.9088428616523743, 'epoch': 0.75} + 75%|███████▌ | 511/681 [21:41<07:26, 2.63s/it] 75%|███████▌ | 512/681 [21:43<07:05, 2.52s/it] {'loss': 1.0149, 'grad_norm': 55.33091735839844, 'learning_rate': 8.930309757836516e-08, 'fcm_dpo/beta': 0.0011696910951286554, 'fcm_dpo/q_t': 0.387323796749115, 'fcm_dpo/delta': -0.08911710977554321, 'fcm_dpo/margin': 414.32196044921875, 'margin_dpo/margin_mean': 414.32196044921875, 'margin_dpo/margin_std': 421.341796875, 'logps/chosen': -475.5699462890625, 'logps/rejected': -916.8623046875, 'logps/ref_chosen': -54.905494689941406, 'logps/ref_rejected': -81.87586975097656, 'KL/chosen_KL_mean': -420.6644592285156, 'KL/rejected_KL_mean': -834.9864501953125, 'KL/mean': -627.825439453125, 'KL/std': 417.6683349609375, 'logits/chosen': -0.8243488073348999, 'logits/rejected': -0.8429218530654907, 'epoch': 0.75} + 75%|███████▌ | 512/681 [21:43<07:05, 2.52s/it] 75%|███████▌ | 513/681 [21:46<07:13, 2.58s/it] {'loss': 1.1408, 'grad_norm': 42.702476501464844, 'learning_rate': 8.832213108254863e-08, 'fcm_dpo/beta': 0.001167251612059772, 'fcm_dpo/q_t': 0.42014437913894653, 'fcm_dpo/delta': 0.04872651398181915, 'fcm_dpo/margin': 301.99615478515625, 'margin_dpo/margin_mean': 301.9961242675781, 'margin_dpo/margin_std': 475.26885986328125, 'logps/chosen': -607.17041015625, 'logps/rejected': -920.3125610351562, 'logps/ref_chosen': -64.91644287109375, 'logps/ref_rejected': -76.06245422363281, 'KL/chosen_KL_mean': -542.2540283203125, 'KL/rejected_KL_mean': -844.2501220703125, 'KL/mean': -693.2520751953125, 'KL/std': 411.43499755859375, 'logits/chosen': -0.8896423578262329, 'logits/rejected': -0.8757469654083252, 'epoch': 0.75} + 75%|███████▌ | 513/681 [21:46<07:13, 2.58s/it] 75%|███████▌ | 514/681 [21:49<07:16, 2.61s/it] {'loss': 1.1399, 'grad_norm': 35.660560607910156, 'learning_rate': 8.734542494893954e-08, 'fcm_dpo/beta': 0.0011872373288497329, 'fcm_dpo/q_t': 0.4199420213699341, 'fcm_dpo/delta': 0.04112107306718826, 'fcm_dpo/margin': 303.3856201171875, 'margin_dpo/margin_mean': 303.3856201171875, 'margin_dpo/margin_std': 503.74169921875, 'logps/chosen': -632.4039306640625, 'logps/rejected': -940.5054931640625, 'logps/ref_chosen': -74.22957611083984, 'logps/ref_rejected': -78.945556640625, 'KL/chosen_KL_mean': -558.1743774414062, 'KL/rejected_KL_mean': -861.5599365234375, 'KL/mean': -709.8671875, 'KL/std': 448.15179443359375, 'logits/chosen': -0.8495243191719055, 'logits/rejected': -0.8417561054229736, 'epoch': 0.75} + 75%|███████▌ | 514/681 [21:49<07:16, 2.61s/it] 76%|███████▌ | 515/681 [21:51<07:12, 2.61s/it] {'loss': 1.201, 'grad_norm': 48.169334411621094, 'learning_rate': 8.637300491465272e-08, 'fcm_dpo/beta': 0.0012109719682484865, 'fcm_dpo/q_t': 0.4361518621444702, 'fcm_dpo/delta': 0.1313389241695404, 'fcm_dpo/margin': 224.79022216796875, 'margin_dpo/margin_mean': 224.79022216796875, 'margin_dpo/margin_std': 446.3857421875, 'logps/chosen': -527.9971313476562, 'logps/rejected': -789.4835815429688, 'logps/ref_chosen': -50.40156555175781, 'logps/ref_rejected': -87.09774780273438, 'KL/chosen_KL_mean': -477.5955505371094, 'KL/rejected_KL_mean': -702.3858642578125, 'KL/mean': -589.99072265625, 'KL/std': 382.209716796875, 'logits/chosen': -0.7953609228134155, 'logits/rejected': -0.8050397634506226, 'epoch': 0.76} + 76%|███████▌ | 515/681 [21:51<07:12, 2.61s/it] 76%|███████▌ | 516/681 [21:53<06:49, 2.48s/it] {'loss': 1.0738, 'grad_norm': 45.79306411743164, 'learning_rate': 8.540489660386064e-08, 'fcm_dpo/beta': 0.001219091354869306, 'fcm_dpo/q_t': 0.40270549058914185, 'fcm_dpo/delta': -0.01847529225051403, 'fcm_dpo/margin': 342.53802490234375, 'margin_dpo/margin_mean': 342.53802490234375, 'margin_dpo/margin_std': 418.29913330078125, 'logps/chosen': -569.3353271484375, 'logps/rejected': -958.9461669921875, 'logps/ref_chosen': -64.64956665039062, 'logps/ref_rejected': -111.72237396240234, 'KL/chosen_KL_mean': -504.6857604980469, 'KL/rejected_KL_mean': -847.2237548828125, 'KL/mean': -675.954833984375, 'KL/std': 428.9405212402344, 'logits/chosen': -0.883423924446106, 'logits/rejected': -0.9122099876403809, 'epoch': 0.76} + 76%|███████▌ | 516/681 [21:53<06:49, 2.48s/it] 76%|███████▌ | 517/681 [21:56<06:49, 2.50s/it] {'loss': 1.0572, 'grad_norm': 29.204376220703125, 'learning_rate': 8.444112552711752e-08, 'fcm_dpo/beta': 0.0011984179727733135, 'fcm_dpo/q_t': 0.395224928855896, 'fcm_dpo/delta': -0.07682677358388901, 'fcm_dpo/margin': 394.6889343261719, 'margin_dpo/margin_mean': 394.6889343261719, 'margin_dpo/margin_std': 530.7623291015625, 'logps/chosen': -595.247802734375, 'logps/rejected': -1018.1063232421875, 'logps/ref_chosen': -60.913551330566406, 'logps/ref_rejected': -89.08308410644531, 'KL/chosen_KL_mean': -534.3342895507812, 'KL/rejected_KL_mean': -929.0232543945312, 'KL/mean': -731.6787109375, 'KL/std': 476.9851989746094, 'logits/chosen': -0.830208420753479, 'logits/rejected': -0.8261853456497192, 'epoch': 0.76} + 76%|███████▌ | 517/681 [21:56<06:49, 2.50s/it] 76%|███████▌ | 518/681 [21:59<06:57, 2.56s/it] {'loss': 1.0945, 'grad_norm': 62.06501007080078, 'learning_rate': 8.348171708068747e-08, 'fcm_dpo/beta': 0.0011941856937482953, 'fcm_dpo/q_t': 0.4095669090747833, 'fcm_dpo/delta': 0.012808417901396751, 'fcm_dpo/margin': 324.5027770996094, 'margin_dpo/margin_mean': 324.5028076171875, 'margin_dpo/margin_std': 413.08428955078125, 'logps/chosen': -529.95263671875, 'logps/rejected': -882.312255859375, 'logps/ref_chosen': -57.45589065551758, 'logps/ref_rejected': -85.31269836425781, 'KL/chosen_KL_mean': -472.4967041015625, 'KL/rejected_KL_mean': -796.99951171875, 'KL/mean': -634.7481689453125, 'KL/std': 382.7030944824219, 'logits/chosen': -0.8725818395614624, 'logits/rejected': -0.8878906965255737, 'epoch': 0.76} + 76%|███████▌ | 518/681 [21:59<06:57, 2.56s/it] 76%|███████▌ | 519/681 [22:02<07:16, 2.69s/it] {'loss': 1.1817, 'grad_norm': 33.864437103271484, 'learning_rate': 8.25266965458755e-08, 'fcm_dpo/beta': 0.0012234165333211422, 'fcm_dpo/q_t': 0.4324970841407776, 'fcm_dpo/delta': 0.11090720444917679, 'fcm_dpo/margin': 238.39837646484375, 'margin_dpo/margin_mean': 238.39837646484375, 'margin_dpo/margin_std': 425.4404296875, 'logps/chosen': -548.3914794921875, 'logps/rejected': -817.1707763671875, 'logps/ref_chosen': -74.06331634521484, 'logps/ref_rejected': -104.44416809082031, 'KL/chosen_KL_mean': -474.32818603515625, 'KL/rejected_KL_mean': -712.7265625, 'KL/mean': -593.5274047851562, 'KL/std': 341.5205078125, 'logits/chosen': -0.8436448574066162, 'logits/rejected': -0.8289774060249329, 'epoch': 0.76} + 76%|███████▌ | 519/681 [22:02<07:16, 2.69s/it] 76%|███████▋ | 520/681 [22:04<07:13, 2.69s/it] {'loss': 1.126, 'grad_norm': 38.63158416748047, 'learning_rate': 8.15760890883607e-08, 'fcm_dpo/beta': 0.001228465000167489, 'fcm_dpo/q_t': 0.4170858561992645, 'fcm_dpo/delta': 0.0323098823428154, 'fcm_dpo/margin': 300.2342529296875, 'margin_dpo/margin_mean': 300.2342529296875, 'margin_dpo/margin_std': 449.99859619140625, 'logps/chosen': -580.9718017578125, 'logps/rejected': -910.8875732421875, 'logps/ref_chosen': -70.2998275756836, 'logps/ref_rejected': -99.98133850097656, 'KL/chosen_KL_mean': -510.67193603515625, 'KL/rejected_KL_mean': -810.9061889648438, 'KL/mean': -660.7890625, 'KL/std': 406.78131103515625, 'logits/chosen': -0.8047879934310913, 'logits/rejected': -0.8105298280715942, 'epoch': 0.76} + 76%|███████▋ | 520/681 [22:04<07:13, 2.69s/it] 77%|███████▋ | 521/681 [22:07<07:09, 2.68s/it] {'loss': 1.0889, 'grad_norm': 31.36321258544922, 'learning_rate': 8.062991975753378e-08, 'fcm_dpo/beta': 0.0012406650930643082, 'fcm_dpo/q_t': 0.4070153832435608, 'fcm_dpo/delta': -0.006974354386329651, 'fcm_dpo/margin': 327.14410400390625, 'margin_dpo/margin_mean': 327.1440734863281, 'margin_dpo/margin_std': 424.02587890625, 'logps/chosen': -508.67523193359375, 'logps/rejected': -860.9569091796875, 'logps/ref_chosen': -58.14292526245117, 'logps/ref_rejected': -83.28060913085938, 'KL/chosen_KL_mean': -450.53228759765625, 'KL/rejected_KL_mean': -777.6763305664062, 'KL/mean': -614.1043090820312, 'KL/std': 415.71075439453125, 'logits/chosen': -0.8560887575149536, 'logits/rejected': -0.8592597246170044, 'epoch': 0.77} + 77%|███████▋ | 521/681 [22:07<07:09, 2.68s/it] 77%|███████▋ | 522/681 [22:09<07:02, 2.65s/it] {'loss': 1.1325, 'grad_norm': 32.587318420410156, 'learning_rate': 7.968821348583643e-08, 'fcm_dpo/beta': 0.001239138189703226, 'fcm_dpo/q_t': 0.41743797063827515, 'fcm_dpo/delta': 0.03980087861418724, 'fcm_dpo/margin': 291.8514404296875, 'margin_dpo/margin_mean': 291.8514404296875, 'margin_dpo/margin_std': 456.8363037109375, 'logps/chosen': -559.9071655273438, 'logps/rejected': -871.224853515625, 'logps/ref_chosen': -46.54766845703125, 'logps/ref_rejected': -66.01388549804688, 'KL/chosen_KL_mean': -513.3594970703125, 'KL/rejected_KL_mean': -805.2109375, 'KL/mean': -659.28515625, 'KL/std': 429.93572998046875, 'logits/chosen': -0.8581516146659851, 'logits/rejected': -0.8615491390228271, 'epoch': 0.77} + 77%|███████▋ | 522/681 [22:10<07:02, 2.65s/it] 77%|███████▋ | 523/681 [22:12<07:02, 2.68s/it] {'loss': 1.1311, 'grad_norm': 36.84627151489258, 'learning_rate': 7.875099508810484e-08, 'fcm_dpo/beta': 0.0012422900181263685, 'fcm_dpo/q_t': 0.4125151038169861, 'fcm_dpo/delta': 0.007354713976383209, 'fcm_dpo/margin': 316.2938232421875, 'margin_dpo/margin_mean': 316.2938232421875, 'margin_dpo/margin_std': 531.6959228515625, 'logps/chosen': -602.6751708984375, 'logps/rejected': -940.9608154296875, 'logps/ref_chosen': -61.76960372924805, 'logps/ref_rejected': -83.76141357421875, 'KL/chosen_KL_mean': -540.905517578125, 'KL/rejected_KL_mean': -857.1994018554688, 'KL/mean': -699.052490234375, 'KL/std': 499.2489929199219, 'logits/chosen': -0.8944802284240723, 'logits/rejected': -0.8932949304580688, 'epoch': 0.77} + 77%|███████▋ | 523/681 [22:12<07:02, 2.68s/it] 77%|███████▋ | 524/681 [22:15<06:58, 2.66s/it] {'loss': 1.1012, 'grad_norm': 39.90791702270508, 'learning_rate': 7.781828926091535e-08, 'fcm_dpo/beta': 0.0012349834432825446, 'fcm_dpo/q_t': 0.4045429229736328, 'fcm_dpo/delta': -0.010303705930709839, 'fcm_dpo/margin': 331.36962890625, 'margin_dpo/margin_mean': 331.36962890625, 'margin_dpo/margin_std': 464.6836853027344, 'logps/chosen': -613.0374755859375, 'logps/rejected': -947.636962890625, 'logps/ref_chosen': -78.0720443725586, 'logps/ref_rejected': -81.30198669433594, 'KL/chosen_KL_mean': -534.9653930664062, 'KL/rejected_KL_mean': -866.3349609375, 'KL/mean': -700.6502075195312, 'KL/std': 458.77227783203125, 'logits/chosen': -0.9295982122421265, 'logits/rejected': -0.914442777633667, 'epoch': 0.77} + 77%|███████▋ | 524/681 [22:15<06:58, 2.66s/it] 77%|███████▋ | 525/681 [22:18<06:57, 2.67s/it] {'loss': 1.0215, 'grad_norm': 41.53816604614258, 'learning_rate': 7.689012058193384e-08, 'fcm_dpo/beta': 0.0012153794523328543, 'fcm_dpo/q_t': 0.3845774531364441, 'fcm_dpo/delta': -0.12056128680706024, 'fcm_dpo/margin': 422.78076171875, 'margin_dpo/margin_mean': 422.78076171875, 'margin_dpo/margin_std': 505.25018310546875, 'logps/chosen': -570.680908203125, 'logps/rejected': -1042.686767578125, 'logps/ref_chosen': -50.827857971191406, 'logps/ref_rejected': -100.05294036865234, 'KL/chosen_KL_mean': -519.85302734375, 'KL/rejected_KL_mean': -942.6337890625, 'KL/mean': -731.243408203125, 'KL/std': 482.66436767578125, 'logits/chosen': -0.8437707424163818, 'logits/rejected': -0.8764776587486267, 'epoch': 0.77} + 77%|███████▋ | 525/681 [22:18<06:57, 2.67s/it] 77%|███████▋ | 526/681 [22:20<06:47, 2.63s/it] {'loss': 1.0386, 'grad_norm': 29.220232009887695, 'learning_rate': 7.596651350926836e-08, 'fcm_dpo/beta': 0.001197699224576354, 'fcm_dpo/q_t': 0.38653671741485596, 'fcm_dpo/delta': -0.09957602620124817, 'fcm_dpo/margin': 413.0840148925781, 'margin_dpo/margin_mean': 413.083984375, 'margin_dpo/margin_std': 507.3397521972656, 'logps/chosen': -625.2489013671875, 'logps/rejected': -1061.47509765625, 'logps/ref_chosen': -63.167236328125, 'logps/ref_rejected': -86.30934143066406, 'KL/chosen_KL_mean': -562.0816650390625, 'KL/rejected_KL_mean': -975.165771484375, 'KL/mean': -768.6236572265625, 'KL/std': 466.148193359375, 'logits/chosen': -0.8780766725540161, 'logits/rejected': -0.8744189739227295, 'epoch': 0.77} + 77%|███████▋ | 526/681 [22:20<06:47, 2.63s/it] 77%|███████▋ | 527/681 [22:23<06:41, 2.61s/it] {'loss': 1.1354, 'grad_norm': 34.30127716064453, 'learning_rate': 7.504749238082414e-08, 'fcm_dpo/beta': 0.0011968073667958379, 'fcm_dpo/q_t': 0.4214463233947754, 'fcm_dpo/delta': 0.06430923938751221, 'fcm_dpo/margin': 282.34320068359375, 'margin_dpo/margin_mean': 282.34320068359375, 'margin_dpo/margin_std': 413.549560546875, 'logps/chosen': -661.3546142578125, 'logps/rejected': -950.9118041992188, 'logps/ref_chosen': -71.12867736816406, 'logps/ref_rejected': -78.3425521850586, 'KL/chosen_KL_mean': -590.2259521484375, 'KL/rejected_KL_mean': -872.5692138671875, 'KL/mean': -731.3975830078125, 'KL/std': 487.9652099609375, 'logits/chosen': -1.0278353691101074, 'logits/rejected': -0.9955443143844604, 'epoch': 0.77} + 77%|███████▋ | 527/681 [22:23<06:41, 2.61s/it] 78%|███████▊ | 528/681 [22:25<06:42, 2.63s/it] {'loss': 1.1046, 'grad_norm': 41.24021530151367, 'learning_rate': 7.413308141366254e-08, 'fcm_dpo/beta': 0.001196006080135703, 'fcm_dpo/q_t': 0.40584173798561096, 'fcm_dpo/delta': -0.030209090560674667, 'fcm_dpo/margin': 358.5581970214844, 'margin_dpo/margin_mean': 358.5582275390625, 'margin_dpo/margin_std': 559.9374389648438, 'logps/chosen': -680.9362182617188, 'logps/rejected': -1065.3150634765625, 'logps/ref_chosen': -68.0894546508789, 'logps/ref_rejected': -93.91006469726562, 'KL/chosen_KL_mean': -612.8468017578125, 'KL/rejected_KL_mean': -971.405029296875, 'KL/mean': -792.1258544921875, 'KL/std': 480.26239013671875, 'logits/chosen': -0.9734677672386169, 'logits/rejected': -0.9565155506134033, 'epoch': 0.78} + 78%|███████▊ | 528/681 [22:25<06:42, 2.63s/it] 78%|███████▊ | 529/681 [22:28<06:41, 2.64s/it] {'loss': 1.2136, 'grad_norm': 45.17340087890625, 'learning_rate': 7.322330470336313e-08, 'fcm_dpo/beta': 0.001211107592098415, 'fcm_dpo/q_t': 0.4329761266708374, 'fcm_dpo/delta': 0.10773831605911255, 'fcm_dpo/margin': 244.0526580810547, 'margin_dpo/margin_mean': 244.05267333984375, 'margin_dpo/margin_std': 532.4437255859375, 'logps/chosen': -771.517578125, 'logps/rejected': -1049.2044677734375, 'logps/ref_chosen': -55.57495880126953, 'logps/ref_rejected': -89.20909118652344, 'KL/chosen_KL_mean': -715.942626953125, 'KL/rejected_KL_mean': -959.995361328125, 'KL/mean': -837.968994140625, 'KL/std': 430.2584228515625, 'logits/chosen': -0.9782444834709167, 'logits/rejected': -0.9867458939552307, 'epoch': 0.78} + 78%|███████▊ | 529/681 [22:28<06:41, 2.64s/it] 78%|███████▊ | 530/681 [22:31<06:46, 2.69s/it] {'loss': 1.1077, 'grad_norm': 42.13739776611328, 'learning_rate': 7.231818622338822e-08, 'fcm_dpo/beta': 0.0012108308728784323, 'fcm_dpo/q_t': 0.39872339367866516, 'fcm_dpo/delta': -0.06629342585802078, 'fcm_dpo/margin': 382.5973205566406, 'margin_dpo/margin_mean': 382.59735107421875, 'margin_dpo/margin_std': 640.5091552734375, 'logps/chosen': -659.43115234375, 'logps/rejected': -1081.7115478515625, 'logps/ref_chosen': -47.601417541503906, 'logps/ref_rejected': -87.2845230102539, 'KL/chosen_KL_mean': -611.8297119140625, 'KL/rejected_KL_mean': -994.427001953125, 'KL/mean': -803.12841796875, 'KL/std': 524.01220703125, 'logits/chosen': -0.9113196730613708, 'logits/rejected': -0.9070870876312256, 'epoch': 0.78} + 78%|███████▊ | 530/681 [22:31<06:46, 2.69s/it] 78%|███████▊ | 531/681 [22:33<06:26, 2.58s/it] {'loss': 1.1228, 'grad_norm': 42.0710563659668, 'learning_rate': 7.141774982445147e-08, 'fcm_dpo/beta': 0.0012086308561265469, 'fcm_dpo/q_t': 0.4118250906467438, 'fcm_dpo/delta': 0.001310013234615326, 'fcm_dpo/margin': 329.83197021484375, 'margin_dpo/margin_mean': 329.83197021484375, 'margin_dpo/margin_std': 535.4554443359375, 'logps/chosen': -748.7460327148438, 'logps/rejected': -1093.93798828125, 'logps/ref_chosen': -55.246063232421875, 'logps/ref_rejected': -70.60598754882812, 'KL/chosen_KL_mean': -693.5, 'KL/rejected_KL_mean': -1023.3319091796875, 'KL/mean': -858.4159545898438, 'KL/std': 566.25390625, 'logits/chosen': -1.0113496780395508, 'logits/rejected': -0.9929705858230591, 'epoch': 0.78} + 78%|███████▊ | 531/681 [22:33<06:26, 2.58s/it] 78%|███████▊ | 532/681 [22:36<06:28, 2.61s/it] {'loss': 1.1397, 'grad_norm': 65.42340850830078, 'learning_rate': 7.052201923388953e-08, 'fcm_dpo/beta': 0.0011906104627996683, 'fcm_dpo/q_t': 0.4059098958969116, 'fcm_dpo/delta': -0.03280455619096756, 'fcm_dpo/margin': 361.05938720703125, 'margin_dpo/margin_mean': 361.05938720703125, 'margin_dpo/margin_std': 642.376953125, 'logps/chosen': -760.280029296875, 'logps/rejected': -1137.644775390625, 'logps/ref_chosen': -70.28601837158203, 'logps/ref_rejected': -86.5913314819336, 'KL/chosen_KL_mean': -689.9940185546875, 'KL/rejected_KL_mean': -1051.053466796875, 'KL/mean': -870.5237426757812, 'KL/std': 547.52587890625, 'logits/chosen': -0.954756498336792, 'logits/rejected': -0.9292545914649963, 'epoch': 0.78} + 78%|███████▊ | 532/681 [22:36<06:28, 2.61s/it] 78%|███████▊ | 533/681 [22:38<06:25, 2.60s/it] {'loss': 1.1999, 'grad_norm': 43.53575897216797, 'learning_rate': 6.963101805503646e-08, 'fcm_dpo/beta': 0.0011966102756559849, 'fcm_dpo/q_t': 0.4301643371582031, 'fcm_dpo/delta': -0.0130624333396554, 'fcm_dpo/margin': 258.5040283203125, 'margin_dpo/margin_mean': 258.5040283203125, 'margin_dpo/margin_std': 543.6436767578125, 'logps/chosen': -667.987548828125, 'logps/rejected': -938.2244873046875, 'logps/ref_chosen': -64.8551025390625, 'logps/ref_rejected': -76.58805847167969, 'KL/chosen_KL_mean': -603.1324462890625, 'KL/rejected_KL_mean': -861.636474609375, 'KL/mean': -732.3844604492188, 'KL/std': 453.2569580078125, 'logits/chosen': -0.9747976064682007, 'logits/rejected': -0.9492688775062561, 'epoch': 0.78} + 78%|███████▊ | 533/681 [22:38<06:25, 2.60s/it] 78%|███████▊ | 534/681 [22:41<06:21, 2.60s/it] {'loss': 1.1036, 'grad_norm': 40.776493072509766, 'learning_rate': 6.874476976660184e-08, 'fcm_dpo/beta': 0.0011840970255434513, 'fcm_dpo/q_t': 0.4072118401527405, 'fcm_dpo/delta': -0.014506392180919647, 'fcm_dpo/margin': 348.64691162109375, 'margin_dpo/margin_mean': 348.64691162109375, 'margin_dpo/margin_std': 510.9076232910156, 'logps/chosen': -705.80517578125, 'logps/rejected': -1072.876220703125, 'logps/ref_chosen': -60.119388580322266, 'logps/ref_rejected': -78.54347229003906, 'KL/chosen_KL_mean': -645.685791015625, 'KL/rejected_KL_mean': -994.332763671875, 'KL/mean': -820.00927734375, 'KL/std': 505.59075927734375, 'logits/chosen': -0.9978982210159302, 'logits/rejected': -0.9965052008628845, 'epoch': 0.78} + 78%|███████▊ | 534/681 [22:41<06:21, 2.60s/it] 79%|███████▊ | 535/681 [22:44<06:19, 2.60s/it] {'loss': 1.061, 'grad_norm': 30.75901222229004, 'learning_rate': 6.786329772205246e-08, 'fcm_dpo/beta': 0.0011865987908095121, 'fcm_dpo/q_t': 0.39498764276504517, 'fcm_dpo/delta': -0.07477213442325592, 'fcm_dpo/margin': 396.780029296875, 'margin_dpo/margin_mean': 396.780029296875, 'margin_dpo/margin_std': 520.8341064453125, 'logps/chosen': -606.6577758789062, 'logps/rejected': -1045.415283203125, 'logps/ref_chosen': -54.330238342285156, 'logps/ref_rejected': -96.30763244628906, 'KL/chosen_KL_mean': -552.3275146484375, 'KL/rejected_KL_mean': -949.107666015625, 'KL/mean': -750.717529296875, 'KL/std': 493.9169921875, 'logits/chosen': -0.9087494611740112, 'logits/rejected': -0.90961092710495, 'epoch': 0.79} + 79%|███████▊ | 535/681 [22:44<06:19, 2.60s/it] 79%|███████▊ | 536/681 [22:46<06:18, 2.61s/it] {'loss': 1.0176, 'grad_norm': 33.95513916015625, 'learning_rate': 6.698662514899638e-08, 'fcm_dpo/beta': 0.0011459384113550186, 'fcm_dpo/q_t': 0.38111627101898193, 'fcm_dpo/delta': -0.16502085328102112, 'fcm_dpo/margin': 485.10699462890625, 'margin_dpo/margin_mean': 485.10693359375, 'margin_dpo/margin_std': 655.8091430664062, 'logps/chosen': -528.61328125, 'logps/rejected': -1055.737548828125, 'logps/ref_chosen': -47.08053207397461, 'logps/ref_rejected': -89.09783935546875, 'KL/chosen_KL_mean': -481.53271484375, 'KL/rejected_KL_mean': -966.6396484375, 'KL/mean': -724.086181640625, 'KL/std': 552.1593627929688, 'logits/chosen': -0.8919925689697266, 'logits/rejected': -0.9215620756149292, 'epoch': 0.79} + 79%|███████▊ | 536/681 [22:46<06:18, 2.61s/it] 79%|███████▉ | 537/681 [22:49<06:13, 2.59s/it] {'loss': 1.1505, 'grad_norm': 48.758907318115234, 'learning_rate': 6.611477514857114e-08, 'fcm_dpo/beta': 0.0011480746325105429, 'fcm_dpo/q_t': 0.4170358180999756, 'fcm_dpo/delta': 0.038443662226200104, 'fcm_dpo/margin': 315.5430603027344, 'margin_dpo/margin_mean': 315.5430603027344, 'margin_dpo/margin_std': 543.536376953125, 'logps/chosen': -583.41064453125, 'logps/rejected': -911.6445922851562, 'logps/ref_chosen': -57.747467041015625, 'logps/ref_rejected': -70.43838500976562, 'KL/chosen_KL_mean': -525.6631469726562, 'KL/rejected_KL_mean': -841.2061767578125, 'KL/mean': -683.4346923828125, 'KL/std': 446.1978454589844, 'logits/chosen': -0.9522344470024109, 'logits/rejected': -0.9362703561782837, 'epoch': 0.79} + 79%|███████▉ | 537/681 [22:49<06:13, 2.59s/it] 79%|███████▉ | 538/681 [22:51<06:12, 2.61s/it] {'loss': 1.0819, 'grad_norm': 32.13328170776367, 'learning_rate': 6.524777069483525e-08, 'fcm_dpo/beta': 0.0011375262401998043, 'fcm_dpo/q_t': 0.40403687953948975, 'fcm_dpo/delta': -0.02470758929848671, 'fcm_dpo/margin': 372.283447265625, 'margin_dpo/margin_mean': 372.283447265625, 'margin_dpo/margin_std': 503.72845458984375, 'logps/chosen': -709.4288330078125, 'logps/rejected': -1099.5244140625, 'logps/ref_chosen': -66.41594696044922, 'logps/ref_rejected': -84.22808837890625, 'KL/chosen_KL_mean': -643.0128784179688, 'KL/rejected_KL_mean': -1015.29638671875, 'KL/mean': -829.154541015625, 'KL/std': 483.1163330078125, 'logits/chosen': -0.9377896785736084, 'logits/rejected': -0.9210348129272461, 'epoch': 0.79} + 79%|███████▉ | 538/681 [22:51<06:12, 2.61s/it] 79%|███████▉ | 539/681 [22:54<06:14, 2.64s/it] {'loss': 1.0923, 'grad_norm': 33.58725357055664, 'learning_rate': 6.438563463416221e-08, 'fcm_dpo/beta': 0.00114156911149621, 'fcm_dpo/q_t': 0.40886110067367554, 'fcm_dpo/delta': 0.002832382917404175, 'fcm_dpo/margin': 347.9453430175781, 'margin_dpo/margin_mean': 347.9453430175781, 'margin_dpo/margin_std': 457.59783935546875, 'logps/chosen': -614.3486328125, 'logps/rejected': -995.6550903320312, 'logps/ref_chosen': -58.492855072021484, 'logps/ref_rejected': -91.85395050048828, 'KL/chosen_KL_mean': -555.8558349609375, 'KL/rejected_KL_mean': -903.8011474609375, 'KL/mean': -729.8284912109375, 'KL/std': 412.5657653808594, 'logits/chosen': -0.9776486158370972, 'logits/rejected': -0.9680448174476624, 'epoch': 0.79} + 79%|███████▉ | 539/681 [22:54<06:14, 2.64s/it] 79%|███████▉ | 540/681 [22:57<06:09, 2.62s/it] {'loss': 1.0431, 'grad_norm': 32.977603912353516, 'learning_rate': 6.352838968463919e-08, 'fcm_dpo/beta': 0.0011229969095438719, 'fcm_dpo/q_t': 0.38754981756210327, 'fcm_dpo/delta': -0.12408408522605896, 'fcm_dpo/margin': 461.1107177734375, 'margin_dpo/margin_mean': 461.1107177734375, 'margin_dpo/margin_std': 613.2233276367188, 'logps/chosen': -619.2936401367188, 'logps/rejected': -1133.351806640625, 'logps/ref_chosen': -63.482513427734375, 'logps/ref_rejected': -116.42999267578125, 'KL/chosen_KL_mean': -555.8110961914062, 'KL/rejected_KL_mean': -1016.921875, 'KL/mean': -786.366455078125, 'KL/std': 521.7627563476562, 'logits/chosen': -0.9218310713768005, 'logits/rejected': -0.944137692451477, 'epoch': 0.79} + 79%|███████▉ | 540/681 [22:57<06:09, 2.62s/it] 79%|███████▉ | 541/681 [22:59<05:56, 2.54s/it] {'loss': 1.2244, 'grad_norm': 51.01187515258789, 'learning_rate': 6.267605843546767e-08, 'fcm_dpo/beta': 0.0011111920466646552, 'fcm_dpo/q_t': 0.43856528401374817, 'fcm_dpo/delta': -0.0019461165647953749, 'fcm_dpo/margin': 243.34225463867188, 'margin_dpo/margin_mean': 243.34228515625, 'margin_dpo/margin_std': 547.9241943359375, 'logps/chosen': -746.0413818359375, 'logps/rejected': -1014.3770751953125, 'logps/ref_chosen': -78.28036499023438, 'logps/ref_rejected': -103.273681640625, 'KL/chosen_KL_mean': -667.7610473632812, 'KL/rejected_KL_mean': -911.1033935546875, 'KL/mean': -789.4322509765625, 'KL/std': 467.2005920410156, 'logits/chosen': -1.004181981086731, 'logits/rejected': -0.992661714553833, 'epoch': 0.79} + 79%|███████▉ | 541/681 [22:59<05:56, 2.54s/it] 80%|███████▉ | 542/681 [23:02<06:06, 2.64s/it] {'loss': 1.0578, 'grad_norm': 47.77986145019531, 'learning_rate': 6.182866334636888e-08, 'fcm_dpo/beta': 0.001085467985831201, 'fcm_dpo/q_t': 0.39055657386779785, 'fcm_dpo/delta': -0.09905168414115906, 'fcm_dpo/margin': 453.1556091308594, 'margin_dpo/margin_mean': 453.1556091308594, 'margin_dpo/margin_std': 612.7730712890625, 'logps/chosen': -634.0296630859375, 'logps/rejected': -1126.1754150390625, 'logps/ref_chosen': -57.48497009277344, 'logps/ref_rejected': -96.47506713867188, 'KL/chosen_KL_mean': -576.544677734375, 'KL/rejected_KL_mean': -1029.7003173828125, 'KL/mean': -803.12255859375, 'KL/std': 522.02685546875, 'logits/chosen': -0.9915866255760193, 'logits/rejected': -1.0235321521759033, 'epoch': 0.8} + 80%|███████▉ | 542/681 [23:02<06:06, 2.64s/it] 80%|███████▉ | 543/681 [23:04<06:00, 2.61s/it] {'loss': 1.1968, 'grad_norm': 30.012170791625977, 'learning_rate': 6.098622674699147e-08, 'fcm_dpo/beta': 0.001093997503630817, 'fcm_dpo/q_t': 0.4311785399913788, 'fcm_dpo/delta': 0.042909275740385056, 'fcm_dpo/margin': 327.8123474121094, 'margin_dpo/margin_mean': 327.8123474121094, 'margin_dpo/margin_std': 748.0994262695312, 'logps/chosen': -726.1270141601562, 'logps/rejected': -1098.9208984375, 'logps/ref_chosen': -60.61750793457031, 'logps/ref_rejected': -105.59896850585938, 'KL/chosen_KL_mean': -665.509521484375, 'KL/rejected_KL_mean': -993.3218994140625, 'KL/mean': -829.4156494140625, 'KL/std': 642.0631103515625, 'logits/chosen': -0.9615781307220459, 'logits/rejected': -0.9914584159851074, 'epoch': 0.8} + 80%|███████▉ | 543/681 [23:04<06:00, 2.61s/it] 80%|███████▉ | 544/681 [23:07<05:53, 2.58s/it] {'loss': 1.1045, 'grad_norm': 32.4089469909668, 'learning_rate': 6.01487708363232e-08, 'fcm_dpo/beta': 0.0010954445460811257, 'fcm_dpo/q_t': 0.4067176282405853, 'fcm_dpo/delta': -0.012936984188854694, 'fcm_dpo/margin': 376.4364013671875, 'margin_dpo/margin_mean': 376.4364013671875, 'margin_dpo/margin_std': 568.992919921875, 'logps/chosen': -727.5975341796875, 'logps/rejected': -1145.34619140625, 'logps/ref_chosen': -59.642303466796875, 'logps/ref_rejected': -100.95469665527344, 'KL/chosen_KL_mean': -667.9552001953125, 'KL/rejected_KL_mean': -1044.3916015625, 'KL/mean': -856.1734008789062, 'KL/std': 508.0960693359375, 'logits/chosen': -0.9522734880447388, 'logits/rejected': -0.9718469381332397, 'epoch': 0.8} + 80%|███████▉ | 544/681 [23:07<05:53, 2.58s/it] 80%|████████ | 545/681 [23:10<05:49, 2.57s/it] {'loss': 1.0441, 'grad_norm': 32.737972259521484, 'learning_rate': 5.9316317682106294e-08, 'fcm_dpo/beta': 0.0010812245309352875, 'fcm_dpo/q_t': 0.3911089301109314, 'fcm_dpo/delta': -0.09056208282709122, 'fcm_dpo/margin': 449.6327209472656, 'margin_dpo/margin_mean': 449.6326904296875, 'margin_dpo/margin_std': 570.2744140625, 'logps/chosen': -687.356689453125, 'logps/rejected': -1165.248779296875, 'logps/ref_chosen': -67.64859771728516, 'logps/ref_rejected': -95.90800476074219, 'KL/chosen_KL_mean': -619.7081298828125, 'KL/rejected_KL_mean': -1069.3408203125, 'KL/mean': -844.5244140625, 'KL/std': 505.60400390625, 'logits/chosen': -0.9047988653182983, 'logits/rejected': -0.938023567199707, 'epoch': 0.8} + 80%|████████ | 545/681 [23:10<05:49, 2.57s/it] 80%|████████ | 546/681 [23:12<05:46, 2.56s/it] {'loss': 1.1537, 'grad_norm': 35.74420928955078, 'learning_rate': 5.848888922025552e-08, 'fcm_dpo/beta': 0.0010882640490308404, 'fcm_dpo/q_t': 0.4228670001029968, 'fcm_dpo/delta': 0.07409149408340454, 'fcm_dpo/margin': 301.70965576171875, 'margin_dpo/margin_mean': 301.70965576171875, 'margin_dpo/margin_std': 491.32568359375, 'logps/chosen': -646.5230712890625, 'logps/rejected': -979.354736328125, 'logps/ref_chosen': -50.744232177734375, 'logps/ref_rejected': -81.86622619628906, 'KL/chosen_KL_mean': -595.77880859375, 'KL/rejected_KL_mean': -897.488525390625, 'KL/mean': -746.6336669921875, 'KL/std': 452.455322265625, 'logits/chosen': -0.9621337652206421, 'logits/rejected': -0.9499717354774475, 'epoch': 0.8} + 80%|████████ | 546/681 [23:12<05:46, 2.56s/it] 80%|████████ | 547/681 [23:15<05:51, 2.62s/it] {'loss': 1.0824, 'grad_norm': 43.03551483154297, 'learning_rate': 5.7666507254280265e-08, 'fcm_dpo/beta': 0.0010885847732424736, 'fcm_dpo/q_t': 0.40285325050354004, 'fcm_dpo/delta': -0.030972033739089966, 'fcm_dpo/margin': 394.6861267089844, 'margin_dpo/margin_mean': 394.68609619140625, 'margin_dpo/margin_std': 543.5963745117188, 'logps/chosen': -679.5400390625, 'logps/rejected': -1091.2998046875, 'logps/ref_chosen': -73.6877212524414, 'logps/ref_rejected': -90.76136779785156, 'KL/chosen_KL_mean': -605.8523559570312, 'KL/rejected_KL_mean': -1000.5384521484375, 'KL/mean': -803.1954345703125, 'KL/std': 501.6481628417969, 'logits/chosen': -0.9044293165206909, 'logits/rejected': -0.9168886542320251, 'epoch': 0.8} + 80%|████████ | 547/681 [23:15<05:51, 2.62s/it] 80%|████████ | 548/681 [23:17<05:44, 2.59s/it] {'loss': 1.1264, 'grad_norm': 30.352699279785156, 'learning_rate': 5.684919345471029e-08, 'fcm_dpo/beta': 0.001088649732992053, 'fcm_dpo/q_t': 0.41777533292770386, 'fcm_dpo/delta': 0.021810464560985565, 'fcm_dpo/margin': 348.1575622558594, 'margin_dpo/margin_mean': 348.1575622558594, 'margin_dpo/margin_std': 577.4847412109375, 'logps/chosen': -691.4476928710938, 'logps/rejected': -1068.47705078125, 'logps/ref_chosen': -65.24634552001953, 'logps/ref_rejected': -94.11807250976562, 'KL/chosen_KL_mean': -626.2013549804688, 'KL/rejected_KL_mean': -974.35888671875, 'KL/mean': -800.2800903320312, 'KL/std': 522.8885498046875, 'logits/chosen': -0.9622275829315186, 'logits/rejected': -0.9603374004364014, 'epoch': 0.8} + 80%|████████ | 548/681 [23:17<05:44, 2.59s/it] 81%|████████ | 549/681 [23:20<05:38, 2.56s/it] {'loss': 1.1922, 'grad_norm': 48.91337203979492, 'learning_rate': 5.603696935852426e-08, 'fcm_dpo/beta': 0.0010918962070718408, 'fcm_dpo/q_t': 0.43372684717178345, 'fcm_dpo/delta': 0.014238527044653893, 'fcm_dpo/margin': 269.5531005859375, 'margin_dpo/margin_mean': 269.5531005859375, 'margin_dpo/margin_std': 526.7017822265625, 'logps/chosen': -699.7032470703125, 'logps/rejected': -993.9542846679688, 'logps/ref_chosen': -49.21235656738281, 'logps/ref_rejected': -73.91031646728516, 'KL/chosen_KL_mean': -650.4908447265625, 'KL/rejected_KL_mean': -920.0439453125, 'KL/mean': -785.2673950195312, 'KL/std': 439.36041259765625, 'logits/chosen': -0.9819549918174744, 'logits/rejected': -0.9728246927261353, 'epoch': 0.81} + 81%|████████ | 549/681 [23:20<05:38, 2.56s/it] 81%|████████ | 550/681 [23:22<05:37, 2.58s/it] {'loss': 1.1343, 'grad_norm': 45.43336868286133, 'learning_rate': 5.5229856368582376e-08, 'fcm_dpo/beta': 0.0010998416692018509, 'fcm_dpo/q_t': 0.41901546716690063, 'fcm_dpo/delta': 0.04686359316110611, 'fcm_dpo/margin': 322.6148376464844, 'margin_dpo/margin_mean': 322.6148376464844, 'margin_dpo/margin_std': 508.62127685546875, 'logps/chosen': -700.1971435546875, 'logps/rejected': -1061.130859375, 'logps/ref_chosen': -56.80695343017578, 'logps/ref_rejected': -95.12580871582031, 'KL/chosen_KL_mean': -643.39013671875, 'KL/rejected_KL_mean': -966.0050048828125, 'KL/mean': -804.6975708007812, 'KL/std': 482.32452392578125, 'logits/chosen': -0.9264281988143921, 'logits/rejected': -0.9501577615737915, 'epoch': 0.81} + 81%|████████ | 550/681 [23:22<05:37, 2.58s/it] 81%|████████ | 551/681 [23:25<05:30, 2.54s/it] {'loss': 0.9628, 'grad_norm': 60.08759689331055, 'learning_rate': 5.4427875753062734e-08, 'fcm_dpo/beta': 0.0010715980315580964, 'fcm_dpo/q_t': 0.3684789538383484, 'fcm_dpo/delta': -0.18658655881881714, 'fcm_dpo/margin': 537.0025634765625, 'margin_dpo/margin_mean': 537.0025634765625, 'margin_dpo/margin_std': 528.0545654296875, 'logps/chosen': -623.759033203125, 'logps/rejected': -1213.3280029296875, 'logps/ref_chosen': -59.10633087158203, 'logps/ref_rejected': -111.67280578613281, 'KL/chosen_KL_mean': -564.6527099609375, 'KL/rejected_KL_mean': -1101.6552734375, 'KL/mean': -833.1539306640625, 'KL/std': 532.0479736328125, 'logits/chosen': -0.9065227508544922, 'logits/rejected': -0.9625868797302246, 'epoch': 0.81} + 81%|████████ | 551/681 [23:25<05:30, 2.54s/it] 81%|████████ | 552/681 [23:27<05:17, 2.46s/it] {'loss': 0.9784, 'grad_norm': 48.343841552734375, 'learning_rate': 5.363104864490034e-08, 'fcm_dpo/beta': 0.0010204364079982042, 'fcm_dpo/q_t': 0.3694334626197815, 'fcm_dpo/delta': -0.21537676453590393, 'fcm_dpo/margin': 586.982177734375, 'margin_dpo/margin_mean': 586.982177734375, 'margin_dpo/margin_std': 677.5487060546875, 'logps/chosen': -597.8224487304688, 'logps/rejected': -1227.0120849609375, 'logps/ref_chosen': -62.35459899902344, 'logps/ref_rejected': -104.56210327148438, 'KL/chosen_KL_mean': -535.4678344726562, 'KL/rejected_KL_mean': -1122.449951171875, 'KL/mean': -828.958984375, 'KL/std': 597.85693359375, 'logits/chosen': -0.937026858329773, 'logits/rejected': -0.9775291681289673, 'epoch': 0.81} + 81%|████████ | 552/681 [23:27<05:17, 2.46s/it] 81%|████████ | 553/681 [23:30<05:19, 2.50s/it] {'loss': 1.168, 'grad_norm': 28.72612762451172, 'learning_rate': 5.2839396041230415e-08, 'fcm_dpo/beta': 0.0010282043367624283, 'fcm_dpo/q_t': 0.43020299077033997, 'fcm_dpo/delta': 0.08580633997917175, 'fcm_dpo/margin': 308.27679443359375, 'margin_dpo/margin_mean': 308.27679443359375, 'margin_dpo/margin_std': 553.154541015625, 'logps/chosen': -691.1805419921875, 'logps/rejected': -1029.295654296875, 'logps/ref_chosen': -68.25881958007812, 'logps/ref_rejected': -98.0971450805664, 'KL/chosen_KL_mean': -622.9216918945312, 'KL/rejected_KL_mean': -931.198486328125, 'KL/mean': -777.06005859375, 'KL/std': 503.83489990234375, 'logits/chosen': -0.9406133890151978, 'logits/rejected': -0.9367384910583496, 'epoch': 0.81} + 81%|████████ | 553/681 [23:30<05:19, 2.50s/it] 81%|████████▏ | 554/681 [23:32<05:26, 2.57s/it] {'loss': 1.1295, 'grad_norm': 94.50716400146484, 'learning_rate': 5.205293880283551e-08, 'fcm_dpo/beta': 0.00103902374394238, 'fcm_dpo/q_t': 0.4067898094654083, 'fcm_dpo/delta': -0.029413558542728424, 'fcm_dpo/margin': 411.3079833984375, 'margin_dpo/margin_mean': 411.3079833984375, 'margin_dpo/margin_std': 707.3028564453125, 'logps/chosen': -722.765380859375, 'logps/rejected': -1155.908447265625, 'logps/ref_chosen': -67.94767761230469, 'logps/ref_rejected': -89.78272247314453, 'KL/chosen_KL_mean': -654.8176879882812, 'KL/rejected_KL_mean': -1066.125732421875, 'KL/mean': -860.4717407226562, 'KL/std': 554.7606811523438, 'logits/chosen': -0.9185788035392761, 'logits/rejected': -0.8953433036804199, 'epoch': 0.81} + 81%|████████▏ | 554/681 [23:33<05:26, 2.57s/it] 81%|████████▏ | 555/681 [23:35<05:19, 2.53s/it] {'loss': 1.0765, 'grad_norm': 39.124813079833984, 'learning_rate': 5.127169765359515e-08, 'fcm_dpo/beta': 0.0010173844639211893, 'fcm_dpo/q_t': 0.39459365606307983, 'fcm_dpo/delta': -0.09548080712556839, 'fcm_dpo/margin': 482.5062255859375, 'margin_dpo/margin_mean': 482.5062255859375, 'margin_dpo/margin_std': 734.37890625, 'logps/chosen': -709.71630859375, 'logps/rejected': -1247.3714599609375, 'logps/ref_chosen': -53.33049011230469, 'logps/ref_rejected': -108.47937774658203, 'KL/chosen_KL_mean': -656.3858642578125, 'KL/rejected_KL_mean': -1138.89208984375, 'KL/mean': -897.638916015625, 'KL/std': 570.92724609375, 'logits/chosen': -0.9555931687355042, 'logits/rejected': -1.0082941055297852, 'epoch': 0.81} + 81%|████████▏ | 555/681 [23:35<05:19, 2.53s/it] 82%|████████▏ | 556/681 [23:38<05:23, 2.59s/it] {'loss': 1.1551, 'grad_norm': 35.36925506591797, 'learning_rate': 5.049569317994012e-08, 'fcm_dpo/beta': 0.0010262987343594432, 'fcm_dpo/q_t': 0.4302240312099457, 'fcm_dpo/delta': 0.10118204355239868, 'fcm_dpo/margin': 294.1011047363281, 'margin_dpo/margin_mean': 294.1011047363281, 'margin_dpo/margin_std': 439.78289794921875, 'logps/chosen': -680.3403930664062, 'logps/rejected': -1017.137451171875, 'logps/ref_chosen': -58.64447021484375, 'logps/ref_rejected': -101.34040832519531, 'KL/chosen_KL_mean': -621.6959228515625, 'KL/rejected_KL_mean': -915.7969970703125, 'KL/mean': -768.7464599609375, 'KL/std': 436.3918762207031, 'logits/chosen': -0.9388109445571899, 'logits/rejected': -0.9352363348007202, 'epoch': 0.82} + 82%|████████▏ | 556/681 [23:38<05:23, 2.59s/it] 82%|████████▏ | 557/681 [23:40<05:21, 2.60s/it] {'loss': 1.1, 'grad_norm': 69.26335906982422, 'learning_rate': 4.9724945830310144e-08, 'fcm_dpo/beta': 0.0010207702871412039, 'fcm_dpo/q_t': 0.401253342628479, 'fcm_dpo/delta': -0.05446251481771469, 'fcm_dpo/margin': 442.61669921875, 'margin_dpo/margin_mean': 442.61669921875, 'margin_dpo/margin_std': 697.1099853515625, 'logps/chosen': -764.374267578125, 'logps/rejected': -1249.090087890625, 'logps/ref_chosen': -67.84066009521484, 'logps/ref_rejected': -109.93965911865234, 'KL/chosen_KL_mean': -696.53369140625, 'KL/rejected_KL_mean': -1139.150390625, 'KL/mean': -917.8419799804688, 'KL/std': 616.8480224609375, 'logits/chosen': -0.9779127836227417, 'logits/rejected': -1.0124623775482178, 'epoch': 0.82} + 82%|████████▏ | 557/681 [23:40<05:21, 2.60s/it] 82%|████████▏ | 558/681 [23:43<05:17, 2.58s/it] {'loss': 0.9776, 'grad_norm': 35.57923126220703, 'learning_rate': 4.8959475914614554e-08, 'fcm_dpo/beta': 0.0009917229181155562, 'fcm_dpo/q_t': 0.3662768006324768, 'fcm_dpo/delta': -0.19064576923847198, 'fcm_dpo/margin': 584.4525146484375, 'margin_dpo/margin_mean': 584.4525146484375, 'margin_dpo/margin_std': 628.419677734375, 'logps/chosen': -686.46484375, 'logps/rejected': -1310.710205078125, 'logps/ref_chosen': -62.36824035644531, 'logps/ref_rejected': -102.16102600097656, 'KL/chosen_KL_mean': -624.0965576171875, 'KL/rejected_KL_mean': -1208.549072265625, 'KL/mean': -916.3228759765625, 'KL/std': 568.7822265625, 'logits/chosen': -1.0224618911743164, 'logits/rejected': -1.0410199165344238, 'epoch': 0.82} + 82%|████████▏ | 558/681 [23:43<05:17, 2.58s/it] 82%|████████▏ | 559/681 [23:45<05:13, 2.57s/it] {'loss': 1.0485, 'grad_norm': 28.913423538208008, 'learning_rate': 4.8199303603697614e-08, 'fcm_dpo/beta': 0.000971162342466414, 'fcm_dpo/q_t': 0.39221078157424927, 'fcm_dpo/delta': -0.08387550711631775, 'fcm_dpo/margin': 494.1765441894531, 'margin_dpo/margin_mean': 494.1765441894531, 'margin_dpo/margin_std': 634.8758544921875, 'logps/chosen': -763.1143798828125, 'logps/rejected': -1289.9808349609375, 'logps/ref_chosen': -60.752323150634766, 'logps/ref_rejected': -93.44229125976562, 'KL/chosen_KL_mean': -702.362060546875, 'KL/rejected_KL_mean': -1196.53857421875, 'KL/mean': -949.4503173828125, 'KL/std': 592.9994506835938, 'logits/chosen': -1.1046611070632935, 'logits/rejected': -1.1130573749542236, 'epoch': 0.82} + 82%|████████▏ | 559/681 [23:45<05:13, 2.57s/it] 82%|████████▏ | 560/681 [23:48<05:17, 2.62s/it] {'loss': 1.1422, 'grad_norm': 29.267030715942383, 'learning_rate': 4.7444448928806615e-08, 'fcm_dpo/beta': 0.0009687429992482066, 'fcm_dpo/q_t': 0.42102909088134766, 'fcm_dpo/delta': 0.05413452535867691, 'fcm_dpo/margin': 358.7589111328125, 'margin_dpo/margin_mean': 358.7589111328125, 'margin_dpo/margin_std': 570.3694458007812, 'logps/chosen': -704.2850952148438, 'logps/rejected': -1084.931396484375, 'logps/ref_chosen': -58.10382080078125, 'logps/ref_rejected': -79.99122619628906, 'KL/chosen_KL_mean': -646.1812744140625, 'KL/rejected_KL_mean': -1004.9402465820312, 'KL/mean': -825.5607299804688, 'KL/std': 523.7559204101562, 'logits/chosen': -0.9093506336212158, 'logits/rejected': -0.8941901326179504, 'epoch': 0.82} + 82%|████████▏ | 560/681 [23:48<05:17, 2.62s/it] 82%|████████▏ | 561/681 [23:51<05:09, 2.58s/it] {'loss': 1.2066, 'grad_norm': 46.64256286621094, 'learning_rate': 4.669493178106432e-08, 'fcm_dpo/beta': 0.0009945239871740341, 'fcm_dpo/q_t': 0.4313068687915802, 'fcm_dpo/delta': 0.10583681613206863, 'fcm_dpo/margin': 298.46636962890625, 'margin_dpo/margin_mean': 298.46636962890625, 'margin_dpo/margin_std': 644.6820678710938, 'logps/chosen': -795.062255859375, 'logps/rejected': -1141.684326171875, 'logps/ref_chosen': -50.912879943847656, 'logps/ref_rejected': -99.06856536865234, 'KL/chosen_KL_mean': -744.1492919921875, 'KL/rejected_KL_mean': -1042.61572265625, 'KL/mean': -893.382568359375, 'KL/std': 518.4768676757812, 'logits/chosen': -1.0665897130966187, 'logits/rejected': -1.0877900123596191, 'epoch': 0.82} + 82%|████████▏ | 561/681 [23:51<05:09, 2.58s/it] 83%|████████▎ | 562/681 [23:53<05:10, 2.61s/it] {'loss': 1.0985, 'grad_norm': 40.539154052734375, 'learning_rate': 4.5950771910944596e-08, 'fcm_dpo/beta': 0.0009885327890515327, 'fcm_dpo/q_t': 0.4067729115486145, 'fcm_dpo/delta': -0.02009068801999092, 'fcm_dpo/margin': 423.4101257324219, 'margin_dpo/margin_mean': 423.41015625, 'margin_dpo/margin_std': 631.9222412109375, 'logps/chosen': -760.7117919921875, 'logps/rejected': -1221.2001953125, 'logps/ref_chosen': -59.46440124511719, 'logps/ref_rejected': -96.54266357421875, 'KL/chosen_KL_mean': -701.2473754882812, 'KL/rejected_KL_mean': -1124.657470703125, 'KL/mean': -912.952392578125, 'KL/std': 575.4769287109375, 'logits/chosen': -1.0031108856201172, 'logits/rejected': -1.0146968364715576, 'epoch': 0.83} + 83%|████████▎ | 562/681 [23:53<05:10, 2.61s/it] 83%|████████▎ | 563/681 [23:56<04:59, 2.53s/it] {'loss': 1.2224, 'grad_norm': 46.25846481323242, 'learning_rate': 4.521198892775202e-08, 'fcm_dpo/beta': 0.0009880930883809924, 'fcm_dpo/q_t': 0.42376065254211426, 'fcm_dpo/delta': -0.04907416179776192, 'fcm_dpo/margin': 314.54473876953125, 'margin_dpo/margin_mean': 314.54473876953125, 'margin_dpo/margin_std': 707.6341552734375, 'logps/chosen': -828.5675659179688, 'logps/rejected': -1177.07177734375, 'logps/ref_chosen': -60.60819625854492, 'logps/ref_rejected': -94.56770324707031, 'KL/chosen_KL_mean': -767.9593505859375, 'KL/rejected_KL_mean': -1082.504150390625, 'KL/mean': -925.231689453125, 'KL/std': 606.1444091796875, 'logits/chosen': -0.9742862582206726, 'logits/rejected': -0.9843175411224365, 'epoch': 0.83} + 83%|████████▎ | 563/681 [23:56<04:59, 2.53s/it] 83%|████████▎ | 564/681 [23:58<04:54, 2.51s/it] {'loss': 1.091, 'grad_norm': 47.38982009887695, 'learning_rate': 4.447860229910544e-08, 'fcm_dpo/beta': 0.0009863328887149692, 'fcm_dpo/q_t': 0.40960630774497986, 'fcm_dpo/delta': 0.0033044088631868362, 'fcm_dpo/margin': 402.17816162109375, 'margin_dpo/margin_mean': 402.17816162109375, 'margin_dpo/margin_std': 517.3626098632812, 'logps/chosen': -759.5974731445312, 'logps/rejected': -1180.7454833984375, 'logps/ref_chosen': -74.26837921142578, 'logps/ref_rejected': -93.23818969726562, 'KL/chosen_KL_mean': -685.3291015625, 'KL/rejected_KL_mean': -1087.50732421875, 'KL/mean': -886.4181518554688, 'KL/std': 537.7996826171875, 'logits/chosen': -1.0377906560897827, 'logits/rejected': -1.0290945768356323, 'epoch': 0.83} + 83%|████████▎ | 564/681 [23:58<04:54, 2.51s/it] 83%|████████▎ | 565/681 [24:01<04:55, 2.55s/it] {'loss': 1.1303, 'grad_norm': 44.25437927246094, 'learning_rate': 4.375063135042445e-08, 'fcm_dpo/beta': 0.0009813096839934587, 'fcm_dpo/q_t': 0.41049522161483765, 'fcm_dpo/delta': -0.015731915831565857, 'fcm_dpo/margin': 422.97857666015625, 'margin_dpo/margin_mean': 422.9785461425781, 'margin_dpo/margin_std': 737.2132568359375, 'logps/chosen': -796.7603759765625, 'logps/rejected': -1236.4979248046875, 'logps/ref_chosen': -69.0199203491211, 'logps/ref_rejected': -85.7789306640625, 'KL/chosen_KL_mean': -727.7404174804688, 'KL/rejected_KL_mean': -1150.718994140625, 'KL/mean': -939.229736328125, 'KL/std': 615.11181640625, 'logits/chosen': -0.9896056652069092, 'logits/rejected': -0.9939931035041809, 'epoch': 0.83} + 83%|████████▎ | 565/681 [24:01<04:55, 2.55s/it] 83%|████████▎ | 566/681 [24:03<04:59, 2.60s/it] {'loss': 1.0977, 'grad_norm': 35.89476013183594, 'learning_rate': 4.3028095264420525e-08, 'fcm_dpo/beta': 0.0009774458594620228, 'fcm_dpo/q_t': 0.3984670639038086, 'fcm_dpo/delta': -0.06289710104465485, 'fcm_dpo/margin': 470.2884216308594, 'margin_dpo/margin_mean': 470.28839111328125, 'margin_dpo/margin_std': 727.6954345703125, 'logps/chosen': -738.927734375, 'logps/rejected': -1246.5401611328125, 'logps/ref_chosen': -66.5453109741211, 'logps/ref_rejected': -103.86932373046875, 'KL/chosen_KL_mean': -672.3824462890625, 'KL/rejected_KL_mean': -1142.6708984375, 'KL/mean': -907.526611328125, 'KL/std': 631.5331420898438, 'logits/chosen': -0.9764306545257568, 'logits/rejected': -1.0030491352081299, 'epoch': 0.83} + 83%|████████▎ | 566/681 [24:03<04:59, 2.60s/it] 83%|████████▎ | 567/681 [24:06<04:54, 2.58s/it] {'loss': 1.1168, 'grad_norm': 37.64247512817383, 'learning_rate': 4.231101308059165e-08, 'fcm_dpo/beta': 0.0009747430449351668, 'fcm_dpo/q_t': 0.41720670461654663, 'fcm_dpo/delta': 0.04814485087990761, 'fcm_dpo/margin': 362.7438659667969, 'margin_dpo/margin_mean': 362.7438659667969, 'margin_dpo/margin_std': 488.75982666015625, 'logps/chosen': -707.17578125, 'logps/rejected': -1102.432373046875, 'logps/ref_chosen': -52.85829544067383, 'logps/ref_rejected': -85.37095642089844, 'KL/chosen_KL_mean': -654.3175048828125, 'KL/rejected_KL_mean': -1017.0613403320312, 'KL/mean': -835.689453125, 'KL/std': 442.9603576660156, 'logits/chosen': -1.0903135538101196, 'logits/rejected': -1.1013944149017334, 'epoch': 0.83} + 83%|████████▎ | 567/681 [24:06<04:54, 2.58s/it] 83%|████████▎ | 568/681 [24:08<04:45, 2.52s/it] {'loss': 1.0349, 'grad_norm': 32.80691146850586, 'learning_rate': 4.1599403694720145e-08, 'fcm_dpo/beta': 0.0009669936262071133, 'fcm_dpo/q_t': 0.39191970229148865, 'fcm_dpo/delta': -0.06800977885723114, 'fcm_dpo/margin': 480.6161804199219, 'margin_dpo/margin_mean': 480.61614990234375, 'margin_dpo/margin_std': 529.2095947265625, 'logps/chosen': -685.841552734375, 'logps/rejected': -1210.357666015625, 'logps/ref_chosen': -45.1923828125, 'logps/ref_rejected': -89.09236907958984, 'KL/chosen_KL_mean': -640.649169921875, 'KL/rejected_KL_mean': -1121.2652587890625, 'KL/mean': -880.9572143554688, 'KL/std': 509.034912109375, 'logits/chosen': -0.951264500617981, 'logits/rejected': -0.9899559020996094, 'epoch': 0.83} + 83%|████████▎ | 568/681 [24:08<04:45, 2.52s/it] 84%|████████▎ | 569/681 [24:11<04:45, 2.55s/it] {'loss': 1.142, 'grad_norm': 49.85871505737305, 'learning_rate': 4.089328585837512e-08, 'fcm_dpo/beta': 0.0009723026305437088, 'fcm_dpo/q_t': 0.4109431803226471, 'fcm_dpo/delta': 0.008742645382881165, 'fcm_dpo/margin': 401.8387451171875, 'margin_dpo/margin_mean': 401.8387451171875, 'margin_dpo/margin_std': 688.9107055664062, 'logps/chosen': -789.855224609375, 'logps/rejected': -1207.07666015625, 'logps/ref_chosen': -63.72056198120117, 'logps/ref_rejected': -79.10325622558594, 'KL/chosen_KL_mean': -726.1346435546875, 'KL/rejected_KL_mean': -1127.973388671875, 'KL/mean': -927.0540161132812, 'KL/std': 663.6150512695312, 'logits/chosen': -1.000624179840088, 'logits/rejected': -1.0081329345703125, 'epoch': 0.84} + 84%|████████▎ | 569/681 [24:11<04:45, 2.55s/it] 84%|████████▎ | 570/681 [24:14<04:44, 2.56s/it] {'loss': 1.134, 'grad_norm': 30.256921768188477, 'learning_rate': 4.019267817841834e-08, 'fcm_dpo/beta': 0.000971082947216928, 'fcm_dpo/q_t': 0.41922780871391296, 'fcm_dpo/delta': 0.04106369987130165, 'fcm_dpo/margin': 371.16790771484375, 'margin_dpo/margin_mean': 371.16790771484375, 'margin_dpo/margin_std': 586.2235717773438, 'logps/chosen': -741.500244140625, 'logps/rejected': -1133.1954345703125, 'logps/ref_chosen': -61.61454391479492, 'logps/ref_rejected': -82.14186096191406, 'KL/chosen_KL_mean': -679.8856811523438, 'KL/rejected_KL_mean': -1051.053466796875, 'KL/mean': -865.4696044921875, 'KL/std': 531.23974609375, 'logits/chosen': -1.0755581855773926, 'logits/rejected': -1.0673818588256836, 'epoch': 0.84} + 84%|████████▎ | 570/681 [24:14<04:44, 2.56s/it] 84%|████████▍ | 571/681 [24:16<04:43, 2.57s/it] {'loss': 1.103, 'grad_norm': 37.00971221923828, 'learning_rate': 3.9497599116513705e-08, 'fcm_dpo/beta': 0.0009676171466708183, 'fcm_dpo/q_t': 0.4041333794593811, 'fcm_dpo/delta': -0.031879834830760956, 'fcm_dpo/margin': 444.74859619140625, 'margin_dpo/margin_mean': 444.74853515625, 'margin_dpo/margin_std': 689.9317016601562, 'logps/chosen': -757.800537109375, 'logps/rejected': -1240.831787109375, 'logps/ref_chosen': -53.05406188964844, 'logps/ref_rejected': -91.33682250976562, 'KL/chosen_KL_mean': -704.7464599609375, 'KL/rejected_KL_mean': -1149.4949951171875, 'KL/mean': -927.1207275390625, 'KL/std': 556.0410766601562, 'logits/chosen': -0.9698342680931091, 'logits/rejected': -0.984066903591156, 'epoch': 0.84} + 84%|████████▍ | 571/681 [24:16<04:43, 2.57s/it] 84%|████████▍ | 572/681 [24:19<04:33, 2.51s/it] {'loss': 1.1207, 'grad_norm': 33.24798583984375, 'learning_rate': 3.880806698864086e-08, 'fcm_dpo/beta': 0.0009603890357539058, 'fcm_dpo/q_t': 0.40735888481140137, 'fcm_dpo/delta': -0.03531990945339203, 'fcm_dpo/margin': 451.42071533203125, 'margin_dpo/margin_mean': 451.42071533203125, 'margin_dpo/margin_std': 778.9022216796875, 'logps/chosen': -808.8663330078125, 'logps/rejected': -1295.384765625, 'logps/ref_chosen': -48.45928955078125, 'logps/ref_rejected': -83.55703735351562, 'KL/chosen_KL_mean': -760.4070434570312, 'KL/rejected_KL_mean': -1211.82763671875, 'KL/mean': -986.117431640625, 'KL/std': 642.9850463867188, 'logits/chosen': -1.01137113571167, 'logits/rejected': -1.0419948101043701, 'epoch': 0.84} + 84%|████████▍ | 572/681 [24:19<04:33, 2.51s/it] 84%|████████▍ | 573/681 [24:21<04:22, 2.43s/it] {'loss': 1.0995, 'grad_norm': 30.2346248626709, 'learning_rate': 3.812409996461275e-08, 'fcm_dpo/beta': 0.0009635947062633932, 'fcm_dpo/q_t': 0.4119398593902588, 'fcm_dpo/delta': 0.008085294626653194, 'fcm_dpo/margin': 407.01580810546875, 'margin_dpo/margin_mean': 407.01580810546875, 'margin_dpo/margin_std': 571.386474609375, 'logps/chosen': -790.063232421875, 'logps/rejected': -1230.7813720703125, 'logps/ref_chosen': -51.62262725830078, 'logps/ref_rejected': -85.32499694824219, 'KL/chosen_KL_mean': -738.4405517578125, 'KL/rejected_KL_mean': -1145.4564208984375, 'KL/mean': -941.948486328125, 'KL/std': 572.2998657226562, 'logits/chosen': -1.0823559761047363, 'logits/rejected': -1.0929925441741943, 'epoch': 0.84} + 84%|████████▍ | 573/681 [24:21<04:22, 2.43s/it] 84%|████████▍ | 574/681 [24:23<04:28, 2.51s/it] {'loss': 1.0826, 'grad_norm': 39.73578643798828, 'learning_rate': 3.74457160675965e-08, 'fcm_dpo/beta': 0.0009609279222786427, 'fcm_dpo/q_t': 0.4030435085296631, 'fcm_dpo/delta': -0.02838175743818283, 'fcm_dpo/margin': 444.5045166015625, 'margin_dpo/margin_mean': 444.5045166015625, 'margin_dpo/margin_std': 604.4505004882812, 'logps/chosen': -720.0551147460938, 'logps/rejected': -1206.321533203125, 'logps/ref_chosen': -51.04446029663086, 'logps/ref_rejected': -92.80640411376953, 'KL/chosen_KL_mean': -669.0106201171875, 'KL/rejected_KL_mean': -1113.51513671875, 'KL/mean': -891.2628784179688, 'KL/std': 523.285888671875, 'logits/chosen': -1.0616734027862549, 'logits/rejected': -1.087287187576294, 'epoch': 0.84} + 84%|████████▍ | 574/681 [24:24<04:28, 2.51s/it] 84%|████████▍ | 575/681 [24:26<04:29, 2.55s/it] {'loss': 1.1349, 'grad_norm': 34.28059005737305, 'learning_rate': 3.677293317363864e-08, 'fcm_dpo/beta': 0.0009517880389466882, 'fcm_dpo/q_t': 0.41225284337997437, 'fcm_dpo/delta': 0.01296766847372055, 'fcm_dpo/margin': 405.9783020019531, 'margin_dpo/margin_mean': 405.97833251953125, 'margin_dpo/margin_std': 661.7513427734375, 'logps/chosen': -787.5184326171875, 'logps/rejected': -1217.0927734375, 'logps/ref_chosen': -71.7901382446289, 'logps/ref_rejected': -95.38619995117188, 'KL/chosen_KL_mean': -715.7283325195312, 'KL/rejected_KL_mean': -1121.70654296875, 'KL/mean': -918.717529296875, 'KL/std': 521.6762084960938, 'logits/chosen': -0.95893394947052, 'logits/rejected': -0.9681143760681152, 'epoch': 0.84} + 84%|████████▍ | 575/681 [24:26<04:29, 2.55s/it] 85%|████████▍ | 576/681 [24:28<04:21, 2.49s/it] {'loss': 1.1802, 'grad_norm': 37.09640884399414, 'learning_rate': 3.6105769011194224e-08, 'fcm_dpo/beta': 0.0009712062310427427, 'fcm_dpo/q_t': 0.4315390884876251, 'fcm_dpo/delta': 0.10144974291324615, 'fcm_dpo/margin': 310.6585998535156, 'margin_dpo/margin_mean': 310.6585998535156, 'margin_dpo/margin_std': 565.0277709960938, 'logps/chosen': -754.484130859375, 'logps/rejected': -1111.634033203125, 'logps/ref_chosen': -54.262962341308594, 'logps/ref_rejected': -100.75428009033203, 'KL/chosen_KL_mean': -700.2211303710938, 'KL/rejected_KL_mean': -1010.8797607421875, 'KL/mean': -855.5504150390625, 'KL/std': 483.954345703125, 'logits/chosen': -1.0683985948562622, 'logits/rejected': -1.0978965759277344, 'epoch': 0.85} + 85%|████████▍ | 576/681 [24:29<04:21, 2.49s/it] 85%|████████▍ | 577/681 [24:31<04:17, 2.47s/it] {'loss': 1.1141, 'grad_norm': 28.927133560180664, 'learning_rate': 3.5444241160659304e-08, 'fcm_dpo/beta': 0.0009785243310034275, 'fcm_dpo/q_t': 0.4103270471096039, 'fcm_dpo/delta': 0.0017292937263846397, 'fcm_dpo/margin': 407.0462341308594, 'margin_dpo/margin_mean': 407.0462646484375, 'margin_dpo/margin_std': 594.7308959960938, 'logps/chosen': -679.371337890625, 'logps/rejected': -1108.57861328125, 'logps/ref_chosen': -61.909706115722656, 'logps/ref_rejected': -84.07069396972656, 'KL/chosen_KL_mean': -617.461669921875, 'KL/rejected_KL_mean': -1024.5079345703125, 'KL/mean': -820.9848022460938, 'KL/std': 550.1454467773438, 'logits/chosen': -1.0405795574188232, 'logits/rejected': -1.0324490070343018, 'epoch': 0.85} + 85%|████████▍ | 577/681 [24:31<04:17, 2.47s/it] 85%|████████▍ | 578/681 [24:34<04:19, 2.52s/it] {'loss': 1.073, 'grad_norm': 37.58174133300781, 'learning_rate': 3.478836705390808e-08, 'fcm_dpo/beta': 0.0009709839941933751, 'fcm_dpo/q_t': 0.4028467535972595, 'fcm_dpo/delta': -0.02570383995771408, 'fcm_dpo/margin': 436.5827331542969, 'margin_dpo/margin_mean': 436.5827331542969, 'margin_dpo/margin_std': 540.6303100585938, 'logps/chosen': -662.6415405273438, 'logps/rejected': -1133.396728515625, 'logps/ref_chosen': -49.26368713378906, 'logps/ref_rejected': -83.4362564086914, 'KL/chosen_KL_mean': -613.3778076171875, 'KL/rejected_KL_mean': -1049.9605712890625, 'KL/mean': -831.669189453125, 'KL/std': 529.0792236328125, 'logits/chosen': -0.949306845664978, 'logits/rejected': -0.9812426567077637, 'epoch': 0.85} + 85%|████████▍ | 578/681 [24:34<04:19, 2.52s/it] 85%|████████▌ | 579/681 [24:36<04:24, 2.59s/it] {'loss': 1.2246, 'grad_norm': 57.85899353027344, 'learning_rate': 3.41381639738331e-08, 'fcm_dpo/beta': 0.0009836649987846613, 'fcm_dpo/q_t': 0.43926477432250977, 'fcm_dpo/delta': 0.0445760153234005, 'fcm_dpo/margin': 266.721923828125, 'margin_dpo/margin_mean': 266.721923828125, 'margin_dpo/margin_std': 599.7505493164062, 'logps/chosen': -782.810791015625, 'logps/rejected': -1085.4345703125, 'logps/ref_chosen': -58.88581848144531, 'logps/ref_rejected': -94.78762817382812, 'KL/chosen_KL_mean': -723.9249267578125, 'KL/rejected_KL_mean': -990.6468505859375, 'KL/mean': -857.285888671875, 'KL/std': 545.2241821289062, 'logits/chosen': -1.0406593084335327, 'logits/rejected': -1.0456761121749878, 'epoch': 0.85} + 85%|████████▌ | 579/681 [24:36<04:24, 2.59s/it] 85%|████████▌ | 580/681 [24:39<04:22, 2.60s/it] {'loss': 1.0505, 'grad_norm': 28.836139678955078, 'learning_rate': 3.349364905389032e-08, 'fcm_dpo/beta': 0.0009660617797635496, 'fcm_dpo/q_t': 0.39319556951522827, 'fcm_dpo/delta': -0.09629727900028229, 'fcm_dpo/margin': 508.2030029296875, 'margin_dpo/margin_mean': 508.2029724121094, 'margin_dpo/margin_std': 699.3994140625, 'logps/chosen': -572.5047607421875, 'logps/rejected': -1113.75927734375, 'logps/ref_chosen': -48.70683670043945, 'logps/ref_rejected': -81.7583999633789, 'KL/chosen_KL_mean': -523.7979125976562, 'KL/rejected_KL_mean': -1032.0008544921875, 'KL/mean': -777.8994140625, 'KL/std': 608.378662109375, 'logits/chosen': -0.8956875205039978, 'logits/rejected': -0.930424153804779, 'epoch': 0.85} + 85%|████████▌ | 580/681 [24:39<04:22, 2.60s/it] 85%|████████▌ | 581/681 [24:41<04:15, 2.55s/it] {'loss': 1.1609, 'grad_norm': 42.403324127197266, 'learning_rate': 3.285483927764726e-08, 'fcm_dpo/beta': 0.0009752740152180195, 'fcm_dpo/q_t': 0.4253264367580414, 'fcm_dpo/delta': 0.06534610688686371, 'fcm_dpo/margin': 345.31573486328125, 'margin_dpo/margin_mean': 345.31573486328125, 'margin_dpo/margin_std': 624.8870849609375, 'logps/chosen': -781.6790161132812, 'logps/rejected': -1156.508056640625, 'logps/ref_chosen': -62.22235107421875, 'logps/ref_rejected': -91.73568725585938, 'KL/chosen_KL_mean': -719.4566650390625, 'KL/rejected_KL_mean': -1064.7723388671875, 'KL/mean': -892.114501953125, 'KL/std': 567.6749877929688, 'logits/chosen': -1.105149269104004, 'logits/rejected': -1.113175630569458, 'epoch': 0.85} + 85%|████████▌ | 581/681 [24:41<04:15, 2.55s/it] 85%|████████▌ | 582/681 [24:44<04:14, 2.57s/it] {'loss': 1.1048, 'grad_norm': 30.787132263183594, 'learning_rate': 3.222175147833556e-08, 'fcm_dpo/beta': 0.000987016363069415, 'fcm_dpo/q_t': 0.4085530638694763, 'fcm_dpo/delta': 0.00210629403591156, 'fcm_dpo/margin': 401.499267578125, 'margin_dpo/margin_mean': 401.499267578125, 'margin_dpo/margin_std': 544.4295654296875, 'logps/chosen': -686.2553100585938, 'logps/rejected': -1139.595458984375, 'logps/ref_chosen': -58.228660583496094, 'logps/ref_rejected': -110.06959533691406, 'KL/chosen_KL_mean': -628.026611328125, 'KL/rejected_KL_mean': -1029.52587890625, 'KL/mean': -828.7762451171875, 'KL/std': 499.4111022949219, 'logits/chosen': -1.0466606616973877, 'logits/rejected': -1.0699677467346191, 'epoch': 0.85} + 85%|████████▌ | 582/681 [24:44<04:14, 2.57s/it] 86%|████████▌ | 583/681 [24:47<04:15, 2.60s/it] {'loss': 1.2576, 'grad_norm': 57.56175231933594, 'learning_rate': 3.159440233840763e-08, 'fcm_dpo/beta': 0.0009754466009326279, 'fcm_dpo/q_t': 0.44551074504852295, 'fcm_dpo/delta': -0.013105042278766632, 'fcm_dpo/margin': 249.6529998779297, 'margin_dpo/margin_mean': 249.65298461914062, 'margin_dpo/margin_std': 645.1383056640625, 'logps/chosen': -777.570068359375, 'logps/rejected': -1058.76416015625, 'logps/ref_chosen': -56.86286163330078, 'logps/ref_rejected': -88.4039306640625, 'KL/chosen_KL_mean': -720.7072143554688, 'KL/rejected_KL_mean': -970.3602905273438, 'KL/mean': -845.5337524414062, 'KL/std': 540.9931640625, 'logits/chosen': -0.9852885007858276, 'logits/rejected': -0.9814597368240356, 'epoch': 0.86} + 86%|████████▌ | 583/681 [24:47<04:15, 2.60s/it] 86%|████████▌ | 584/681 [24:49<04:07, 2.55s/it] {'loss': 1.0435, 'grad_norm': 40.76878356933594, 'learning_rate': 3.0972808389096635e-08, 'fcm_dpo/beta': 0.0009648328414186835, 'fcm_dpo/q_t': 0.3933956027030945, 'fcm_dpo/delta': -0.07715471088886261, 'fcm_dpo/margin': 490.79443359375, 'margin_dpo/margin_mean': 490.79443359375, 'margin_dpo/margin_std': 595.7258911132812, 'logps/chosen': -681.8408203125, 'logps/rejected': -1213.37060546875, 'logps/ref_chosen': -56.90068054199219, 'logps/ref_rejected': -97.63606262207031, 'KL/chosen_KL_mean': -624.9401245117188, 'KL/rejected_KL_mean': -1115.734619140625, 'KL/mean': -870.3372802734375, 'KL/std': 560.4860229492188, 'logits/chosen': -1.0293495655059814, 'logits/rejected': -1.0419096946716309, 'epoch': 0.86} + 86%|████████▌ | 584/681 [24:49<04:07, 2.55s/it] 86%|████████▌ | 585/681 [24:52<04:02, 2.52s/it] {'loss': 1.1181, 'grad_norm': 35.20669937133789, 'learning_rate': 3.035698600998121e-08, 'fcm_dpo/beta': 0.0009544256026856601, 'fcm_dpo/q_t': 0.40594780445098877, 'fcm_dpo/delta': -0.023169085383415222, 'fcm_dpo/margin': 442.110595703125, 'margin_dpo/margin_mean': 442.110595703125, 'margin_dpo/margin_std': 730.5751953125, 'logps/chosen': -774.8123168945312, 'logps/rejected': -1240.118408203125, 'logps/ref_chosen': -60.973968505859375, 'logps/ref_rejected': -84.16952514648438, 'KL/chosen_KL_mean': -713.83837890625, 'KL/rejected_KL_mean': -1155.948974609375, 'KL/mean': -934.8936157226562, 'KL/std': 615.5269775390625, 'logits/chosen': -1.0367913246154785, 'logits/rejected': -1.058849811553955, 'epoch': 0.86} + 86%|████████▌ | 585/681 [24:52<04:02, 2.52s/it] 86%|████████▌ | 586/681 [24:54<03:59, 2.52s/it] {'loss': 1.2039, 'grad_norm': 31.211702346801758, 'learning_rate': 2.974695142855388e-08, 'fcm_dpo/beta': 0.0009719742811284959, 'fcm_dpo/q_t': 0.4358428120613098, 'fcm_dpo/delta': 0.12220651656389236, 'fcm_dpo/margin': 289.494140625, 'margin_dpo/margin_mean': 289.494140625, 'margin_dpo/margin_std': 594.351806640625, 'logps/chosen': -803.76513671875, 'logps/rejected': -1128.206298828125, 'logps/ref_chosen': -56.85559844970703, 'logps/ref_rejected': -91.80261993408203, 'KL/chosen_KL_mean': -746.9095458984375, 'KL/rejected_KL_mean': -1036.403564453125, 'KL/mean': -891.6566162109375, 'KL/std': 537.6516723632812, 'logits/chosen': -1.0327489376068115, 'logits/rejected': -1.0522578954696655, 'epoch': 0.86} + 86%|████████▌ | 586/681 [24:54<03:59, 2.52s/it] 86%|████████▌ | 587/681 [24:57<03:58, 2.53s/it] {'loss': 1.0918, 'grad_norm': 38.273529052734375, 'learning_rate': 2.9142720719793122e-08, 'fcm_dpo/beta': 0.0009805042063817382, 'fcm_dpo/q_t': 0.40643125772476196, 'fcm_dpo/delta': -0.008139118552207947, 'fcm_dpo/margin': 415.85150146484375, 'margin_dpo/margin_mean': 415.85150146484375, 'margin_dpo/margin_std': 569.570068359375, 'logps/chosen': -551.4901733398438, 'logps/rejected': -1005.27392578125, 'logps/ref_chosen': -44.69159698486328, 'logps/ref_rejected': -82.62385559082031, 'KL/chosen_KL_mean': -506.798583984375, 'KL/rejected_KL_mean': -922.6500244140625, 'KL/mean': -714.7243041992188, 'KL/std': 565.3491821289062, 'logits/chosen': -1.03069269657135, 'logits/rejected': -1.058362603187561, 'epoch': 0.86} + 86%|████████▌ | 587/681 [24:57<03:58, 2.53s/it] 86%|████████▋ | 588/681 [24:59<03:49, 2.47s/it] {'loss': 1.1364, 'grad_norm': 31.023210525512695, 'learning_rate': 2.8544309805740018e-08, 'fcm_dpo/beta': 0.000982759054750204, 'fcm_dpo/q_t': 0.4209359884262085, 'fcm_dpo/delta': 0.061286523938179016, 'fcm_dpo/margin': 346.6522216796875, 'margin_dpo/margin_mean': 346.6522521972656, 'margin_dpo/margin_std': 510.10968017578125, 'logps/chosen': -714.8203735351562, 'logps/rejected': -1118.5474853515625, 'logps/ref_chosen': -50.29494857788086, 'logps/ref_rejected': -107.36988067626953, 'KL/chosen_KL_mean': -664.525390625, 'KL/rejected_KL_mean': -1011.1776123046875, 'KL/mean': -837.8515014648438, 'KL/std': 480.2796630859375, 'logits/chosen': -1.0278799533843994, 'logits/rejected': -1.05256187915802, 'epoch': 0.86} + 86%|████████▋ | 588/681 [24:59<03:49, 2.47s/it] 86%|████████▋ | 589/681 [25:01<03:48, 2.48s/it] {'loss': 1.0576, 'grad_norm': 30.869823455810547, 'learning_rate': 2.7951734455078786e-08, 'fcm_dpo/beta': 0.0009825675515457988, 'fcm_dpo/q_t': 0.3966492712497711, 'fcm_dpo/delta': -0.05569233000278473, 'fcm_dpo/margin': 461.232177734375, 'margin_dpo/margin_mean': 461.2321472167969, 'margin_dpo/margin_std': 579.5751953125, 'logps/chosen': -733.4117431640625, 'logps/rejected': -1246.369384765625, 'logps/ref_chosen': -59.929908752441406, 'logps/ref_rejected': -111.65534973144531, 'KL/chosen_KL_mean': -673.4818115234375, 'KL/rejected_KL_mean': -1134.7139892578125, 'KL/mean': -904.097900390625, 'KL/std': 546.526611328125, 'logits/chosen': -0.9944198131561279, 'logits/rejected': -1.005649447441101, 'epoch': 0.86} + 86%|████████▋ | 589/681 [25:01<03:48, 2.48s/it] 87%|████████▋ | 590/681 [25:04<03:43, 2.45s/it] {'loss': 1.0653, 'grad_norm': 33.35025405883789, 'learning_rate': 2.736501028272095e-08, 'fcm_dpo/beta': 0.0009721757378429174, 'fcm_dpo/q_t': 0.40020644664764404, 'fcm_dpo/delta': -0.042073942720890045, 'fcm_dpo/margin': 452.7629089355469, 'margin_dpo/margin_mean': 452.762939453125, 'margin_dpo/margin_std': 576.941162109375, 'logps/chosen': -632.8732299804688, 'logps/rejected': -1135.88916015625, 'logps/ref_chosen': -55.80979537963867, 'logps/ref_rejected': -106.06282043457031, 'KL/chosen_KL_mean': -577.0634765625, 'KL/rejected_KL_mean': -1029.8262939453125, 'KL/mean': -803.4448852539062, 'KL/std': 528.4348754882812, 'logits/chosen': -0.9723612070083618, 'logits/rejected': -1.0005714893341064, 'epoch': 0.87} + 87%|████████▋ | 590/681 [25:04<03:43, 2.45s/it] 87%|████████▋ | 591/681 [25:06<03:32, 2.36s/it] {'loss': 1.1021, 'grad_norm': 34.23089599609375, 'learning_rate': 2.678415274939408e-08, 'fcm_dpo/beta': 0.0009712845785543323, 'fcm_dpo/q_t': 0.40842798352241516, 'fcm_dpo/delta': 0.003869034815579653, 'fcm_dpo/margin': 407.99945068359375, 'margin_dpo/margin_mean': 407.9994812011719, 'margin_dpo/margin_std': 576.1143798828125, 'logps/chosen': -705.963623046875, 'logps/rejected': -1141.5087890625, 'logps/ref_chosen': -56.24061965942383, 'logps/ref_rejected': -83.78629302978516, 'KL/chosen_KL_mean': -649.7230224609375, 'KL/rejected_KL_mean': -1057.722412109375, 'KL/mean': -853.7227783203125, 'KL/std': 513.365478515625, 'logits/chosen': -1.0266298055648804, 'logits/rejected': -1.0202120542526245, 'epoch': 0.87} + 87%|████████▋ | 591/681 [25:06<03:32, 2.36s/it] 87%|████████▋ | 592/681 [25:09<03:35, 2.43s/it] {'loss': 1.1739, 'grad_norm': 36.562538146972656, 'learning_rate': 2.6209177161234442e-08, 'fcm_dpo/beta': 0.0009767988231033087, 'fcm_dpo/q_t': 0.4183656573295593, 'fcm_dpo/delta': 0.0378945954144001, 'fcm_dpo/margin': 372.12994384765625, 'margin_dpo/margin_mean': 372.1299133300781, 'margin_dpo/margin_std': 716.3402099609375, 'logps/chosen': -728.2474365234375, 'logps/rejected': -1128.169921875, 'logps/ref_chosen': -47.94025421142578, 'logps/ref_rejected': -75.73287963867188, 'KL/chosen_KL_mean': -680.3071899414062, 'KL/rejected_KL_mean': -1052.4371337890625, 'KL/mean': -866.3721923828125, 'KL/std': 528.517822265625, 'logits/chosen': -1.0103018283843994, 'logits/rejected': -1.0133998394012451, 'epoch': 0.87} + 87%|████████▋ | 592/681 [25:09<03:35, 2.43s/it] 87%|████████▋ | 593/681 [25:11<03:38, 2.48s/it] {'loss': 1.2122, 'grad_norm': 52.18275451660156, 'learning_rate': 2.564009866938349e-08, 'fcm_dpo/beta': 0.000984064768999815, 'fcm_dpo/q_t': 0.4358568787574768, 'fcm_dpo/delta': 0.02495434135198593, 'fcm_dpo/margin': 292.0050048828125, 'margin_dpo/margin_mean': 292.0050048828125, 'margin_dpo/margin_std': 635.2003173828125, 'logps/chosen': -692.62353515625, 'logps/rejected': -996.845703125, 'logps/ref_chosen': -48.690757751464844, 'logps/ref_rejected': -60.90800094604492, 'KL/chosen_KL_mean': -643.9327392578125, 'KL/rejected_KL_mean': -935.937744140625, 'KL/mean': -789.9352416992188, 'KL/std': 571.6775512695312, 'logits/chosen': -0.895799994468689, 'logits/rejected': -0.8876699209213257, 'epoch': 0.87} + 87%|████████▋ | 593/681 [25:11<03:38, 2.48s/it] 87%|████████▋ | 594/681 [25:14<03:35, 2.48s/it] {'loss': 1.1384, 'grad_norm': 37.190494537353516, 'learning_rate': 2.5076932269588708e-08, 'fcm_dpo/beta': 0.000995452981442213, 'fcm_dpo/q_t': 0.4163949191570282, 'fcm_dpo/delta': 0.030517850071191788, 'fcm_dpo/margin': 371.14691162109375, 'margin_dpo/margin_mean': 371.1469421386719, 'margin_dpo/margin_std': 597.141845703125, 'logps/chosen': -686.9970703125, 'logps/rejected': -1089.308837890625, 'logps/ref_chosen': -54.93488693237305, 'logps/ref_rejected': -86.09967803955078, 'KL/chosen_KL_mean': -632.062255859375, 'KL/rejected_KL_mean': -1003.2091064453125, 'KL/mean': -817.6357421875, 'KL/std': 560.24951171875, 'logits/chosen': -0.9844435453414917, 'logits/rejected': -0.9757124185562134, 'epoch': 0.87} + 87%|████████▋ | 594/681 [25:14<03:35, 2.48s/it] 87%|████████▋ | 595/681 [25:16<03:36, 2.52s/it] {'loss': 1.0916, 'grad_norm': 42.089027404785156, 'learning_rate': 2.451969280180849e-08, 'fcm_dpo/beta': 0.0009872771333903074, 'fcm_dpo/q_t': 0.4093300700187683, 'fcm_dpo/delta': -0.008713661693036556, 'fcm_dpo/margin': 413.5597229003906, 'margin_dpo/margin_mean': 413.5596923828125, 'margin_dpo/margin_std': 573.5089721679688, 'logps/chosen': -626.1961669921875, 'logps/rejected': -1070.962646484375, 'logps/ref_chosen': -49.4204216003418, 'logps/ref_rejected': -80.62731170654297, 'KL/chosen_KL_mean': -576.7757568359375, 'KL/rejected_KL_mean': -990.33544921875, 'KL/mean': -783.5555419921875, 'KL/std': 514.779541015625, 'logits/chosen': -0.9605817794799805, 'logits/rejected': -0.979555606842041, 'epoch': 0.87} + 87%|████████▋ | 595/681 [25:16<03:36, 2.52s/it] 88%|████████▊ | 596/681 [25:19<03:36, 2.55s/it] {'loss': 1.2046, 'grad_norm': 58.43737030029297, 'learning_rate': 2.396839494982103e-08, 'fcm_dpo/beta': 0.001006106031127274, 'fcm_dpo/q_t': 0.4358452558517456, 'fcm_dpo/delta': 0.10965707898139954, 'fcm_dpo/margin': 291.58941650390625, 'margin_dpo/margin_mean': 291.58941650390625, 'margin_dpo/margin_std': 623.7481689453125, 'logps/chosen': -740.27783203125, 'logps/rejected': -1052.166748046875, 'logps/ref_chosen': -59.791683197021484, 'logps/ref_rejected': -80.09111785888672, 'KL/chosen_KL_mean': -680.4862060546875, 'KL/rejected_KL_mean': -972.0756225585938, 'KL/mean': -826.2808837890625, 'KL/std': 520.2750244140625, 'logits/chosen': -0.995140552520752, 'logits/rejected': -0.9648805856704712, 'epoch': 0.88} + 88%|████████▊ | 596/681 [25:19<03:36, 2.55s/it] 88%|████████▊ | 597/681 [25:21<03:34, 2.56s/it] {'loss': 1.057, 'grad_norm': 27.942764282226562, 'learning_rate': 2.3423053240837514e-08, 'fcm_dpo/beta': 0.0009849161142483354, 'fcm_dpo/q_t': 0.3910897970199585, 'fcm_dpo/delta': -0.10163434594869614, 'fcm_dpo/margin': 501.10113525390625, 'margin_dpo/margin_mean': 501.1011047363281, 'margin_dpo/margin_std': 665.6370849609375, 'logps/chosen': -704.072265625, 'logps/rejected': -1248.6063232421875, 'logps/ref_chosen': -57.26078796386719, 'logps/ref_rejected': -100.6937255859375, 'KL/chosen_KL_mean': -646.8114624023438, 'KL/rejected_KL_mean': -1147.91259765625, 'KL/mean': -897.362060546875, 'KL/std': 604.0191650390625, 'logits/chosen': -0.9165897369384766, 'logits/rejected': -0.9646108746528625, 'epoch': 0.88} + 88%|████████▊ | 597/681 [25:21<03:34, 2.56s/it] 88%|████████▊ | 598/681 [25:24<03:27, 2.50s/it] {'loss': 1.1145, 'grad_norm': 44.37862777709961, 'learning_rate': 2.2883682045119062e-08, 'fcm_dpo/beta': 0.0009835727978497744, 'fcm_dpo/q_t': 0.4106915593147278, 'fcm_dpo/delta': 0.011667370796203613, 'fcm_dpo/margin': 394.01727294921875, 'margin_dpo/margin_mean': 394.01727294921875, 'margin_dpo/margin_std': 565.1383666992188, 'logps/chosen': -693.0473022460938, 'logps/rejected': -1123.989990234375, 'logps/ref_chosen': -52.51850509643555, 'logps/ref_rejected': -89.44385528564453, 'KL/chosen_KL_mean': -640.52880859375, 'KL/rejected_KL_mean': -1034.5460205078125, 'KL/mean': -837.5374755859375, 'KL/std': 517.078857421875, 'logits/chosen': -1.0113518238067627, 'logits/rejected': -1.023685336112976, 'epoch': 0.88} + 88%|████████▊ | 598/681 [25:24<03:27, 2.50s/it] 88%|████████▊ | 599/681 [25:26<03:30, 2.56s/it] {'loss': 1.1434, 'grad_norm': 33.33484649658203, 'learning_rate': 2.2350295575598367e-08, 'fcm_dpo/beta': 0.0009813719661906362, 'fcm_dpo/q_t': 0.4202921986579895, 'fcm_dpo/delta': -0.055194415152072906, 'fcm_dpo/margin': 334.44903564453125, 'margin_dpo/margin_mean': 334.448974609375, 'margin_dpo/margin_std': 476.6112060546875, 'logps/chosen': -693.3181762695312, 'logps/rejected': -1060.943115234375, 'logps/ref_chosen': -49.802677154541016, 'logps/ref_rejected': -82.978515625, 'KL/chosen_KL_mean': -643.5155029296875, 'KL/rejected_KL_mean': -977.9644775390625, 'KL/mean': -810.739990234375, 'KL/std': 501.908935546875, 'logits/chosen': -0.9392881989479065, 'logits/rejected': -0.9477603435516357, 'epoch': 0.88} + 88%|████████▊ | 599/681 [25:27<03:30, 2.56s/it] 88%|████████▊ | 600/681 [25:29<03:30, 2.60s/it] {'loss': 1.1858, 'grad_norm': 31.344772338867188, 'learning_rate': 2.1822907887504932e-08, 'fcm_dpo/beta': 0.0009930970845744014, 'fcm_dpo/q_t': 0.4303100109100342, 'fcm_dpo/delta': 0.09185181558132172, 'fcm_dpo/margin': 313.265380859375, 'margin_dpo/margin_mean': 313.265380859375, 'margin_dpo/margin_std': 613.1177978515625, 'logps/chosen': -775.2310791015625, 'logps/rejected': -1107.51806640625, 'logps/ref_chosen': -66.43487548828125, 'logps/ref_rejected': -85.45649719238281, 'KL/chosen_KL_mean': -708.796142578125, 'KL/rejected_KL_mean': -1022.0615844726562, 'KL/mean': -865.4288330078125, 'KL/std': 508.1895446777344, 'logits/chosen': -1.0468547344207764, 'logits/rejected': -1.0439157485961914, 'epoch': 0.88} + 88%|████████▊ | 600/681 [25:29<03:30, 2.60s/it] 88%|████████▊ | 601/681 [25:32<03:22, 2.53s/it] {'loss': 1.0885, 'grad_norm': 33.107521057128906, 'learning_rate': 2.1301532877994742e-08, 'fcm_dpo/beta': 0.0009970087558031082, 'fcm_dpo/q_t': 0.4060080647468567, 'fcm_dpo/delta': -0.004234878346323967, 'fcm_dpo/margin': 405.19476318359375, 'margin_dpo/margin_mean': 405.19476318359375, 'margin_dpo/margin_std': 531.271728515625, 'logps/chosen': -768.872314453125, 'logps/rejected': -1209.6243896484375, 'logps/ref_chosen': -59.13361358642578, 'logps/ref_rejected': -94.69093322753906, 'KL/chosen_KL_mean': -709.7387084960938, 'KL/rejected_KL_mean': -1114.933349609375, 'KL/mean': -912.3361206054688, 'KL/std': 543.34033203125, 'logits/chosen': -0.9763351678848267, 'logits/rejected': -0.9957572221755981, 'epoch': 0.88} + 88%|████████▊ | 601/681 [25:32<03:22, 2.53s/it] 88%|████████▊ | 602/681 [25:34<03:19, 2.53s/it] {'loss': 1.0169, 'grad_norm': 53.6025276184082, 'learning_rate': 2.0786184285784298e-08, 'fcm_dpo/beta': 0.0009898185962811112, 'fcm_dpo/q_t': 0.38656604290008545, 'fcm_dpo/delta': -0.10039174556732178, 'fcm_dpo/margin': 500.39605712890625, 'margin_dpo/margin_mean': 500.39605712890625, 'margin_dpo/margin_std': 533.11669921875, 'logps/chosen': -550.4730224609375, 'logps/rejected': -1089.944091796875, 'logps/ref_chosen': -48.59352111816406, 'logps/ref_rejected': -87.6685562133789, 'KL/chosen_KL_mean': -501.87945556640625, 'KL/rejected_KL_mean': -1002.2755126953125, 'KL/mean': -752.0775146484375, 'KL/std': 506.71160888671875, 'logits/chosen': -1.0038371086120605, 'logits/rejected': -1.0388686656951904, 'epoch': 0.88} + 88%|████████▊ | 602/681 [25:34<03:19, 2.53s/it] 89%|████████▊ | 603/681 [25:36<03:14, 2.49s/it] {'loss': 1.0851, 'grad_norm': 34.395442962646484, 'learning_rate': 2.0276875690788204e-08, 'fcm_dpo/beta': 0.0009718415094539523, 'fcm_dpo/q_t': 0.40223926305770874, 'fcm_dpo/delta': -0.04316433519124985, 'fcm_dpo/margin': 453.99981689453125, 'margin_dpo/margin_mean': 453.99981689453125, 'margin_dpo/margin_std': 659.65673828125, 'logps/chosen': -700.80859375, 'logps/rejected': -1184.719482421875, 'logps/ref_chosen': -70.41461944580078, 'logps/ref_rejected': -100.32559967041016, 'KL/chosen_KL_mean': -630.3939819335938, 'KL/rejected_KL_mean': -1084.393798828125, 'KL/mean': -857.3939208984375, 'KL/std': 564.5196533203125, 'logits/chosen': -1.011382818222046, 'logits/rejected': -1.0053396224975586, 'epoch': 0.89} + 89%|████████▊ | 603/681 [25:37<03:14, 2.49s/it] 89%|████████▊ | 604/681 [25:39<03:21, 2.61s/it] {'loss': 1.0625, 'grad_norm': 32.600433349609375, 'learning_rate': 1.977362051376158e-08, 'fcm_dpo/beta': 0.0009594437433406711, 'fcm_dpo/q_t': 0.3948679566383362, 'fcm_dpo/delta': -0.07685627043247223, 'fcm_dpo/margin': 493.223876953125, 'margin_dpo/margin_mean': 493.223876953125, 'margin_dpo/margin_std': 673.2548828125, 'logps/chosen': -652.31005859375, 'logps/rejected': -1190.930419921875, 'logps/ref_chosen': -46.45808029174805, 'logps/ref_rejected': -91.8544921875, 'KL/chosen_KL_mean': -605.8519897460938, 'KL/rejected_KL_mean': -1099.075927734375, 'KL/mean': -852.4639892578125, 'KL/std': 546.70166015625, 'logits/chosen': -1.0046117305755615, 'logits/rejected': -1.0419707298278809, 'epoch': 0.89} + 89%|████████▊ | 604/681 [25:39<03:21, 2.61s/it] 89%|████████▉ | 605/681 [25:42<03:19, 2.62s/it] {'loss': 1.1441, 'grad_norm': 36.99518966674805, 'learning_rate': 1.9276432015946446e-08, 'fcm_dpo/beta': 0.0009607453248463571, 'fcm_dpo/q_t': 0.42265427112579346, 'fcm_dpo/delta': 0.051560450345277786, 'fcm_dpo/margin': 364.5632019042969, 'margin_dpo/margin_mean': 364.563232421875, 'margin_dpo/margin_std': 616.972412109375, 'logps/chosen': -703.0062866210938, 'logps/rejected': -1103.625244140625, 'logps/ref_chosen': -66.24933624267578, 'logps/ref_rejected': -102.30496978759766, 'KL/chosen_KL_mean': -636.7569580078125, 'KL/rejected_KL_mean': -1001.3201904296875, 'KL/mean': -819.03857421875, 'KL/std': 516.325927734375, 'logits/chosen': -0.9250746965408325, 'logits/rejected': -0.9353400468826294, 'epoch': 0.89} + 89%|████████▉ | 605/681 [25:42<03:19, 2.62s/it] 89%|████████▉ | 606/681 [25:44<03:11, 2.56s/it] {'loss': 1.1022, 'grad_norm': 30.389057159423828, 'learning_rate': 1.8785323298722093e-08, 'fcm_dpo/beta': 0.0009698671055957675, 'fcm_dpo/q_t': 0.41086679697036743, 'fcm_dpo/delta': 0.005952846258878708, 'fcm_dpo/margin': 406.254150390625, 'margin_dpo/margin_mean': 406.254150390625, 'margin_dpo/margin_std': 574.14404296875, 'logps/chosen': -710.735107421875, 'logps/rejected': -1160.5416259765625, 'logps/ref_chosen': -54.819122314453125, 'logps/ref_rejected': -98.37146759033203, 'KL/chosen_KL_mean': -655.916015625, 'KL/rejected_KL_mean': -1062.170166015625, 'KL/mean': -859.0430908203125, 'KL/std': 534.576171875, 'logits/chosen': -0.9856992363929749, 'logits/rejected': -1.002555012702942, 'epoch': 0.89} + 89%|████████▉ | 606/681 [25:44<03:11, 2.56s/it] 89%|████████▉ | 607/681 [25:47<03:11, 2.59s/it] {'loss': 1.1479, 'grad_norm': 26.738142013549805, 'learning_rate': 1.8300307303259904e-08, 'fcm_dpo/beta': 0.0009783967398107052, 'fcm_dpo/q_t': 0.42285820841789246, 'fcm_dpo/delta': 0.06167557090520859, 'fcm_dpo/margin': 347.606689453125, 'margin_dpo/margin_mean': 347.606689453125, 'margin_dpo/margin_std': 570.4724731445312, 'logps/chosen': -722.8115234375, 'logps/rejected': -1092.111328125, 'logps/ref_chosen': -58.08403778076172, 'logps/ref_rejected': -79.777099609375, 'KL/chosen_KL_mean': -664.7274780273438, 'KL/rejected_KL_mean': -1012.334228515625, 'KL/mean': -838.5308837890625, 'KL/std': 534.4456176757812, 'logits/chosen': -0.9779009222984314, 'logits/rejected': -0.9702655076980591, 'epoch': 0.89} + 89%|████████▉ | 607/681 [25:47<03:11, 2.59s/it] 89%|████████▉ | 608/681 [25:49<03:04, 2.53s/it] {'loss': 1.0892, 'grad_norm': 33.70753479003906, 'learning_rate': 1.7821396810182437e-08, 'fcm_dpo/beta': 0.0009828273905441165, 'fcm_dpo/q_t': 0.40994399785995483, 'fcm_dpo/delta': 0.012696724385023117, 'fcm_dpo/margin': 394.4361267089844, 'margin_dpo/margin_mean': 394.4361572265625, 'margin_dpo/margin_std': 485.2535705566406, 'logps/chosen': -645.7987060546875, 'logps/rejected': -1077.557373046875, 'logps/ref_chosen': -57.450836181640625, 'logps/ref_rejected': -94.77339172363281, 'KL/chosen_KL_mean': -588.347900390625, 'KL/rejected_KL_mean': -982.7840576171875, 'KL/mean': -785.5659790039062, 'KL/std': 475.997314453125, 'logits/chosen': -1.0321998596191406, 'logits/rejected': -1.0463464260101318, 'epoch': 0.89} + 89%|████████▉ | 608/681 [25:49<03:04, 2.53s/it] 89%|████████▉ | 609/681 [25:52<02:57, 2.46s/it] {'loss': 1.065, 'grad_norm': 30.651371002197266, 'learning_rate': 1.7348604439226617e-08, 'fcm_dpo/beta': 0.0009676434565335512, 'fcm_dpo/q_t': 0.3957828879356384, 'fcm_dpo/delta': -0.09796243906021118, 'fcm_dpo/margin': 509.67462158203125, 'margin_dpo/margin_mean': 509.67462158203125, 'margin_dpo/margin_std': 778.646728515625, 'logps/chosen': -674.9708862304688, 'logps/rejected': -1214.6561279296875, 'logps/ref_chosen': -58.805355072021484, 'logps/ref_rejected': -88.81600952148438, 'KL/chosen_KL_mean': -616.16552734375, 'KL/rejected_KL_mean': -1125.840087890625, 'KL/mean': -871.0028076171875, 'KL/std': 653.2919311523438, 'logits/chosen': -1.0793794393539429, 'logits/rejected': -1.107104778289795, 'epoch': 0.89} + 89%|████████▉ | 609/681 [25:52<02:57, 2.46s/it] 90%|████████▉ | 610/681 [25:54<02:52, 2.43s/it] {'loss': 1.18, 'grad_norm': 39.5369987487793, 'learning_rate': 1.6881942648911074e-08, 'fcm_dpo/beta': 0.0009778111707419157, 'fcm_dpo/q_t': 0.4288497567176819, 'fcm_dpo/delta': 0.09600942581892014, 'fcm_dpo/margin': 313.6623229980469, 'margin_dpo/margin_mean': 313.662353515625, 'margin_dpo/margin_std': 588.4511108398438, 'logps/chosen': -687.3477783203125, 'logps/rejected': -1018.720458984375, 'logps/ref_chosen': -65.69503784179688, 'logps/ref_rejected': -83.40538787841797, 'KL/chosen_KL_mean': -621.6527099609375, 'KL/rejected_KL_mean': -935.3150634765625, 'KL/mean': -778.48388671875, 'KL/std': 504.77032470703125, 'logits/chosen': -0.9661835432052612, 'logits/rejected': -0.9449666738510132, 'epoch': 0.9} + 90%|████████▉ | 610/681 [25:54<02:52, 2.43s/it] 90%|████████▉ | 611/681 [25:56<02:46, 2.38s/it] {'loss': 1.0452, 'grad_norm': 32.128814697265625, 'learning_rate': 1.6421423736208e-08, 'fcm_dpo/beta': 0.0009621235076338053, 'fcm_dpo/q_t': 0.3878698945045471, 'fcm_dpo/delta': -0.12539134919643402, 'fcm_dpo/margin': 539.230224609375, 'margin_dpo/margin_mean': 539.230224609375, 'margin_dpo/margin_std': 746.280029296875, 'logps/chosen': -700.2069091796875, 'logps/rejected': -1273.168701171875, 'logps/ref_chosen': -52.59946823120117, 'logps/ref_rejected': -86.33099365234375, 'KL/chosen_KL_mean': -647.6074829101562, 'KL/rejected_KL_mean': -1186.837646484375, 'KL/mean': -917.2225952148438, 'KL/std': 658.553466796875, 'logits/chosen': -1.0239993333816528, 'logits/rejected': -1.0698425769805908, 'epoch': 0.9} + 90%|████████▉ | 611/681 [25:56<02:46, 2.38s/it] 90%|████████▉ | 612/681 [25:59<02:42, 2.36s/it] {'loss': 1.0975, 'grad_norm': 29.971281051635742, 'learning_rate': 1.5967059836219042e-08, 'fcm_dpo/beta': 0.0009597926400601864, 'fcm_dpo/q_t': 0.40916839241981506, 'fcm_dpo/delta': 0.004164084792137146, 'fcm_dpo/margin': 412.4189453125, 'margin_dpo/margin_mean': 412.4189758300781, 'margin_dpo/margin_std': 563.840087890625, 'logps/chosen': -747.9703979492188, 'logps/rejected': -1189.3780517578125, 'logps/ref_chosen': -59.32372283935547, 'logps/ref_rejected': -88.31239318847656, 'KL/chosen_KL_mean': -688.6466674804688, 'KL/rejected_KL_mean': -1101.065673828125, 'KL/mean': -894.856201171875, 'KL/std': 530.3755493164062, 'logits/chosen': -1.008693814277649, 'logits/rejected': -1.013035535812378, 'epoch': 0.9} + 90%|████████▉ | 612/681 [25:59<02:42, 2.36s/it] 90%|█████████ | 613/681 [26:01<02:43, 2.41s/it] {'loss': 1.0444, 'grad_norm': 32.904354095458984, 'learning_rate': 1.551886292185553e-08, 'fcm_dpo/beta': 0.0009444322204217315, 'fcm_dpo/q_t': 0.3927931785583496, 'fcm_dpo/delta': -0.07297656685113907, 'fcm_dpo/margin': 496.83154296875, 'margin_dpo/margin_mean': 496.83154296875, 'margin_dpo/margin_std': 600.5185546875, 'logps/chosen': -667.3597412109375, 'logps/rejected': -1209.56884765625, 'logps/ref_chosen': -59.72996520996094, 'logps/ref_rejected': -105.10752868652344, 'KL/chosen_KL_mean': -607.6298217773438, 'KL/rejected_KL_mean': -1104.4613037109375, 'KL/mean': -856.0455932617188, 'KL/std': 585.9758911132812, 'logits/chosen': -0.9863793849945068, 'logits/rejected': -1.0393249988555908, 'epoch': 0.9} + 90%|█████████ | 613/681 [26:01<02:43, 2.41s/it] 90%|█████████ | 614/681 [26:04<02:45, 2.47s/it] {'loss': 1.0746, 'grad_norm': 37.47915267944336, 'learning_rate': 1.507684480352292e-08, 'fcm_dpo/beta': 0.000936733849812299, 'fcm_dpo/q_t': 0.3984524607658386, 'fcm_dpo/delta': -0.05709536373615265, 'fcm_dpo/margin': 485.2372131347656, 'margin_dpo/margin_mean': 485.2372131347656, 'margin_dpo/margin_std': 681.718505859375, 'logps/chosen': -730.3217163085938, 'logps/rejected': -1267.29931640625, 'logps/ref_chosen': -52.93898010253906, 'logps/ref_rejected': -104.67938232421875, 'KL/chosen_KL_mean': -677.3827514648438, 'KL/rejected_KL_mean': -1162.619873046875, 'KL/mean': -920.0013427734375, 'KL/std': 576.0763549804688, 'logits/chosen': -0.9485939741134644, 'logits/rejected': -1.0217413902282715, 'epoch': 0.9} + 90%|█████████ | 614/681 [26:04<02:45, 2.47s/it] 90%|█████████ | 615/681 [26:07<02:47, 2.54s/it] {'loss': 1.1261, 'grad_norm': 23.292583465576172, 'learning_rate': 1.4641017128809801e-08, 'fcm_dpo/beta': 0.0009383243741467595, 'fcm_dpo/q_t': 0.4133886396884918, 'fcm_dpo/delta': 0.01932334341108799, 'fcm_dpo/margin': 406.2218017578125, 'margin_dpo/margin_mean': 406.22174072265625, 'margin_dpo/margin_std': 651.3701171875, 'logps/chosen': -714.4732055664062, 'logps/rejected': -1150.05517578125, 'logps/ref_chosen': -65.81727600097656, 'logps/ref_rejected': -95.17749786376953, 'KL/chosen_KL_mean': -648.6558837890625, 'KL/rejected_KL_mean': -1054.877685546875, 'KL/mean': -851.766845703125, 'KL/std': 605.865234375, 'logits/chosen': -0.9777463674545288, 'logits/rejected': -0.999763548374176, 'epoch': 0.9} + 90%|█████████ | 615/681 [26:07<02:47, 2.54s/it] 90%|█████████ | 616/681 [26:09<02:49, 2.61s/it] {'loss': 1.1591, 'grad_norm': 29.635278701782227, 'learning_rate': 1.4211391382180637e-08, 'fcm_dpo/beta': 0.0009485027985647321, 'fcm_dpo/q_t': 0.4269304871559143, 'fcm_dpo/delta': 0.0827227309346199, 'fcm_dpo/margin': 337.23992919921875, 'margin_dpo/margin_mean': 337.2399597167969, 'margin_dpo/margin_std': 560.1079711914062, 'logps/chosen': -818.582763671875, 'logps/rejected': -1165.390380859375, 'logps/ref_chosen': -65.13285827636719, 'logps/ref_rejected': -74.70050048828125, 'KL/chosen_KL_mean': -753.4498901367188, 'KL/rejected_KL_mean': -1090.6898193359375, 'KL/mean': -922.0699462890625, 'KL/std': 498.5460205078125, 'logits/chosen': -1.066072702407837, 'logits/rejected': -1.0544729232788086, 'epoch': 0.9} + 90%|█████████ | 616/681 [26:09<02:49, 2.61s/it] 91%|█████████ | 617/681 [26:12<02:47, 2.61s/it] {'loss': 1.2347, 'grad_norm': 49.84982681274414, 'learning_rate': 1.378797888467345e-08, 'fcm_dpo/beta': 0.0009591138223186135, 'fcm_dpo/q_t': 0.4473581910133362, 'fcm_dpo/delta': 0.0745362788438797, 'fcm_dpo/margin': 239.3343048095703, 'margin_dpo/margin_mean': 239.3343048095703, 'margin_dpo/margin_std': 541.6489868164062, 'logps/chosen': -773.6142578125, 'logps/rejected': -1014.1771850585938, 'logps/ref_chosen': -63.005550384521484, 'logps/ref_rejected': -64.234130859375, 'KL/chosen_KL_mean': -710.6087646484375, 'KL/rejected_KL_mean': -949.943115234375, 'KL/mean': -830.27587890625, 'KL/std': 471.6356506347656, 'logits/chosen': -0.9499881267547607, 'logits/rejected': -0.9198344945907593, 'epoch': 0.91} + 91%|█████████ | 617/681 [26:12<02:47, 2.61s/it] 91%|█████████ | 618/681 [26:15<02:44, 2.61s/it] {'loss': 1.0908, 'grad_norm': 40.80027389526367, 'learning_rate': 1.3370790793601371e-08, 'fcm_dpo/beta': 0.0009580876212567091, 'fcm_dpo/q_t': 0.39342206716537476, 'fcm_dpo/delta': -0.1011531874537468, 'fcm_dpo/margin': 517.6361694335938, 'margin_dpo/margin_mean': 517.6361694335938, 'margin_dpo/margin_std': 831.257080078125, 'logps/chosen': -823.0704345703125, 'logps/rejected': -1365.7586669921875, 'logps/ref_chosen': -67.10134887695312, 'logps/ref_rejected': -92.15340423583984, 'KL/chosen_KL_mean': -755.9691162109375, 'KL/rejected_KL_mean': -1273.605224609375, 'KL/mean': -1014.7872314453125, 'KL/std': 649.2423095703125, 'logits/chosen': -1.010411024093628, 'logits/rejected': -1.0457146167755127, 'epoch': 0.91} + 91%|█████████ | 618/681 [26:15<02:44, 2.61s/it] 91%|█████████ | 619/681 [26:17<02:41, 2.60s/it] {'loss': 1.1713, 'grad_norm': 47.4921875, 'learning_rate': 1.2959838102258535e-08, 'fcm_dpo/beta': 0.0009506435599178076, 'fcm_dpo/q_t': 0.4217052459716797, 'fcm_dpo/delta': 0.024809934198856354, 'fcm_dpo/margin': 395.64404296875, 'margin_dpo/margin_mean': 395.64404296875, 'margin_dpo/margin_std': 793.0050048828125, 'logps/chosen': -800.1752319335938, 'logps/rejected': -1233.0263671875, 'logps/ref_chosen': -55.978233337402344, 'logps/ref_rejected': -93.1854019165039, 'KL/chosen_KL_mean': -744.197021484375, 'KL/rejected_KL_mean': -1139.841064453125, 'KL/mean': -942.0189819335938, 'KL/std': 602.969970703125, 'logits/chosen': -0.9666841626167297, 'logits/rejected': -0.9786205291748047, 'epoch': 0.91} + 91%|█████████ | 619/681 [26:17<02:41, 2.60s/it] 91%|█████████ | 620/681 [26:20<02:36, 2.57s/it] {'loss': 1.1439, 'grad_norm': 38.08302307128906, 'learning_rate': 1.2555131639630567e-08, 'fcm_dpo/beta': 0.0009550647810101509, 'fcm_dpo/q_t': 0.42078667879104614, 'fcm_dpo/delta': 0.05083069950342178, 'fcm_dpo/margin': 367.34222412109375, 'margin_dpo/margin_mean': 367.34222412109375, 'margin_dpo/margin_std': 601.9281005859375, 'logps/chosen': -737.3580322265625, 'logps/rejected': -1123.3134765625, 'logps/ref_chosen': -59.79750061035156, 'logps/ref_rejected': -78.41075134277344, 'KL/chosen_KL_mean': -677.560546875, 'KL/rejected_KL_mean': -1044.9027099609375, 'KL/mean': -861.231689453125, 'KL/std': 534.52587890625, 'logits/chosen': -1.0355204343795776, 'logits/rejected': -1.041193962097168, 'epoch': 0.91} + 91%|█████████ | 620/681 [26:20<02:36, 2.57s/it] 91%|█████████ | 621/681 [26:22<02:34, 2.57s/it] {'loss': 1.0292, 'grad_norm': 41.10859298706055, 'learning_rate': 1.2156682070109086e-08, 'fcm_dpo/beta': 0.0009489471558481455, 'fcm_dpo/q_t': 0.3808121085166931, 'fcm_dpo/delta': -0.13720259070396423, 'fcm_dpo/margin': 558.3480834960938, 'margin_dpo/margin_mean': 558.3480834960938, 'margin_dpo/margin_std': 704.4603271484375, 'logps/chosen': -694.7816772460938, 'logps/rejected': -1287.5654296875, 'logps/ref_chosen': -53.93375778198242, 'logps/ref_rejected': -88.36951446533203, 'KL/chosen_KL_mean': -640.847900390625, 'KL/rejected_KL_mean': -1199.1959228515625, 'KL/mean': -920.02197265625, 'KL/std': 635.8433227539062, 'logits/chosen': -1.0572166442871094, 'logits/rejected': -1.106241226196289, 'epoch': 0.91} + 91%|█████████ | 621/681 [26:22<02:34, 2.57s/it] 91%|█████████▏| 622/681 [26:25<02:31, 2.56s/it] {'loss': 1.1208, 'grad_norm': 29.741287231445312, 'learning_rate': 1.1764499893210878e-08, 'fcm_dpo/beta': 0.0009346996666863561, 'fcm_dpo/q_t': 0.41576099395751953, 'fcm_dpo/delta': 0.026821225881576538, 'fcm_dpo/margin': 399.9915771484375, 'margin_dpo/margin_mean': 399.9915771484375, 'margin_dpo/margin_std': 609.2018432617188, 'logps/chosen': -698.693115234375, 'logps/rejected': -1123.9176025390625, 'logps/ref_chosen': -60.28582000732422, 'logps/ref_rejected': -85.51873779296875, 'KL/chosen_KL_mean': -638.4073486328125, 'KL/rejected_KL_mean': -1038.39892578125, 'KL/mean': -838.403076171875, 'KL/std': 490.0394592285156, 'logits/chosen': -0.913569986820221, 'logits/rejected': -0.9021658897399902, 'epoch': 0.91} + 91%|█████████▏| 622/681 [26:25<02:31, 2.56s/it] 91%|█████████▏| 623/681 [26:27<02:21, 2.44s/it] {'loss': 1.188, 'grad_norm': 35.98710250854492, 'learning_rate': 1.1378595443300998e-08, 'fcm_dpo/beta': 0.0009554900461807847, 'fcm_dpo/q_t': 0.43424922227859497, 'fcm_dpo/delta': 0.10620071738958359, 'fcm_dpo/margin': 310.77447509765625, 'margin_dpo/margin_mean': 310.77447509765625, 'margin_dpo/margin_std': 607.74267578125, 'logps/chosen': -769.0152587890625, 'logps/rejected': -1100.7158203125, 'logps/ref_chosen': -64.1569595336914, 'logps/ref_rejected': -85.08304595947266, 'KL/chosen_KL_mean': -704.8582763671875, 'KL/rejected_KL_mean': -1015.6328125, 'KL/mean': -860.2454833984375, 'KL/std': 504.6408386230469, 'logits/chosen': -1.0679330825805664, 'logits/rejected': -1.069124460220337, 'epoch': 0.91} + 91%|█████████▏| 623/681 [26:27<02:21, 2.44s/it] 92%|█████████▏| 624/681 [26:29<02:20, 2.46s/it] {'loss': 1.0557, 'grad_norm': 39.16311264038086, 'learning_rate': 1.0998978889320582e-08, 'fcm_dpo/beta': 0.0009546733344905078, 'fcm_dpo/q_t': 0.39522331953048706, 'fcm_dpo/delta': -0.050946250557899475, 'fcm_dpo/margin': 469.9930419921875, 'margin_dpo/margin_mean': 469.9930419921875, 'margin_dpo/margin_std': 563.95068359375, 'logps/chosen': -768.2030029296875, 'logps/rejected': -1263.409423828125, 'logps/ref_chosen': -71.91862487792969, 'logps/ref_rejected': -97.13203430175781, 'KL/chosen_KL_mean': -696.2843627929688, 'KL/rejected_KL_mean': -1166.27734375, 'KL/mean': -931.2808837890625, 'KL/std': 522.650146484375, 'logits/chosen': -1.0855488777160645, 'logits/rejected': -1.0924354791641235, 'epoch': 0.92} + 92%|█████████▏| 624/681 [26:29<02:20, 2.46s/it] 92%|█████████▏| 625/681 [26:32<02:18, 2.48s/it] {'loss': 1.0202, 'grad_norm': 60.529544830322266, 'learning_rate': 1.0625660234518913e-08, 'fcm_dpo/beta': 0.0009412041981704533, 'fcm_dpo/q_t': 0.389559268951416, 'fcm_dpo/delta': -0.08514019101858139, 'fcm_dpo/margin': 511.17987060546875, 'margin_dpo/margin_mean': 511.17987060546875, 'margin_dpo/margin_std': 540.048095703125, 'logps/chosen': -707.16455078125, 'logps/rejected': -1246.0927734375, 'logps/ref_chosen': -58.342071533203125, 'logps/ref_rejected': -86.09038543701172, 'KL/chosen_KL_mean': -648.822509765625, 'KL/rejected_KL_mean': -1160.00244140625, 'KL/mean': -904.4124755859375, 'KL/std': 575.8782958984375, 'logits/chosen': -0.9935369491577148, 'logits/rejected': -1.0175690650939941, 'epoch': 0.92} + 92%|█████████▏| 625/681 [26:32<02:18, 2.48s/it] 92%|█████████▏| 626/681 [26:35<02:23, 2.60s/it] {'loss': 1.2216, 'grad_norm': 34.610740661621094, 'learning_rate': 1.0258649316189721e-08, 'fcm_dpo/beta': 0.0009531835094094276, 'fcm_dpo/q_t': 0.4358038902282715, 'fcm_dpo/delta': 0.1283356249332428, 'fcm_dpo/margin': 288.66387939453125, 'margin_dpo/margin_mean': 288.66387939453125, 'margin_dpo/margin_std': 637.5816650390625, 'logps/chosen': -906.321044921875, 'logps/rejected': -1219.06103515625, 'logps/ref_chosen': -75.11260986328125, 'logps/ref_rejected': -99.188720703125, 'KL/chosen_KL_mean': -831.2083740234375, 'KL/rejected_KL_mean': -1119.872314453125, 'KL/mean': -975.5403442382812, 'KL/std': 650.2010498046875, 'logits/chosen': -0.9936656951904297, 'logits/rejected': -0.986907422542572, 'epoch': 0.92} + 92%|█████████▏| 626/681 [26:35<02:23, 2.60s/it] 92%|█████████▏| 627/681 [26:38<02:23, 2.66s/it] {'loss': 1.0308, 'grad_norm': 25.602148056030273, 'learning_rate': 9.897955805412e-09, 'fcm_dpo/beta': 0.0009427897166460752, 'fcm_dpo/q_t': 0.3851046562194824, 'fcm_dpo/delta': -0.1610720157623291, 'fcm_dpo/margin': 585.8370361328125, 'margin_dpo/margin_mean': 585.8370361328125, 'margin_dpo/margin_std': 801.694580078125, 'logps/chosen': -609.9736328125, 'logps/rejected': -1254.822021484375, 'logps/ref_chosen': -47.74314880371094, 'logps/ref_rejected': -106.75448608398438, 'KL/chosen_KL_mean': -562.23046875, 'KL/rejected_KL_mean': -1148.0675048828125, 'KL/mean': -855.1490478515625, 'KL/std': 695.8411254882812, 'logits/chosen': -0.9343521595001221, 'logits/rejected': -1.0077568292617798, 'epoch': 0.92} + 92%|█████████▏| 627/681 [26:38<02:23, 2.66s/it] 92%|█████████▏| 628/681 [26:40<02:18, 2.62s/it] {'loss': 1.1016, 'grad_norm': 31.12415313720703, 'learning_rate': 9.543589206795238e-09, 'fcm_dpo/beta': 0.0009263536194339395, 'fcm_dpo/q_t': 0.40781164169311523, 'fcm_dpo/delta': -0.008988456800580025, 'fcm_dpo/margin': 441.0692138671875, 'margin_dpo/margin_mean': 441.0692138671875, 'margin_dpo/margin_std': 644.30810546875, 'logps/chosen': -778.8585815429688, 'logps/rejected': -1261.299560546875, 'logps/ref_chosen': -60.182945251464844, 'logps/ref_rejected': -101.55467224121094, 'KL/chosen_KL_mean': -718.6756591796875, 'KL/rejected_KL_mean': -1159.744873046875, 'KL/mean': -939.210205078125, 'KL/std': 562.9912109375, 'logits/chosen': -1.0448391437530518, 'logits/rejected': -1.059401273727417, 'epoch': 0.92} + 92%|█████████▏| 628/681 [26:40<02:18, 2.62s/it] 92%|█████████▏| 629/681 [26:43<02:15, 2.61s/it] {'loss': 1.1066, 'grad_norm': 37.944095611572266, 'learning_rate': 9.19555885822887e-09, 'fcm_dpo/beta': 0.0009307701839134097, 'fcm_dpo/q_t': 0.4127495288848877, 'fcm_dpo/delta': 0.027018554508686066, 'fcm_dpo/margin': 401.8076171875, 'margin_dpo/margin_mean': 401.8076171875, 'margin_dpo/margin_std': 538.9395751953125, 'logps/chosen': -782.427001953125, 'logps/rejected': -1211.6748046875, 'logps/ref_chosen': -64.21354675292969, 'logps/ref_rejected': -91.65367126464844, 'KL/chosen_KL_mean': -718.2134399414062, 'KL/rejected_KL_mean': -1120.0211181640625, 'KL/mean': -919.1171875, 'KL/std': 549.9234619140625, 'logits/chosen': -1.0313966274261475, 'logits/rejected': -1.0439316034317017, 'epoch': 0.92} + 92%|█████████▏| 629/681 [26:43<02:15, 2.61s/it] 93%|█████████▎| 630/681 [26:45<02:13, 2.62s/it] {'loss': 1.2752, 'grad_norm': 48.24060821533203, 'learning_rate': 8.85387393063622e-09, 'fcm_dpo/beta': 0.0009370100451633334, 'fcm_dpo/q_t': 0.4552198052406311, 'fcm_dpo/delta': 0.05341341719031334, 'fcm_dpo/margin': 228.9735565185547, 'margin_dpo/margin_mean': 228.9735565185547, 'margin_dpo/margin_std': 674.7388916015625, 'logps/chosen': -715.79296875, 'logps/rejected': -969.0738525390625, 'logps/ref_chosen': -59.29100036621094, 'logps/ref_rejected': -83.59829711914062, 'KL/chosen_KL_mean': -656.501953125, 'KL/rejected_KL_mean': -885.4755249023438, 'KL/mean': -770.98876953125, 'KL/std': 560.9046630859375, 'logits/chosen': -1.0444166660308838, 'logits/rejected': -1.0228123664855957, 'epoch': 0.93} + 93%|█████████▎| 630/681 [26:45<02:13, 2.62s/it] 93%|█████████▎| 631/681 [26:48<02:08, 2.58s/it] {'loss': 1.1601, 'grad_norm': 30.060415267944336, 'learning_rate': 8.518543427732949e-09, 'fcm_dpo/beta': 0.00095040921587497, 'fcm_dpo/q_t': 0.41983652114868164, 'fcm_dpo/delta': 0.051799606531858444, 'fcm_dpo/margin': 368.1885986328125, 'margin_dpo/margin_mean': 368.1886291503906, 'margin_dpo/margin_std': 662.2765502929688, 'logps/chosen': -858.2163696289062, 'logps/rejected': -1247.9029541015625, 'logps/ref_chosen': -59.45360565185547, 'logps/ref_rejected': -80.95156860351562, 'KL/chosen_KL_mean': -798.7627563476562, 'KL/rejected_KL_mean': -1166.951416015625, 'KL/mean': -982.8570556640625, 'KL/std': 575.3311767578125, 'logits/chosen': -1.1124560832977295, 'logits/rejected': -1.1201171875, 'epoch': 0.93} + 93%|█████████▎| 631/681 [26:48<02:08, 2.58s/it] 93%|█████████▎| 632/681 [26:50<02:02, 2.50s/it] {'loss': 1.1353, 'grad_norm': 43.195838928222656, 'learning_rate': 8.189576185789637e-09, 'fcm_dpo/beta': 0.0009511418174952269, 'fcm_dpo/q_t': 0.4149981141090393, 'fcm_dpo/delta': 0.026925835758447647, 'fcm_dpo/margin': 392.960205078125, 'margin_dpo/margin_mean': 392.960205078125, 'margin_dpo/margin_std': 638.5850830078125, 'logps/chosen': -754.5340576171875, 'logps/rejected': -1172.302978515625, 'logps/ref_chosen': -61.35155487060547, 'logps/ref_rejected': -86.16017150878906, 'KL/chosen_KL_mean': -693.1824951171875, 'KL/rejected_KL_mean': -1086.142822265625, 'KL/mean': -889.6626586914062, 'KL/std': 519.53759765625, 'logits/chosen': -1.0623399019241333, 'logits/rejected': -1.0656976699829102, 'epoch': 0.93} + 93%|█████████▎| 632/681 [26:50<02:02, 2.50s/it] 93%|█████████▎| 633/681 [26:53<01:58, 2.48s/it] {'loss': 1.2081, 'grad_norm': 51.9892692565918, 'learning_rate': 7.866980873399015e-09, 'fcm_dpo/beta': 0.0009779944084584713, 'fcm_dpo/q_t': 0.43694406747817993, 'fcm_dpo/delta': 0.12812459468841553, 'fcm_dpo/margin': 281.12689208984375, 'margin_dpo/margin_mean': 281.1269226074219, 'margin_dpo/margin_std': 584.89990234375, 'logps/chosen': -814.55224609375, 'logps/rejected': -1129.98486328125, 'logps/ref_chosen': -57.27816390991211, 'logps/ref_rejected': -91.58395385742188, 'KL/chosen_KL_mean': -757.2740478515625, 'KL/rejected_KL_mean': -1038.4010009765625, 'KL/mean': -897.8375244140625, 'KL/std': 511.4078369140625, 'logits/chosen': -1.1087684631347656, 'logits/rejected': -1.1197929382324219, 'epoch': 0.93} + 93%|█████████▎| 633/681 [26:53<01:58, 2.48s/it] 93%|█████████▎| 634/681 [26:55<01:58, 2.52s/it] {'loss': 1.2403, 'grad_norm': 50.689117431640625, 'learning_rate': 7.550765991247654e-09, 'fcm_dpo/beta': 0.0009954730048775673, 'fcm_dpo/q_t': 0.44643303751945496, 'fcm_dpo/delta': 0.06913463771343231, 'fcm_dpo/margin': 243.26641845703125, 'margin_dpo/margin_mean': 243.2664337158203, 'margin_dpo/margin_std': 593.8778076171875, 'logps/chosen': -937.604248046875, 'logps/rejected': -1221.37744140625, 'logps/ref_chosen': -66.61896514892578, 'logps/ref_rejected': -107.12564849853516, 'KL/chosen_KL_mean': -870.9852905273438, 'KL/rejected_KL_mean': -1114.251708984375, 'KL/mean': -992.6185302734375, 'KL/std': 614.6510009765625, 'logits/chosen': -0.9997051358222961, 'logits/rejected': -0.9940841197967529, 'epoch': 0.93} + 93%|█████████▎| 634/681 [26:55<01:58, 2.52s/it] 93%|█████████▎| 635/681 [26:58<01:55, 2.51s/it] {'loss': 1.1529, 'grad_norm': 40.568695068359375, 'learning_rate': 7.240939871891699e-09, 'fcm_dpo/beta': 0.0010012383572757244, 'fcm_dpo/q_t': 0.42191681265830994, 'fcm_dpo/delta': 0.04401912912726402, 'fcm_dpo/margin': 357.1339416503906, 'margin_dpo/margin_mean': 357.1339416503906, 'margin_dpo/margin_std': 645.6236572265625, 'logps/chosen': -805.5408935546875, 'logps/rejected': -1171.2197265625, 'logps/ref_chosen': -73.95551300048828, 'logps/ref_rejected': -82.50045776367188, 'KL/chosen_KL_mean': -731.5853271484375, 'KL/rejected_KL_mean': -1088.71923828125, 'KL/mean': -910.15234375, 'KL/std': 636.81201171875, 'logits/chosen': -1.059622049331665, 'logits/rejected': -1.0404071807861328, 'epoch': 0.93} + 93%|█████████▎| 635/681 [26:58<01:55, 2.51s/it] 93%|█████████▎| 636/681 [27:00<01:55, 2.56s/it] {'loss': 1.0945, 'grad_norm': 29.367713928222656, 'learning_rate': 6.937510679537628e-09, 'fcm_dpo/beta': 0.0010070966091006994, 'fcm_dpo/q_t': 0.40687400102615356, 'fcm_dpo/delta': -0.029895581305027008, 'fcm_dpo/margin': 424.7888488769531, 'margin_dpo/margin_mean': 424.78887939453125, 'margin_dpo/margin_std': 647.4033203125, 'logps/chosen': -753.655029296875, 'logps/rejected': -1200.793701171875, 'logps/ref_chosen': -59.628910064697266, 'logps/ref_rejected': -81.97883605957031, 'KL/chosen_KL_mean': -694.026123046875, 'KL/rejected_KL_mean': -1118.81494140625, 'KL/mean': -906.4205322265625, 'KL/std': 625.3888549804688, 'logits/chosen': -0.9780547618865967, 'logits/rejected': -0.9799286127090454, 'epoch': 0.93} + 93%|█████████▎| 636/681 [27:00<01:55, 2.56s/it] 94%|█████████▎| 637/681 [27:03<01:52, 2.56s/it] {'loss': 1.0659, 'grad_norm': 28.718305587768555, 'learning_rate': 6.640486409826785e-09, 'fcm_dpo/beta': 0.0009838433470577002, 'fcm_dpo/q_t': 0.3974034786224365, 'fcm_dpo/delta': -0.06574591249227524, 'fcm_dpo/margin': 469.54571533203125, 'margin_dpo/margin_mean': 469.54571533203125, 'margin_dpo/margin_std': 641.02294921875, 'logps/chosen': -751.2025756835938, 'logps/rejected': -1269.500732421875, 'logps/ref_chosen': -49.652687072753906, 'logps/ref_rejected': -98.40513610839844, 'KL/chosen_KL_mean': -701.5499267578125, 'KL/rejected_KL_mean': -1171.095703125, 'KL/mean': -936.32275390625, 'KL/std': 619.800537109375, 'logits/chosen': -1.07195246219635, 'logits/rejected': -1.1217677593231201, 'epoch': 0.94} + 94%|█████████▎| 637/681 [27:03<01:52, 2.56s/it] 94%|█████████▎| 638/681 [27:06<01:55, 2.68s/it] {'loss': 1.1675, 'grad_norm': 35.89247131347656, 'learning_rate': 6.349874889624962e-09, 'fcm_dpo/beta': 0.000977477291598916, 'fcm_dpo/q_t': 0.41288208961486816, 'fcm_dpo/delta': -0.07950125634670258, 'fcm_dpo/margin': 366.377685546875, 'margin_dpo/margin_mean': 366.377685546875, 'margin_dpo/margin_std': 677.9688720703125, 'logps/chosen': -741.9065551757812, 'logps/rejected': -1129.4290771484375, 'logps/ref_chosen': -58.156639099121094, 'logps/ref_rejected': -79.3014907836914, 'KL/chosen_KL_mean': -683.7498779296875, 'KL/rejected_KL_mean': -1050.1275634765625, 'KL/mean': -866.938720703125, 'KL/std': 584.358154296875, 'logits/chosen': -0.9825940728187561, 'logits/rejected': -0.9679138660430908, 'epoch': 0.94} + 94%|█████████▎| 638/681 [27:06<01:55, 2.68s/it] 94%|█████████▍| 639/681 [27:09<01:52, 2.68s/it] {'loss': 1.3398, 'grad_norm': 106.00291442871094, 'learning_rate': 6.065683776815933e-09, 'fcm_dpo/beta': 0.0009697063360363245, 'fcm_dpo/q_t': 0.4638892412185669, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 166.44647216796875, 'margin_dpo/margin_mean': 166.44647216796875, 'margin_dpo/margin_std': 696.6536865234375, 'logps/chosen': -1003.424072265625, 'logps/rejected': -1171.822265625, 'logps/ref_chosen': -72.32319641113281, 'logps/ref_rejected': -74.2749252319336, 'KL/chosen_KL_mean': -931.100830078125, 'KL/rejected_KL_mean': -1097.54736328125, 'KL/mean': -1014.3240966796875, 'KL/std': 560.0640869140625, 'logits/chosen': -0.96961510181427, 'logits/rejected': -0.9110531806945801, 'epoch': 0.94} + 94%|█████████▍| 639/681 [27:09<01:52, 2.68s/it] 94%|█████████▍| 640/681 [27:11<01:49, 2.66s/it] {'loss': 1.042, 'grad_norm': 35.883907318115234, 'learning_rate': 5.7879205600998296e-09, 'fcm_dpo/beta': 0.0009544800268486142, 'fcm_dpo/q_t': 0.3860216438770294, 'fcm_dpo/delta': -0.13660603761672974, 'fcm_dpo/margin': 554.8192138671875, 'margin_dpo/margin_mean': 554.8192138671875, 'margin_dpo/margin_std': 769.599853515625, 'logps/chosen': -783.435791015625, 'logps/rejected': -1390.7208251953125, 'logps/ref_chosen': -56.13436508178711, 'logps/ref_rejected': -108.60014343261719, 'KL/chosen_KL_mean': -727.3014526367188, 'KL/rejected_KL_mean': -1282.1207275390625, 'KL/mean': -1004.7110595703125, 'KL/std': 652.2802734375, 'logits/chosen': -0.9869524240493774, 'logits/rejected': -1.01767098903656, 'epoch': 0.94} + 94%|█████████▍| 640/681 [27:11<01:49, 2.66s/it] 94%|█████████▍| 641/681 [27:14<01:45, 2.63s/it] {'loss': 1.1796, 'grad_norm': 43.7893180847168, 'learning_rate': 5.516592558795746e-09, 'fcm_dpo/beta': 0.0009492564713582397, 'fcm_dpo/q_t': 0.4235016107559204, 'fcm_dpo/delta': 0.052702054381370544, 'fcm_dpo/margin': 367.82965087890625, 'margin_dpo/margin_mean': 367.82965087890625, 'margin_dpo/margin_std': 744.2984619140625, 'logps/chosen': -898.5880126953125, 'logps/rejected': -1288.4130859375, 'logps/ref_chosen': -64.99689483642578, 'logps/ref_rejected': -86.99232482910156, 'KL/chosen_KL_mean': -833.5911865234375, 'KL/rejected_KL_mean': -1201.420654296875, 'KL/mean': -1017.5059814453125, 'KL/std': 561.353759765625, 'logits/chosen': -1.0607787370681763, 'logits/rejected': -1.0730290412902832, 'epoch': 0.94} + 94%|█████████▍| 641/681 [27:14<01:45, 2.63s/it] 94%|█████████▍| 642/681 [27:16<01:40, 2.59s/it] {'loss': 1.1489, 'grad_norm': 38.126136779785156, 'learning_rate': 5.251706922648868e-09, 'fcm_dpo/beta': 0.0009536816505715251, 'fcm_dpo/q_t': 0.41450613737106323, 'fcm_dpo/delta': -0.025937873870134354, 'fcm_dpo/margin': 445.2503662109375, 'margin_dpo/margin_mean': 445.2503662109375, 'margin_dpo/margin_std': 889.4637451171875, 'logps/chosen': -847.4955444335938, 'logps/rejected': -1337.2987060546875, 'logps/ref_chosen': -65.68924713134766, 'logps/ref_rejected': -110.24205017089844, 'KL/chosen_KL_mean': -781.8062744140625, 'KL/rejected_KL_mean': -1227.056640625, 'KL/mean': -1004.431396484375, 'KL/std': 735.1798706054688, 'logits/chosen': -0.9798089861869812, 'logits/rejected': -1.0176451206207275, 'epoch': 0.94} + 94%|█████████▍| 642/681 [27:16<01:40, 2.59s/it] 94%|█████████▍| 643/681 [27:19<01:39, 2.62s/it] {'loss': 1.1546, 'grad_norm': 40.722110748291016, 'learning_rate': 4.993270631642038e-09, 'fcm_dpo/beta': 0.0009416728862561285, 'fcm_dpo/q_t': 0.42535167932510376, 'fcm_dpo/delta': -0.0368349552154541, 'fcm_dpo/margin': 342.09228515625, 'margin_dpo/margin_mean': 342.09228515625, 'margin_dpo/margin_std': 534.0775146484375, 'logps/chosen': -752.61669921875, 'logps/rejected': -1130.227294921875, 'logps/ref_chosen': -51.94999694824219, 'logps/ref_rejected': -87.46833801269531, 'KL/chosen_KL_mean': -700.666748046875, 'KL/rejected_KL_mean': -1042.759033203125, 'KL/mean': -871.712890625, 'KL/std': 530.568603515625, 'logits/chosen': -1.0998975038528442, 'logits/rejected': -1.1016184091567993, 'epoch': 0.94} + 94%|█████████▍| 643/681 [27:19<01:39, 2.62s/it] 95%|█████████▍| 644/681 [27:21<01:36, 2.62s/it] {'loss': 1.1883, 'grad_norm': 48.49483871459961, 'learning_rate': 4.741290495811873e-09, 'fcm_dpo/beta': 0.0009508873336017132, 'fcm_dpo/q_t': 0.42657724022865295, 'fcm_dpo/delta': 0.06925636529922485, 'fcm_dpo/margin': 350.287109375, 'margin_dpo/margin_mean': 350.287109375, 'margin_dpo/margin_std': 713.6632080078125, 'logps/chosen': -751.72900390625, 'logps/rejected': -1130.135009765625, 'logps/ref_chosen': -59.017662048339844, 'logps/ref_rejected': -87.13668823242188, 'KL/chosen_KL_mean': -692.7113037109375, 'KL/rejected_KL_mean': -1042.9984130859375, 'KL/mean': -867.8548583984375, 'KL/std': 628.4418334960938, 'logits/chosen': -1.0195714235305786, 'logits/rejected': -1.0287786722183228, 'epoch': 0.95} + 95%|█████████▍| 644/681 [27:22<01:36, 2.62s/it] 95%|█████████▍| 645/681 [27:24<01:34, 2.61s/it] {'loss': 1.3337, 'grad_norm': 94.91219329833984, 'learning_rate': 4.495773155069299e-09, 'fcm_dpo/beta': 0.0009679758222773671, 'fcm_dpo/q_t': 0.4633423388004303, 'fcm_dpo/delta': 0.06848917156457901, 'fcm_dpo/margin': 174.8188934326172, 'margin_dpo/margin_mean': 174.81890869140625, 'margin_dpo/margin_std': 677.0361328125, 'logps/chosen': -787.1890258789062, 'logps/rejected': -1003.9127197265625, 'logps/ref_chosen': -55.87602233886719, 'logps/ref_rejected': -97.78080749511719, 'KL/chosen_KL_mean': -731.31298828125, 'KL/rejected_KL_mean': -906.1319580078125, 'KL/mean': -818.722412109375, 'KL/std': 500.92510986328125, 'logits/chosen': -1.0337581634521484, 'logits/rejected': -1.0212106704711914, 'epoch': 0.95} + 95%|█████████▍| 645/681 [27:24<01:34, 2.61s/it] 95%|█████████▍| 646/681 [27:27<01:29, 2.56s/it] {'loss': 1.1804, 'grad_norm': 50.75778579711914, 'learning_rate': 4.256725079024553e-09, 'fcm_dpo/beta': 0.000979509437456727, 'fcm_dpo/q_t': 0.4318525791168213, 'fcm_dpo/delta': 0.10140877962112427, 'fcm_dpo/margin': 308.0731201171875, 'margin_dpo/margin_mean': 308.0731201171875, 'margin_dpo/margin_std': 558.4867553710938, 'logps/chosen': -740.1429443359375, 'logps/rejected': -1064.4461669921875, 'logps/ref_chosen': -61.275787353515625, 'logps/ref_rejected': -77.50580596923828, 'KL/chosen_KL_mean': -678.8671875, 'KL/rejected_KL_mean': -986.9403686523438, 'KL/mean': -832.9037475585938, 'KL/std': 467.6722412109375, 'logits/chosen': -1.0464283227920532, 'logits/rejected': -1.0351706743240356, 'epoch': 0.95} + 95%|█████████▍| 646/681 [27:27<01:29, 2.56s/it] 95%|█████████▌| 647/681 [27:29<01:28, 2.60s/it] {'loss': 1.1086, 'grad_norm': 31.015090942382812, 'learning_rate': 4.024152566816791e-09, 'fcm_dpo/beta': 0.0009915875270962715, 'fcm_dpo/q_t': 0.41298890113830566, 'fcm_dpo/delta': 0.024278640747070312, 'fcm_dpo/margin': 379.82562255859375, 'margin_dpo/margin_mean': 379.82562255859375, 'margin_dpo/margin_std': 518.4892578125, 'logps/chosen': -656.57861328125, 'logps/rejected': -1075.0712890625, 'logps/ref_chosen': -54.8524169921875, 'logps/ref_rejected': -93.5194091796875, 'KL/chosen_KL_mean': -601.7261962890625, 'KL/rejected_KL_mean': -981.5518188476562, 'KL/mean': -791.6389770507812, 'KL/std': 535.917724609375, 'logits/chosen': -0.9510085582733154, 'logits/rejected': -0.9799119830131531, 'epoch': 0.95} + 95%|█████████▌| 647/681 [27:29<01:28, 2.60s/it] 95%|█████████▌| 648/681 [27:32<01:23, 2.53s/it] {'loss': 1.0313, 'grad_norm': 26.95428466796875, 'learning_rate': 3.798061746947995e-09, 'fcm_dpo/beta': 0.0009713097242638469, 'fcm_dpo/q_t': 0.3838508427143097, 'fcm_dpo/delta': -0.15041759610176086, 'fcm_dpo/margin': 558.360107421875, 'margin_dpo/margin_mean': 558.360107421875, 'margin_dpo/margin_std': 770.7064208984375, 'logps/chosen': -704.860595703125, 'logps/rejected': -1307.761962890625, 'logps/ref_chosen': -54.17146682739258, 'logps/ref_rejected': -98.7127914428711, 'KL/chosen_KL_mean': -650.6891479492188, 'KL/rejected_KL_mean': -1209.0491943359375, 'KL/mean': -929.8692016601562, 'KL/std': 650.9400024414062, 'logits/chosen': -1.0590667724609375, 'logits/rejected': -1.1171326637268066, 'epoch': 0.95} + 95%|█████████▌| 648/681 [27:32<01:23, 2.53s/it] 95%|█████████▌| 649/681 [27:34<01:21, 2.56s/it] {'loss': 1.2299, 'grad_norm': 30.479537963867188, 'learning_rate': 3.5784585771215235e-09, 'fcm_dpo/beta': 0.0009851048234850168, 'fcm_dpo/q_t': 0.44306886196136475, 'fcm_dpo/delta': 0.13432249426841736, 'fcm_dpo/margin': 272.9219970703125, 'margin_dpo/margin_mean': 272.9219665527344, 'margin_dpo/margin_std': 634.4632568359375, 'logps/chosen': -737.660400390625, 'logps/rejected': -1028.1793212890625, 'logps/ref_chosen': -62.480350494384766, 'logps/ref_rejected': -80.07717895507812, 'KL/chosen_KL_mean': -675.1800537109375, 'KL/rejected_KL_mean': -948.1021118164062, 'KL/mean': -811.64111328125, 'KL/std': 509.8891296386719, 'logits/chosen': -1.0864759683609009, 'logits/rejected': -1.081239938735962, 'epoch': 0.95} + 95%|█████████▌| 649/681 [27:34<01:21, 2.56s/it] 95%|█████████▌| 650/681 [27:37<01:20, 2.59s/it] {'loss': 1.125, 'grad_norm': 33.085140228271484, 'learning_rate': 3.3653488440851253e-09, 'fcm_dpo/beta': 0.000984450918622315, 'fcm_dpo/q_t': 0.4085081219673157, 'fcm_dpo/delta': -0.017372816801071167, 'fcm_dpo/margin': 423.06866455078125, 'margin_dpo/margin_mean': 423.06866455078125, 'margin_dpo/margin_std': 715.4996337890625, 'logps/chosen': -775.5250244140625, 'logps/rejected': -1240.765625, 'logps/ref_chosen': -56.09281921386719, 'logps/ref_rejected': -98.26483917236328, 'KL/chosen_KL_mean': -719.4322509765625, 'KL/rejected_KL_mean': -1142.5008544921875, 'KL/mean': -930.966552734375, 'KL/std': 623.9461059570312, 'logits/chosen': -1.0022144317626953, 'logits/rejected': -1.0228140354156494, 'epoch': 0.95} + 95%|█████████▌| 650/681 [27:37<01:20, 2.59s/it] 96%|█████████▌| 651/681 [27:39<01:17, 2.60s/it] {'loss': 1.0046, 'grad_norm': 39.065223693847656, 'learning_rate': 3.158738163478475e-09, 'fcm_dpo/beta': 0.0009662117809057236, 'fcm_dpo/q_t': 0.3815712332725525, 'fcm_dpo/delta': -0.13117295503616333, 'fcm_dpo/margin': 542.51806640625, 'margin_dpo/margin_mean': 542.5181274414062, 'margin_dpo/margin_std': 605.7119140625, 'logps/chosen': -529.326171875, 'logps/rejected': -1128.376708984375, 'logps/ref_chosen': -43.42544937133789, 'logps/ref_rejected': -99.95791625976562, 'KL/chosen_KL_mean': -485.9007263183594, 'KL/rejected_KL_mean': -1028.4189453125, 'KL/mean': -757.1597900390625, 'KL/std': 586.595703125, 'logits/chosen': -1.039862871170044, 'logits/rejected': -1.0981051921844482, 'epoch': 0.96} + 96%|█████████▌| 651/681 [27:40<01:17, 2.60s/it] 96%|█████████▌| 652/681 [27:42<01:15, 2.60s/it] {'loss': 1.1236, 'grad_norm': 32.109920501708984, 'learning_rate': 2.9586319796851555e-09, 'fcm_dpo/beta': 0.0009623857913538814, 'fcm_dpo/q_t': 0.41180309653282166, 'fcm_dpo/delta': 0.0026037218049168587, 'fcm_dpo/margin': 412.978759765625, 'margin_dpo/margin_mean': 412.978759765625, 'margin_dpo/margin_std': 679.6004638671875, 'logps/chosen': -682.8228759765625, 'logps/rejected': -1144.99267578125, 'logps/ref_chosen': -62.57680892944336, 'logps/ref_rejected': -111.76779174804688, 'KL/chosen_KL_mean': -620.2460327148438, 'KL/rejected_KL_mean': -1033.224853515625, 'KL/mean': -826.7354736328125, 'KL/std': 587.8150024414062, 'logits/chosen': -1.0359432697296143, 'logits/rejected': -1.061659812927246, 'epoch': 0.96} + 96%|█████████▌| 652/681 [27:42<01:15, 2.60s/it] 96%|█████████▌| 653/681 [27:45<01:12, 2.58s/it] {'loss': 1.1426, 'grad_norm': 33.81359100341797, 'learning_rate': 2.7650355656892166e-09, 'fcm_dpo/beta': 0.0009670084109529853, 'fcm_dpo/q_t': 0.418673038482666, 'fcm_dpo/delta': 0.031598955392837524, 'fcm_dpo/margin': 382.0786437988281, 'margin_dpo/margin_mean': 382.07867431640625, 'margin_dpo/margin_std': 657.617431640625, 'logps/chosen': -812.2350463867188, 'logps/rejected': -1236.4503173828125, 'logps/ref_chosen': -61.11295700073242, 'logps/ref_rejected': -103.24960327148438, 'KL/chosen_KL_mean': -751.1220703125, 'KL/rejected_KL_mean': -1133.20068359375, 'KL/mean': -942.161376953125, 'KL/std': 632.44580078125, 'logits/chosen': -1.081420660018921, 'logits/rejected': -1.106847882270813, 'epoch': 0.96} + 96%|█████████▌| 653/681 [27:45<01:12, 2.58s/it] 96%|█████████▌| 654/681 [27:47<01:09, 2.58s/it] {'loss': 1.1425, 'grad_norm': 36.13345718383789, 'learning_rate': 2.577954022936174e-09, 'fcm_dpo/beta': 0.0009726278949528933, 'fcm_dpo/q_t': 0.42105910181999207, 'fcm_dpo/delta': 0.041933320462703705, 'fcm_dpo/margin': 369.70977783203125, 'margin_dpo/margin_mean': 369.70977783203125, 'margin_dpo/margin_std': 623.5421142578125, 'logps/chosen': -761.7586059570312, 'logps/rejected': -1168.5140380859375, 'logps/ref_chosen': -61.7281379699707, 'logps/ref_rejected': -98.7738037109375, 'KL/chosen_KL_mean': -700.0304565429688, 'KL/rejected_KL_mean': -1069.740234375, 'KL/mean': -884.8853759765625, 'KL/std': 530.6134643554688, 'logits/chosen': -1.0889091491699219, 'logits/rejected': -1.1061911582946777, 'epoch': 0.96} + 96%|█████████▌| 654/681 [27:47<01:09, 2.58s/it] 96%|█████████▌| 655/681 [27:50<01:06, 2.55s/it] {'loss': 1.1251, 'grad_norm': 30.601289749145508, 'learning_rate': 2.397392281198729e-09, 'fcm_dpo/beta': 0.0009784356225281954, 'fcm_dpo/q_t': 0.4150038957595825, 'fcm_dpo/delta': 0.01965608447790146, 'fcm_dpo/margin': 389.4818115234375, 'margin_dpo/margin_mean': 389.4818115234375, 'margin_dpo/margin_std': 619.3175659179688, 'logps/chosen': -686.934814453125, 'logps/rejected': -1125.131591796875, 'logps/ref_chosen': -49.576812744140625, 'logps/ref_rejected': -98.29183197021484, 'KL/chosen_KL_mean': -637.3580322265625, 'KL/rejected_KL_mean': -1026.83984375, 'KL/mean': -832.098876953125, 'KL/std': 523.131103515625, 'logits/chosen': -1.062340259552002, 'logits/rejected': -1.1041678190231323, 'epoch': 0.96} + 96%|█████████▌| 655/681 [27:50<01:06, 2.55s/it] 96%|█████████▋| 656/681 [27:52<01:05, 2.61s/it] {'loss': 0.9582, 'grad_norm': 84.34307861328125, 'learning_rate': 2.223355098446622e-09, 'fcm_dpo/beta': 0.0009496092097833753, 'fcm_dpo/q_t': 0.36486658453941345, 'fcm_dpo/delta': -0.2287594974040985, 'fcm_dpo/margin': 647.8305053710938, 'margin_dpo/margin_mean': 647.8305053710938, 'margin_dpo/margin_std': 702.5700073242188, 'logps/chosen': -750.5755615234375, 'logps/rejected': -1459.5313720703125, 'logps/ref_chosen': -52.54943084716797, 'logps/ref_rejected': -113.67464447021484, 'KL/chosen_KL_mean': -698.026123046875, 'KL/rejected_KL_mean': -1345.856689453125, 'KL/mean': -1021.94140625, 'KL/std': 696.60986328125, 'logits/chosen': -0.9495760202407837, 'logits/rejected': -1.0183899402618408, 'epoch': 0.96} + 96%|█████████▋| 656/681 [27:52<01:05, 2.61s/it] 96%|█████████▋| 657/681 [27:55<00:59, 2.49s/it] {'loss': 1.0539, 'grad_norm': 39.40578079223633, 'learning_rate': 2.055847060721566e-09, 'fcm_dpo/beta': 0.0009227419504895806, 'fcm_dpo/q_t': 0.39258694648742676, 'fcm_dpo/delta': -0.08039526641368866, 'fcm_dpo/margin': 516.2921142578125, 'margin_dpo/margin_mean': 516.2921142578125, 'margin_dpo/margin_std': 684.8028564453125, 'logps/chosen': -695.731201171875, 'logps/rejected': -1263.23779296875, 'logps/ref_chosen': -46.700538635253906, 'logps/ref_rejected': -97.91487121582031, 'KL/chosen_KL_mean': -649.0306396484375, 'KL/rejected_KL_mean': -1165.3228759765625, 'KL/mean': -907.1767578125, 'KL/std': 657.1854248046875, 'logits/chosen': -1.1030490398406982, 'logits/rejected': -1.1474685668945312, 'epoch': 0.96} + 96%|█████████▋| 657/681 [27:55<00:59, 2.49s/it] 97%|█████████▋| 658/681 [27:57<00:56, 2.45s/it] {'loss': 1.1197, 'grad_norm': 35.71732711791992, 'learning_rate': 1.8948725820160662e-09, 'fcm_dpo/beta': 0.0009188736439682543, 'fcm_dpo/q_t': 0.41408517956733704, 'fcm_dpo/delta': 0.026215653866529465, 'fcm_dpo/margin': 407.352783203125, 'margin_dpo/margin_mean': 407.352783203125, 'margin_dpo/margin_std': 595.02197265625, 'logps/chosen': -768.4239501953125, 'logps/rejected': -1210.758056640625, 'logps/ref_chosen': -60.95820999145508, 'logps/ref_rejected': -95.93949127197266, 'KL/chosen_KL_mean': -707.4656982421875, 'KL/rejected_KL_mean': -1114.818603515625, 'KL/mean': -911.14208984375, 'KL/std': 517.7315063476562, 'logits/chosen': -1.0394493341445923, 'logits/rejected': -1.0688188076019287, 'epoch': 0.97} + 97%|█████████▋| 658/681 [27:57<00:56, 2.45s/it] 97%|█████████▋| 659/681 [28:00<00:55, 2.53s/it] {'loss': 1.1117, 'grad_norm': 32.48310852050781, 'learning_rate': 1.7404359041573723e-09, 'fcm_dpo/beta': 0.000925220490898937, 'fcm_dpo/q_t': 0.414547324180603, 'fcm_dpo/delta': 0.022823944687843323, 'fcm_dpo/margin': 408.4324645996094, 'margin_dpo/margin_mean': 408.4324645996094, 'margin_dpo/margin_std': 585.33984375, 'logps/chosen': -699.755859375, 'logps/rejected': -1118.916259765625, 'logps/ref_chosen': -76.74298095703125, 'logps/ref_rejected': -87.4709701538086, 'KL/chosen_KL_mean': -623.0128784179688, 'KL/rejected_KL_mean': -1031.4453125, 'KL/mean': -827.2291259765625, 'KL/std': 528.8475341796875, 'logits/chosen': -0.990066409111023, 'logits/rejected': -0.9657001495361328, 'epoch': 0.97} + 97%|█████████▋| 659/681 [28:00<00:55, 2.53s/it] 97%|█████████▋| 660/681 [28:02<00:52, 2.51s/it] {'loss': 1.0459, 'grad_norm': 41.30915451049805, 'learning_rate': 1.592541096695571e-09, 'fcm_dpo/beta': 0.0009196768514811993, 'fcm_dpo/q_t': 0.39171260595321655, 'fcm_dpo/delta': -0.08219671249389648, 'fcm_dpo/margin': 520.118896484375, 'margin_dpo/margin_mean': 520.118896484375, 'margin_dpo/margin_std': 653.3119506835938, 'logps/chosen': -732.5086669921875, 'logps/rejected': -1269.539794921875, 'logps/ref_chosen': -59.04788589477539, 'logps/ref_rejected': -75.96005249023438, 'KL/chosen_KL_mean': -673.4608154296875, 'KL/rejected_KL_mean': -1193.5797119140625, 'KL/mean': -933.520263671875, 'KL/std': 613.996826171875, 'logits/chosen': -1.0613317489624023, 'logits/rejected': -1.0815818309783936, 'epoch': 0.97} + 97%|█████████▋| 660/681 [28:02<00:52, 2.51s/it] 97%|█████████▋| 661/681 [28:04<00:48, 2.42s/it] {'loss': 1.0866, 'grad_norm': 51.29008865356445, 'learning_rate': 1.4511920567963908e-09, 'fcm_dpo/beta': 0.0009147179080173373, 'fcm_dpo/q_t': 0.4064168334007263, 'fcm_dpo/delta': -0.02594481222331524, 'fcm_dpo/margin': 464.16583251953125, 'margin_dpo/margin_mean': 464.16583251953125, 'margin_dpo/margin_std': 674.4207153320312, 'logps/chosen': -657.1035766601562, 'logps/rejected': -1156.6011962890625, 'logps/ref_chosen': -50.673973083496094, 'logps/ref_rejected': -86.00569152832031, 'KL/chosen_KL_mean': -606.4295654296875, 'KL/rejected_KL_mean': -1070.595458984375, 'KL/mean': -838.5125732421875, 'KL/std': 678.2152709960938, 'logits/chosen': -1.0680885314941406, 'logits/rejected': -1.084218978881836, 'epoch': 0.97} + 97%|█████████▋| 661/681 [28:04<00:48, 2.42s/it] 97%|█████████▋| 662/681 [28:07<00:48, 2.56s/it] {'loss': 1.1704, 'grad_norm': 30.058595657348633, 'learning_rate': 1.3163925091384532e-09, 'fcm_dpo/beta': 0.0009189635748043656, 'fcm_dpo/q_t': 0.42484885454177856, 'fcm_dpo/delta': 0.06803098320960999, 'fcm_dpo/margin': 363.70428466796875, 'margin_dpo/margin_mean': 363.70428466796875, 'margin_dpo/margin_std': 682.1209106445312, 'logps/chosen': -761.7333984375, 'logps/rejected': -1145.232666015625, 'logps/ref_chosen': -69.26106262207031, 'logps/ref_rejected': -89.05593872070312, 'KL/chosen_KL_mean': -692.4723510742188, 'KL/rejected_KL_mean': -1056.1767578125, 'KL/mean': -874.324462890625, 'KL/std': 554.696044921875, 'logits/chosen': -0.986479640007019, 'logits/rejected': -0.9849323034286499, 'epoch': 0.97} + 97%|█████████▋| 662/681 [28:07<00:48, 2.56s/it] 97%|█████████▋| 663/681 [28:10<00:47, 2.67s/it] {'loss': 1.1207, 'grad_norm': 27.409046173095703, 'learning_rate': 1.1881460058152382e-09, 'fcm_dpo/beta': 0.0009198928019031882, 'fcm_dpo/q_t': 0.4112858176231384, 'fcm_dpo/delta': -0.004432424902915955, 'fcm_dpo/margin': 439.39404296875, 'margin_dpo/margin_mean': 439.39404296875, 'margin_dpo/margin_std': 730.9638671875, 'logps/chosen': -712.1788330078125, 'logps/rejected': -1200.619384765625, 'logps/ref_chosen': -64.87890625, 'logps/ref_rejected': -113.92536926269531, 'KL/chosen_KL_mean': -647.2999267578125, 'KL/rejected_KL_mean': -1086.6939697265625, 'KL/mean': -866.9969482421875, 'KL/std': 632.6029052734375, 'logits/chosen': -1.0472636222839355, 'logits/rejected': -1.0724174976348877, 'epoch': 0.97} + 97%|█████████▋| 663/681 [28:10<00:47, 2.67s/it] 98%|█████████▊| 664/681 [28:13<00:45, 2.67s/it] {'loss': 1.0745, 'grad_norm': 32.19367218017578, 'learning_rate': 1.066455926241383e-09, 'fcm_dpo/beta': 0.0009121259208768606, 'fcm_dpo/q_t': 0.401122510433197, 'fcm_dpo/delta': -0.04079785570502281, 'fcm_dpo/margin': 480.83197021484375, 'margin_dpo/margin_mean': 480.83197021484375, 'margin_dpo/margin_std': 644.64111328125, 'logps/chosen': -733.25732421875, 'logps/rejected': -1258.7225341796875, 'logps/ref_chosen': -60.88847351074219, 'logps/ref_rejected': -105.521728515625, 'KL/chosen_KL_mean': -672.3688354492188, 'KL/rejected_KL_mean': -1153.2008056640625, 'KL/mean': -912.7847900390625, 'KL/std': 606.9482421875, 'logits/chosen': -1.0128577947616577, 'logits/rejected': -1.0457968711853027, 'epoch': 0.98} + 98%|█████████▊| 664/681 [28:13<00:45, 2.67s/it] 98%|█████████▊| 665/681 [28:15<00:41, 2.57s/it] {'loss': 1.0985, 'grad_norm': 42.685340881347656, 'learning_rate': 9.513254770636137e-10, 'fcm_dpo/beta': 0.0009164921357296407, 'fcm_dpo/q_t': 0.41263529658317566, 'fcm_dpo/delta': 0.022980544716119766, 'fcm_dpo/margin': 412.32568359375, 'margin_dpo/margin_mean': 412.32568359375, 'margin_dpo/margin_std': 522.885009765625, 'logps/chosen': -674.4127807617188, 'logps/rejected': -1110.983154296875, 'logps/ref_chosen': -60.56413269042969, 'logps/ref_rejected': -84.80882263183594, 'KL/chosen_KL_mean': -613.8486328125, 'KL/rejected_KL_mean': -1026.17431640625, 'KL/mean': -820.011474609375, 'KL/std': 517.1300048828125, 'logits/chosen': -1.1305358409881592, 'logits/rejected': -1.1555566787719727, 'epoch': 0.98} + 98%|█████████▊| 665/681 [28:15<00:41, 2.57s/it] 98%|█████████▊| 666/681 [28:18<00:39, 2.61s/it] {'loss': 1.1055, 'grad_norm': 31.538972854614258, 'learning_rate': 8.427576920763956e-10, 'fcm_dpo/beta': 0.0009179958724416792, 'fcm_dpo/q_t': 0.4115890562534332, 'fcm_dpo/delta': 0.010700155980885029, 'fcm_dpo/margin': 424.4996643066406, 'margin_dpo/margin_mean': 424.4996643066406, 'margin_dpo/margin_std': 592.3191528320312, 'logps/chosen': -721.06298828125, 'logps/rejected': -1177.034423828125, 'logps/ref_chosen': -64.41996002197266, 'logps/ref_rejected': -95.8916244506836, 'KL/chosen_KL_mean': -656.64306640625, 'KL/rejected_KL_mean': -1081.1427001953125, 'KL/mean': -868.8929443359375, 'KL/std': 534.3782348632812, 'logits/chosen': -0.9657202959060669, 'logits/rejected': -0.9787443280220032, 'epoch': 0.98} + 98%|█████████▊| 666/681 [28:18<00:39, 2.61s/it] 98%|█████████▊| 667/681 [28:21<00:36, 2.62s/it] {'loss': 1.0643, 'grad_norm': 36.603797912597656, 'learning_rate': 7.407554321417764e-10, 'fcm_dpo/beta': 0.0009104580385610461, 'fcm_dpo/q_t': 0.3979244828224182, 'fcm_dpo/delta': -0.05206644535064697, 'fcm_dpo/margin': 493.6817321777344, 'margin_dpo/margin_mean': 493.68170166015625, 'margin_dpo/margin_std': 641.2989501953125, 'logps/chosen': -809.1754150390625, 'logps/rejected': -1321.41552734375, 'logps/ref_chosen': -69.27702331542969, 'logps/ref_rejected': -87.83549499511719, 'KL/chosen_KL_mean': -739.8983764648438, 'KL/rejected_KL_mean': -1233.580078125, 'KL/mean': -986.7392578125, 'KL/std': 586.67529296875, 'logits/chosen': -0.9688647389411926, 'logits/rejected': -0.9707045555114746, 'epoch': 0.98} + 98%|█████████▊| 667/681 [28:21<00:36, 2.62s/it] 98%|█████████▊| 668/681 [28:23<00:34, 2.64s/it] {'loss': 1.2043, 'grad_norm': 52.49308395385742, 'learning_rate': 6.453213851142225e-10, 'fcm_dpo/beta': 0.0009259539656341076, 'fcm_dpo/q_t': 0.4307333827018738, 'fcm_dpo/delta': 0.07960406690835953, 'fcm_dpo/margin': 347.7958984375, 'margin_dpo/margin_mean': 347.7958984375, 'margin_dpo/margin_std': 759.7967529296875, 'logps/chosen': -874.7958984375, 'logps/rejected': -1253.726806640625, 'logps/ref_chosen': -72.60400390625, 'logps/ref_rejected': -103.73905944824219, 'KL/chosen_KL_mean': -802.19189453125, 'KL/rejected_KL_mean': -1149.98779296875, 'KL/mean': -976.08984375, 'KL/std': 634.327392578125, 'logits/chosen': -1.0459859371185303, 'logits/rejected': -1.0500774383544922, 'epoch': 0.98} + 98%|█████████▊| 668/681 [28:23<00:34, 2.64s/it] 98%|█████████▊| 669/681 [28:26<00:31, 2.66s/it] {'loss': 1.0627, 'grad_norm': 30.24985122680664, 'learning_rate': 5.564580657695939e-10, 'fcm_dpo/beta': 0.0009176377207040787, 'fcm_dpo/q_t': 0.3957340717315674, 'fcm_dpo/delta': -0.06017923727631569, 'fcm_dpo/margin': 498.524169921875, 'margin_dpo/margin_mean': 498.524169921875, 'margin_dpo/margin_std': 653.9466552734375, 'logps/chosen': -637.45068359375, 'logps/rejected': -1167.78271484375, 'logps/ref_chosen': -46.116416931152344, 'logps/ref_rejected': -77.92434692382812, 'KL/chosen_KL_mean': -591.334228515625, 'KL/rejected_KL_mean': -1089.8583984375, 'KL/mean': -840.5963134765625, 'KL/std': 579.8489990234375, 'logits/chosen': -1.0195106267929077, 'logits/rejected': -1.0302537679672241, 'epoch': 0.98} + 98%|█████████▊| 669/681 [28:26<00:31, 2.66s/it] 98%|█████████▊| 670/681 [28:29<00:29, 2.65s/it] {'loss': 1.0676, 'grad_norm': 27.409191131591797, 'learning_rate': 4.741678157389739e-10, 'fcm_dpo/beta': 0.0009101468604058027, 'fcm_dpo/q_t': 0.39692699909210205, 'fcm_dpo/delta': -0.055214740335941315, 'fcm_dpo/margin': 497.284912109375, 'margin_dpo/margin_mean': 497.284912109375, 'margin_dpo/margin_std': 651.83740234375, 'logps/chosen': -627.3289794921875, 'logps/rejected': -1159.208740234375, 'logps/ref_chosen': -62.34575271606445, 'logps/ref_rejected': -96.9405517578125, 'KL/chosen_KL_mean': -564.9832763671875, 'KL/rejected_KL_mean': -1062.26806640625, 'KL/mean': -813.625732421875, 'KL/std': 539.938232421875, 'logits/chosen': -0.9492954015731812, 'logits/rejected': -0.9691870212554932, 'epoch': 0.98} + 98%|█████████▊| 670/681 [28:29<00:29, 2.65s/it] 99%|█████████▊| 671/681 [28:31<00:25, 2.56s/it] {'loss': 1.1388, 'grad_norm': 29.13888168334961, 'learning_rate': 3.9845280344705245e-10, 'fcm_dpo/beta': 0.000911594950594008, 'fcm_dpo/q_t': 0.4158746898174286, 'fcm_dpo/delta': 0.03665146976709366, 'fcm_dpo/margin': 399.72637939453125, 'margin_dpo/margin_mean': 399.7263488769531, 'margin_dpo/margin_std': 654.765869140625, 'logps/chosen': -777.5747680664062, 'logps/rejected': -1213.120361328125, 'logps/ref_chosen': -48.00010681152344, 'logps/ref_rejected': -83.81932067871094, 'KL/chosen_KL_mean': -729.5746459960938, 'KL/rejected_KL_mean': -1129.301025390625, 'KL/mean': -929.4378662109375, 'KL/std': 544.60791015625, 'logits/chosen': -1.0544450283050537, 'logits/rejected': -1.084800362586975, 'epoch': 0.99} + 99%|█████████▊| 671/681 [28:31<00:25, 2.56s/it] 99%|█████████▊| 672/681 [28:33<00:22, 2.55s/it] {'loss': 1.156, 'grad_norm': 53.180294036865234, 'learning_rate': 3.293150240547549e-10, 'fcm_dpo/beta': 0.0009131274418905377, 'fcm_dpo/q_t': 0.4185020923614502, 'fcm_dpo/delta': 0.03172078728675842, 'fcm_dpo/margin': 404.588623046875, 'margin_dpo/margin_mean': 404.588623046875, 'margin_dpo/margin_std': 734.8128662109375, 'logps/chosen': -863.9659423828125, 'logps/rejected': -1303.1114501953125, 'logps/ref_chosen': -58.58328628540039, 'logps/ref_rejected': -93.14015197753906, 'KL/chosen_KL_mean': -805.3826904296875, 'KL/rejected_KL_mean': -1209.97119140625, 'KL/mean': -1007.677001953125, 'KL/std': 662.1884155273438, 'logits/chosen': -1.1241331100463867, 'logits/rejected': -1.1317377090454102, 'epoch': 0.99} + 99%|█████████▊| 672/681 [28:33<00:22, 2.55s/it] 99%|█████████▉| 673/681 [28:36<00:19, 2.47s/it] {'loss': 1.1365, 'grad_norm': 33.6239128112793, 'learning_rate': 2.6675629940689504e-10, 'fcm_dpo/beta': 0.000922200852073729, 'fcm_dpo/q_t': 0.41957566142082214, 'fcm_dpo/delta': 0.04443016275763512, 'fcm_dpo/margin': 387.2952880859375, 'margin_dpo/margin_mean': 387.2952880859375, 'margin_dpo/margin_std': 632.4697875976562, 'logps/chosen': -772.0306396484375, 'logps/rejected': -1197.89892578125, 'logps/ref_chosen': -46.72320556640625, 'logps/ref_rejected': -85.29623413085938, 'KL/chosen_KL_mean': -725.3074951171875, 'KL/rejected_KL_mean': -1112.602783203125, 'KL/mean': -918.955078125, 'KL/std': 561.255859375, 'logits/chosen': -1.0476765632629395, 'logits/rejected': -1.0524837970733643, 'epoch': 0.99} + 99%|█████████▉| 673/681 [28:36<00:19, 2.47s/it] 99%|█████████▉| 674/681 [28:38<00:17, 2.56s/it] {'loss': 1.0637, 'grad_norm': 33.85618209838867, 'learning_rate': 2.1077827798404725e-10, 'fcm_dpo/beta': 0.0009187724208459258, 'fcm_dpo/q_t': 0.39939314126968384, 'fcm_dpo/delta': -0.055665817111730576, 'fcm_dpo/margin': 493.232666015625, 'margin_dpo/margin_mean': 493.232666015625, 'margin_dpo/margin_std': 653.6361083984375, 'logps/chosen': -630.609619140625, 'logps/rejected': -1148.442626953125, 'logps/ref_chosen': -45.445526123046875, 'logps/ref_rejected': -70.04593658447266, 'KL/chosen_KL_mean': -585.1640625, 'KL/rejected_KL_mean': -1078.396728515625, 'KL/mean': -831.7803955078125, 'KL/std': 547.6962890625, 'logits/chosen': -0.9810643196105957, 'logits/rejected': -1.0035473108291626, 'epoch': 0.99} + 99%|█████████▉| 674/681 [28:39<00:17, 2.56s/it] 99%|█████████▉| 675/681 [28:41<00:15, 2.54s/it] {'loss': 1.0612, 'grad_norm': 27.58077049255371, 'learning_rate': 1.6138243485910863e-10, 'fcm_dpo/beta': 0.0008998748380690813, 'fcm_dpo/q_t': 0.39825230836868286, 'fcm_dpo/delta': -0.0599069781601429, 'fcm_dpo/margin': 506.56787109375, 'margin_dpo/margin_mean': 506.56787109375, 'margin_dpo/margin_std': 637.0610961914062, 'logps/chosen': -707.8739624023438, 'logps/rejected': -1244.357666015625, 'logps/ref_chosen': -44.17628479003906, 'logps/ref_rejected': -74.09197998046875, 'KL/chosen_KL_mean': -663.6976318359375, 'KL/rejected_KL_mean': -1170.265625, 'KL/mean': -916.9816284179688, 'KL/std': 600.1426391601562, 'logits/chosen': -1.038741111755371, 'logits/rejected': -1.0553760528564453, 'epoch': 0.99} + 99%|█████████▉| 675/681 [28:41<00:15, 2.54s/it] 99%|█████████▉| 676/681 [28:44<00:13, 2.60s/it] {'loss': 1.0661, 'grad_norm': 27.7912654876709, 'learning_rate': 1.1857007165852472e-10, 'fcm_dpo/beta': 0.0009004472522065043, 'fcm_dpo/q_t': 0.4012463092803955, 'fcm_dpo/delta': -0.03510238975286484, 'fcm_dpo/margin': 481.5036315917969, 'margin_dpo/margin_mean': 481.50360107421875, 'margin_dpo/margin_std': 591.6707763671875, 'logps/chosen': -797.0269165039062, 'logps/rejected': -1295.49072265625, 'logps/ref_chosen': -71.39852905273438, 'logps/ref_rejected': -88.3587646484375, 'KL/chosen_KL_mean': -725.62841796875, 'KL/rejected_KL_mean': -1207.1319580078125, 'KL/mean': -966.3801879882812, 'KL/std': 584.3846435546875, 'logits/chosen': -0.995841920375824, 'logits/rejected': -1.0157501697540283, 'epoch': 0.99} + 99%|█████████▉| 676/681 [28:44<00:13, 2.60s/it] 99%|█████████▉| 677/681 [28:46<00:10, 2.52s/it] {'loss': 1.1046, 'grad_norm': 29.935705184936523, 'learning_rate': 8.23423165278725e-11, 'fcm_dpo/beta': 0.0008935732766985893, 'fcm_dpo/q_t': 0.41068124771118164, 'fcm_dpo/delta': -0.016854500398039818, 'fcm_dpo/margin': 465.65484619140625, 'margin_dpo/margin_mean': 465.65484619140625, 'margin_dpo/margin_std': 735.0831298828125, 'logps/chosen': -786.2052612304688, 'logps/rejected': -1273.559326171875, 'logps/ref_chosen': -56.527435302734375, 'logps/ref_rejected': -78.22654724121094, 'KL/chosen_KL_mean': -729.6778564453125, 'KL/rejected_KL_mean': -1195.332763671875, 'KL/mean': -962.5052490234375, 'KL/std': 609.0902099609375, 'logits/chosen': -1.073415756225586, 'logits/rejected': -1.0710859298706055, 'epoch': 0.99} + 99%|█████████▉| 677/681 [28:46<00:10, 2.52s/it] 100%|█████████▉| 678/681 [28:49<00:07, 2.49s/it] {'loss': 1.0566, 'grad_norm': 33.50616455078125, 'learning_rate': 5.270012410216185e-11, 'fcm_dpo/beta': 0.00088664231589064, 'fcm_dpo/q_t': 0.39224404096603394, 'fcm_dpo/delta': -0.08726058155298233, 'fcm_dpo/margin': 544.7600708007812, 'margin_dpo/margin_mean': 544.7600708007812, 'margin_dpo/margin_std': 738.8284912109375, 'logps/chosen': -654.531982421875, 'logps/rejected': -1233.76220703125, 'logps/ref_chosen': -46.13447570800781, 'logps/ref_rejected': -80.60462951660156, 'KL/chosen_KL_mean': -608.3975830078125, 'KL/rejected_KL_mean': -1153.1575927734375, 'KL/mean': -880.777587890625, 'KL/std': 650.5914306640625, 'logits/chosen': -1.0082026720046997, 'logits/rejected': -1.0482615232467651, 'epoch': 1.0} + 100%|█████████▉| 678/681 [28:49<00:07, 2.49s/it] 100%|█████████▉| 679/681 [28:51<00:05, 2.57s/it] {'loss': 1.1451, 'grad_norm': 31.147796630859375, 'learning_rate': 2.9644275480772416e-11, 'fcm_dpo/beta': 0.0008858998189680278, 'fcm_dpo/q_t': 0.42340487241744995, 'fcm_dpo/delta': 0.06133866682648659, 'fcm_dpo/margin': 384.67840576171875, 'margin_dpo/margin_mean': 384.678466796875, 'margin_dpo/margin_std': 626.0743408203125, 'logps/chosen': -758.1961669921875, 'logps/rejected': -1169.177734375, 'logps/ref_chosen': -50.294921875, 'logps/ref_rejected': -76.59813690185547, 'KL/chosen_KL_mean': -707.9012451171875, 'KL/rejected_KL_mean': -1092.57958984375, 'KL/mean': -900.2404174804688, 'KL/std': 537.857177734375, 'logits/chosen': -1.03668212890625, 'logits/rejected': -1.0291433334350586, 'epoch': 1.0} + 100%|█████████▉| 679/681 [28:51<00:05, 2.57s/it] 100%|█████████▉| 680/681 [28:54<00:02, 2.59s/it] {'loss': 1.0875, 'grad_norm': 36.256160736083984, 'learning_rate': 1.31753782067201e-11, 'fcm_dpo/beta': 0.0008777154725976288, 'fcm_dpo/q_t': 0.39790278673171997, 'fcm_dpo/delta': -0.05377676337957382, 'fcm_dpo/margin': 513.332275390625, 'margin_dpo/margin_mean': 513.332275390625, 'margin_dpo/margin_std': 758.932373046875, 'logps/chosen': -797.5235595703125, 'logps/rejected': -1346.32470703125, 'logps/ref_chosen': -76.91569519042969, 'logps/ref_rejected': -112.384765625, 'KL/chosen_KL_mean': -720.6077880859375, 'KL/rejected_KL_mean': -1233.93994140625, 'KL/mean': -977.27392578125, 'KL/std': 704.15478515625, 'logits/chosen': -1.0335376262664795, 'logits/rejected': -1.0608773231506348, 'epoch': 1.0} + 100%|█████████▉| 680/681 [28:54<00:02, 2.59s/it] 100%|██████████| 681/681 [28:56<00:00, 2.57s/it] {'loss': 1.1404, 'grad_norm': 42.854522705078125, 'learning_rate': 3.2938662507808745e-12, 'fcm_dpo/beta': 0.0008919438696466386, 'fcm_dpo/q_t': 0.41937246918678284, 'fcm_dpo/delta': 0.04472469165921211, 'fcm_dpo/margin': 398.6950988769531, 'margin_dpo/margin_mean': 398.6950988769531, 'margin_dpo/margin_std': 622.65087890625, 'logps/chosen': -768.9052734375, 'logps/rejected': -1195.200927734375, 'logps/ref_chosen': -60.957279205322266, 'logps/ref_rejected': -88.55797576904297, 'KL/chosen_KL_mean': -707.947998046875, 'KL/rejected_KL_mean': -1106.64306640625, 'KL/mean': -907.2955322265625, 'KL/std': 573.4292602539062, 'logits/chosen': -1.0845022201538086, 'logits/rejected': -1.1038618087768555, 'epoch': 1.0} + 100%|██████████| 681/681 [28:56<00:00, 2.57s/it][INFO|trainer.py:2681] 2026-04-29 16:41:55,545 >> + +Training completed. Do not forget to share your model on huggingface.co/models =) + + + {'train_runtime': 1736.9553, 'train_samples_per_second': 25.1, 'train_steps_per_second': 0.392, 'train_loss': 1.095842420442164, 'epoch': 1.0} + 100%|██████████| 681/681 [28:56<00:00, 2.57s/it] 100%|██████████| 681/681 [28:56<00:00, 2.55s/it] +***** train metrics ***** + epoch = 1.0 + total_flos = 0GF + train_loss = 1.0958 + train_runtime = 0:28:56.95 + train_samples = 43598 + train_samples_per_second = 25.1 + train_steps_per_second = 0.392 +2026-04-29 16:41:55 - INFO - __main__ - *** Training complete *** +2026-04-29 16:41:55 - INFO - __main__ - *** Save model *** +[INFO|configuration_utils.py:419] 2026-04-29 16:42:28,395 >> Configuration saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/config.json +[INFO|configuration_utils.py:911] 2026-04-29 16:42:28,399 >> Configuration saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/generation_config.json +[INFO|modeling_utils.py:3580] 2026-04-29 16:43:41,594 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 7 checkpoint shards. You can find where each parameters has been saved in the index located at /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/model.safetensors.index.json. +[INFO|tokenization_utils_base.py:2510] 2026-04-29 16:43:41,599 >> tokenizer config file saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/tokenizer_config.json +[INFO|tokenization_utils_base.py:2519] 2026-04-29 16:43:41,601 >> Special tokens file saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/special_tokens_map.json +2026-04-29 16:43:41 - INFO - __main__ - Saved HF-compatible model artifacts to /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449 +[INFO|modelcard.py:450] 2026-04-29 16:43:43,275 >> Dropping the following result as it does not have all the necessary fields: +{'dataset': {'name': 'Anthropic/hh-rlhf', 'type': 'Anthropic/hh-rlhf'}} +[INFO|configuration_utils.py:419] 2026-04-29 16:43:43,282 >> Configuration saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/config.json +2026-04-29 16:43:43 - INFO - __main__ - Skipping margin dataset upload because push_margin_dataset is false. +2026-04-29 16:43:43 - INFO - __main__ - *** Training complete! *** +wandb: - 0.011 MB of 0.011 MB uploaded wandb: \ 0.011 MB of 0.011 MB uploaded wandb: | 0.011 MB of 0.635 MB uploaded wandb: / 0.011 MB of 0.635 MB uploaded wandb: - 0.635 MB of 0.635 MB uploaded wandb: \ 0.635 MB of 0.635 MB uploaded wandb: +wandb: Run history: +wandb: train/KL/chosen_KL_mean ████████▇▇▇▇▇▆▆▆▆▅▅▅▅▆▅▅▄▅▄▁▃▃▃▃▃▁▁▂▁▁▁▁ +wandb: train/KL/mean ████████▇▇▇▇▇▆▆▆▆▅▆▆▅▅▅▅▄▅▄▂▃▃▃▃▂▂▂▃▂▁▁▁ +wandb: train/KL/rejected_KL_mean ████████▇▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▄▅▄▂▃▃▄▃▂▃▃▃▂▁▁▂ +wandb: train/KL/std ▁▁▁▁▁▁▁▂▂▂▂▂▂▃▂▃▃▃▃▃▄▃▃▄▄▄▅▇▆▆▅▆▇▆▆▆▆▇██ +wandb: train/epoch ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███ +wandb: train/fcm_dpo/beta ███▄▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ +wandb: train/fcm_dpo/delta ▆▆▅▂▁▅▄▆▆▅▅▆▆█▅▆▅▆▆▆█▄▇▄▄▅▅▅▇▅▇▄▃█▇▇▅▄▃▅ +wandb: train/fcm_dpo/margin ▁▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▂▃▃▃▄▃▅▄▄▅▅▅▅▄▆▇▄▅▅▆▇█▇ +wandb: train/fcm_dpo/q_t █▇▃▁▁▄▃▄▄▃▃▄▄▅▃▃▃▅▄▄▅▂▄▂▃▃▃▃▄▃▄▃▂▅▄▄▃▃▂▃ +wandb: train/global_step ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███ +wandb: train/grad_norm ██▅▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▂▂▁▁▁▁▂▁▂▂▂▁▂▁▃▂ +wandb: train/learning_rate ▂▃▅▇██████▇▇▇▇▇▆▆▆▆▅▅▅▄▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁ +wandb: train/logits/chosen ▆▆▆▅▅▅▅▆▇▇██▇▇▇▇█▇█▇▆▆▆▆▆▆▅▂▃▃▃▃▂▁▂▁▁▂▂▁ +wandb: train/logits/rejected ▆▆▆▅▅▅▅▆▇▇██▇█▇▇▇▇█▇▆▆▆▆▆▆▄▂▃▃▃▃▂▁▂▁▁▂▂▁ +wandb: train/logps/chosen ███████▇▇▇▇▇▆▆▆▆▆▅▅▅▅▆▅▅▄▅▄▁▃▃▃▃▃▁▂▂▁▁▁▁ +wandb: train/logps/ref_chosen ▅█▆▆▄▃▆▆▅▄▇▃▄▃█▅▅▄▇▅▃█▇▅▅▃▆▁▇▅▃▇▅▆▇▆▃▆▆▂ +wandb: train/logps/ref_rejected ▇▄█▅▅▃▆█▇▄▅▃▅▆▅▅▄▅█▅█▄▆▃▅▄▃▁▆▃▄▆▃▄█▅▄▂▂▂ +wandb: train/logps/rejected ████████▇▇▇▇▇▇▆▆▆▆▆▆▆▅▅▅▄▅▄▂▃▃▄▃▂▃▃▃▂▁▁▂ +wandb: train/loss █▇▃▂▁▄▃▄▄▄▃▄▄▅▃▃▃▅▃▃▅▂▄▂▂▃▂▄▄▃▄▂▂▅▅▃▃▃▂▃ +wandb: train/margin_dpo/margin_mean ▁▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▂▃▃▃▄▃▅▄▄▅▅▅▅▄▆▇▄▅▅▆▇█▇ +wandb: train/margin_dpo/margin_std ▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▄▃▃▄▄▄▄▇▅▆▅▇▇▆█▅▆█▇█ +wandb: +wandb: Run summary: +wandb: total_flos 0.0 +wandb: train/KL/chosen_KL_mean -707.948 +wandb: train/KL/mean -907.29553 +wandb: train/KL/rejected_KL_mean -1106.64307 +wandb: train/KL/std 573.42926 +wandb: train/epoch 1.0 +wandb: train/fcm_dpo/beta 0.00089 +wandb: train/fcm_dpo/delta 0.04472 +wandb: train/fcm_dpo/margin 398.6951 +wandb: train/fcm_dpo/q_t 0.41937 +wandb: train/global_step 681 +wandb: train/grad_norm 42.85452 +wandb: train/learning_rate 0.0 +wandb: train/logits/chosen -1.0845 +wandb: train/logits/rejected -1.10386 +wandb: train/logps/chosen -768.90527 +wandb: train/logps/ref_chosen -60.95728 +wandb: train/logps/ref_rejected -88.55798 +wandb: train/logps/rejected -1195.20093 +wandb: train/loss 1.1404 +wandb: train/margin_dpo/margin_mean 398.6951 +wandb: train/margin_dpo/margin_std 622.65088 +wandb: train_loss 1.09584 +wandb: train_runtime 1736.9553 +wandb: train_samples_per_second 25.1 +wandb: train_steps_per_second 0.392 +wandb: +wandb: 🚀 View run llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449 at: https://wandb.ai/can-not-fand-northeastern-university/llama3-hh-new-dpo-multi-beta-sweep/runs/5ilq5gld +wandb: ⭐️ View project at: https://wandb.ai/can-not-fand-northeastern-university/llama3-hh-new-dpo-multi-beta-sweep +wandb: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s) +wandb: Find logs at: ./wandb/wandb/run-20260429_161214-5ilq5gld/logs +wandb: WARNING The new W&B backend becomes opt-out in version 0.18.0; try it out with `wandb.require("core")`! See https://wandb.me/wandb-core for more information. diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..51d2a7a --- /dev/null +++ b/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 0.0, + "train_loss": 1.095842420442164, + "train_runtime": 1736.9553, + "train_samples": 43598, + "train_samples_per_second": 25.1, + "train_steps_per_second": 0.392 +} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..e3d7e3e --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,15706 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 200, + "global_step": 681, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "KL/chosen_KL_mean": 0.00527191162109375, + "KL/mean": 0.016706019639968872, + "KL/rejected_KL_mean": 0.028141021728515625, + "KL/std": 0.272699236869812, + "epoch": 0.0014684287812041115, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.02287006378173828, + "fcm_dpo/q_t": 0.501685619354248, + "grad_norm": 251.27125549316406, + "learning_rate": 0.0, + "logits/chosen": -0.4974287748336792, + "logits/rejected": -0.43299180269241333, + "logps/chosen": -50.1435661315918, + "logps/ref_chosen": -50.14883804321289, + "logps/ref_rejected": -74.1280517578125, + "logps/rejected": -74.09991455078125, + "loss": 1.3971, + "margin_dpo/margin_mean": -0.02287048101425171, + "margin_dpo/margin_std": 0.41920793056488037, + "step": 1 + }, + { + "KL/chosen_KL_mean": -0.03498649597167969, + "KL/mean": -0.00212840735912323, + "KL/rejected_KL_mean": 0.030735015869140625, + "KL/std": 0.24797174334526062, + "epoch": 0.002936857562408223, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.06572261452674866, + "fcm_dpo/q_t": 0.5049160718917847, + "grad_norm": 217.6841278076172, + "learning_rate": 7.246376811594203e-09, + "logits/chosen": -0.49536412954330444, + "logits/rejected": -0.4594460427761078, + "logps/chosen": -52.65568923950195, + "logps/ref_chosen": -52.620704650878906, + "logps/ref_rejected": -75.30413818359375, + "logps/rejected": -75.27340698242188, + "loss": 1.4089, + "margin_dpo/margin_mean": -0.06572240591049194, + "margin_dpo/margin_std": 0.35048407316207886, + "step": 2 + }, + { + "KL/chosen_KL_mean": 0.019153594970703125, + "KL/mean": -0.0025722086429595947, + "KL/rejected_KL_mean": -0.02429962158203125, + "KL/std": 0.2354850471019745, + "epoch": 0.004405286343612335, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.04345357418060303, + "fcm_dpo/q_t": 0.49674931168556213, + "grad_norm": 210.45652770996094, + "learning_rate": 1.4492753623188406e-08, + "logits/chosen": -0.4816104471683502, + "logits/rejected": -0.4421927034854889, + "logps/chosen": -60.962440490722656, + "logps/ref_chosen": -60.981597900390625, + "logps/ref_rejected": -68.67259216308594, + "logps/rejected": -68.6968994140625, + "loss": 1.3761, + "margin_dpo/margin_mean": 0.04345354437828064, + "margin_dpo/margin_std": 0.35039910674095154, + "step": 3 + }, + { + "KL/chosen_KL_mean": -0.001567840576171875, + "KL/mean": 0.023254141211509705, + "KL/rejected_KL_mean": 0.04807281494140625, + "KL/std": 0.26486122608184814, + "epoch": 0.005873715124816446, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.04963979125022888, + "fcm_dpo/q_t": 0.5037118196487427, + "grad_norm": 217.65200805664062, + "learning_rate": 2.1739130434782606e-08, + "logits/chosen": -0.4678453207015991, + "logits/rejected": -0.4402541518211365, + "logps/chosen": -56.76927947998047, + "logps/ref_chosen": -56.7677116394043, + "logps/ref_rejected": -86.64710998535156, + "logps/rejected": -86.59903717041016, + "loss": 1.4047, + "margin_dpo/margin_mean": -0.04964029788970947, + "margin_dpo/margin_std": 0.39235860109329224, + "step": 4 + }, + { + "KL/chosen_KL_mean": 0.030719757080078125, + "KL/mean": 0.031741127371788025, + "KL/rejected_KL_mean": 0.032764434814453125, + "KL/std": 0.2725304961204529, + "epoch": 0.007342143906020558, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.00204351544380188, + "fcm_dpo/q_t": 0.5001676678657532, + "grad_norm": 270.9964294433594, + "learning_rate": 2.898550724637681e-08, + "logits/chosen": -0.4972953498363495, + "logits/rejected": -0.4523712396621704, + "logps/chosen": -53.82865524291992, + "logps/ref_chosen": -53.859375, + "logps/ref_rejected": -84.14918518066406, + "logps/rejected": -84.11642456054688, + "loss": 1.3902, + "margin_dpo/margin_mean": -0.0020435750484466553, + "margin_dpo/margin_std": 0.37501761317253113, + "step": 5 + }, + { + "KL/chosen_KL_mean": -0.0196380615234375, + "KL/mean": -0.020306527614593506, + "KL/rejected_KL_mean": -0.02097320556640625, + "KL/std": 0.27848026156425476, + "epoch": 0.00881057268722467, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.001337289810180664, + "fcm_dpo/q_t": 0.49989837408065796, + "grad_norm": 276.3399963378906, + "learning_rate": 3.6231884057971014e-08, + "logits/chosen": -0.5145661234855652, + "logits/rejected": -0.47312256693840027, + "logps/chosen": -63.027122497558594, + "logps/ref_chosen": -63.007484436035156, + "logps/ref_rejected": -92.64534759521484, + "logps/rejected": -92.66632080078125, + "loss": 1.3894, + "margin_dpo/margin_mean": 0.0013370811939239502, + "margin_dpo/margin_std": 0.3880041539669037, + "step": 6 + }, + { + "KL/chosen_KL_mean": 0.03285026550292969, + "KL/mean": 0.02877350151538849, + "KL/rejected_KL_mean": 0.02469635009765625, + "KL/std": 0.30477985739707947, + "epoch": 0.010279001468428781, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.008149892091751099, + "fcm_dpo/q_t": 0.49938228726387024, + "grad_norm": 245.37692260742188, + "learning_rate": 4.347826086956521e-08, + "logits/chosen": -0.5077540874481201, + "logits/rejected": -0.47386452555656433, + "logps/chosen": -57.74197006225586, + "logps/ref_chosen": -57.774818420410156, + "logps/ref_rejected": -103.92059326171875, + "logps/rejected": -103.8958969116211, + "loss": 1.3873, + "margin_dpo/margin_mean": 0.008150070905685425, + "margin_dpo/margin_std": 0.38711145520210266, + "step": 7 + }, + { + "KL/chosen_KL_mean": -0.0022296905517578125, + "KL/mean": 0.031146153807640076, + "KL/rejected_KL_mean": 0.06452560424804688, + "KL/std": 0.33025887608528137, + "epoch": 0.011747430249632892, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.06675639748573303, + "fcm_dpo/q_t": 0.5049271583557129, + "grad_norm": 240.70013427734375, + "learning_rate": 5.0724637681159424e-08, + "logits/chosen": -0.5015411376953125, + "logits/rejected": -0.47501832246780396, + "logps/chosen": -58.718265533447266, + "logps/ref_chosen": -58.716033935546875, + "logps/ref_rejected": -79.3114242553711, + "logps/rejected": -79.24690246582031, + "loss": 1.4116, + "margin_dpo/margin_mean": -0.06675609946250916, + "margin_dpo/margin_std": 0.47787904739379883, + "step": 8 + }, + { + "KL/chosen_KL_mean": 0.02227783203125, + "KL/mean": -0.00015251338481903076, + "KL/rejected_KL_mean": -0.0225830078125, + "KL/std": 0.30635231733322144, + "epoch": 0.013215859030837005, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.04486778378486633, + "fcm_dpo/q_t": 0.49667075276374817, + "grad_norm": 254.40870666503906, + "learning_rate": 5.797101449275362e-08, + "logits/chosen": -0.4882626235485077, + "logits/rejected": -0.4411010444164276, + "logps/chosen": -69.84456634521484, + "logps/ref_chosen": -69.8668441772461, + "logps/ref_rejected": -99.6026611328125, + "logps/rejected": -99.625244140625, + "loss": 1.3778, + "margin_dpo/margin_mean": 0.04486680030822754, + "margin_dpo/margin_std": 0.4566071927547455, + "step": 9 + }, + { + "KL/chosen_KL_mean": 0.022472381591796875, + "KL/mean": 0.017022237181663513, + "KL/rejected_KL_mean": 0.01157379150390625, + "KL/std": 0.24305114150047302, + "epoch": 0.014684287812041116, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.010898560285568237, + "fcm_dpo/q_t": 0.4991758465766907, + "grad_norm": 212.15330505371094, + "learning_rate": 6.521739130434782e-08, + "logits/chosen": -0.4911458492279053, + "logits/rejected": -0.4477323889732361, + "logps/chosen": -48.33521270751953, + "logps/ref_chosen": -48.35768508911133, + "logps/ref_rejected": -80.37206268310547, + "logps/rejected": -80.36048889160156, + "loss": 1.3858, + "margin_dpo/margin_mean": 0.010898619890213013, + "margin_dpo/margin_std": 0.34846025705337524, + "step": 10 + }, + { + "KL/chosen_KL_mean": -0.0001010894775390625, + "KL/mean": 0.0009690821170806885, + "KL/rejected_KL_mean": 0.00203704833984375, + "KL/std": 0.291149377822876, + "epoch": 0.016152716593245228, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.0021440982818603516, + "fcm_dpo/q_t": 0.5001416802406311, + "grad_norm": 207.24131774902344, + "learning_rate": 7.246376811594203e-08, + "logits/chosen": -0.4701375365257263, + "logits/rejected": -0.4457797110080719, + "logps/chosen": -53.0169563293457, + "logps/ref_chosen": -53.01685333251953, + "logps/ref_rejected": -87.78038024902344, + "logps/rejected": -87.77833557128906, + "loss": 1.3909, + "margin_dpo/margin_mean": -0.0021438300609588623, + "margin_dpo/margin_std": 0.4191063344478607, + "step": 11 + }, + { + "KL/chosen_KL_mean": -0.010473251342773438, + "KL/mean": -0.01090405136346817, + "KL/rejected_KL_mean": -0.011325836181640625, + "KL/std": 0.27011072635650635, + "epoch": 0.01762114537444934, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.0008537918329238892, + "fcm_dpo/q_t": 0.4999362528324127, + "grad_norm": 271.4062194824219, + "learning_rate": 7.971014492753623e-08, + "logits/chosen": -0.538188099861145, + "logits/rejected": -0.5020288228988647, + "logps/chosen": -61.81591033935547, + "logps/ref_chosen": -61.80543518066406, + "logps/ref_rejected": -104.8582763671875, + "logps/rejected": -104.86959838867188, + "loss": 1.3887, + "margin_dpo/margin_mean": 0.0008526891469955444, + "margin_dpo/margin_std": 0.3452816605567932, + "step": 12 + }, + { + "KL/chosen_KL_mean": 0.04018592834472656, + "KL/mean": 0.009547561407089233, + "KL/rejected_KL_mean": -0.021087646484375, + "KL/std": 0.2959768772125244, + "epoch": 0.01908957415565345, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.06127917766571045, + "fcm_dpo/q_t": 0.495451420545578, + "grad_norm": 236.69508361816406, + "learning_rate": 8.695652173913042e-08, + "logits/chosen": -0.4695357084274292, + "logits/rejected": -0.44066792726516724, + "logps/chosen": -64.22016906738281, + "logps/ref_chosen": -64.2603530883789, + "logps/ref_rejected": -87.20307922363281, + "logps/rejected": -87.22416687011719, + "loss": 1.3727, + "margin_dpo/margin_mean": 0.061279088258743286, + "margin_dpo/margin_std": 0.44392725825309753, + "step": 13 + }, + { + "KL/chosen_KL_mean": -0.011384963989257812, + "KL/mean": -0.02230377495288849, + "KL/rejected_KL_mean": -0.03322601318359375, + "KL/std": 0.2484772801399231, + "epoch": 0.020558002936857563, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.021841615438461304, + "fcm_dpo/q_t": 0.49836230278015137, + "grad_norm": 255.34683227539062, + "learning_rate": 9.420289855072464e-08, + "logits/chosen": -0.46936067938804626, + "logits/rejected": -0.4296714961528778, + "logps/chosen": -58.12159729003906, + "logps/ref_chosen": -58.11021041870117, + "logps/ref_rejected": -104.04708099365234, + "logps/rejected": -104.08030700683594, + "loss": 1.3824, + "margin_dpo/margin_mean": 0.021842211484909058, + "margin_dpo/margin_std": 0.34157758951187134, + "step": 14 + }, + { + "KL/chosen_KL_mean": -0.05281257629394531, + "KL/mean": -0.04400016367435455, + "KL/rejected_KL_mean": -0.035190582275390625, + "KL/std": 0.252704918384552, + "epoch": 0.022026431718061675, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.01762327551841736, + "fcm_dpo/q_t": 0.501282811164856, + "grad_norm": 193.75828552246094, + "learning_rate": 1.0144927536231885e-07, + "logits/chosen": -0.5062054991722107, + "logits/rejected": -0.4881584942340851, + "logps/chosen": -57.01972198486328, + "logps/ref_chosen": -56.96691131591797, + "logps/ref_rejected": -80.80863952636719, + "logps/rejected": -80.84383392333984, + "loss": 1.3952, + "margin_dpo/margin_mean": -0.017623186111450195, + "margin_dpo/margin_std": 0.3986828327178955, + "step": 15 + }, + { + "KL/chosen_KL_mean": -0.016290664672851562, + "KL/mean": -0.02468542754650116, + "KL/rejected_KL_mean": -0.033077239990234375, + "KL/std": 0.264546275138855, + "epoch": 0.023494860499265784, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.016786009073257446, + "fcm_dpo/q_t": 0.49875974655151367, + "grad_norm": 251.1475830078125, + "learning_rate": 1.0869565217391303e-07, + "logits/chosen": -0.5262372493743896, + "logits/rejected": -0.48537588119506836, + "logps/chosen": -61.75618362426758, + "logps/ref_chosen": -61.739891052246094, + "logps/ref_rejected": -84.36947631835938, + "logps/rejected": -84.40254974365234, + "loss": 1.3846, + "margin_dpo/margin_mean": 0.016786575317382812, + "margin_dpo/margin_std": 0.38438552618026733, + "step": 16 + }, + { + "KL/chosen_KL_mean": 0.045101165771484375, + "KL/mean": -0.004545360803604126, + "KL/rejected_KL_mean": -0.054195404052734375, + "KL/std": 0.26345258951187134, + "epoch": 0.024963289280469897, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.09929555654525757, + "fcm_dpo/q_t": 0.492563396692276, + "grad_norm": 233.72305297851562, + "learning_rate": 1.1594202898550725e-07, + "logits/chosen": -0.4858800768852234, + "logits/rejected": -0.44683146476745605, + "logps/chosen": -67.66523742675781, + "logps/ref_chosen": -67.71033477783203, + "logps/ref_rejected": -85.37865447998047, + "logps/rejected": -85.43284606933594, + "loss": 1.3596, + "margin_dpo/margin_mean": 0.09929636120796204, + "margin_dpo/margin_std": 0.3553627133369446, + "step": 17 + }, + { + "KL/chosen_KL_mean": 0.018472671508789062, + "KL/mean": -0.009962007403373718, + "KL/rejected_KL_mean": -0.03839874267578125, + "KL/std": 0.21256747841835022, + "epoch": 0.02643171806167401, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.05686947703361511, + "fcm_dpo/q_t": 0.4957374036312103, + "grad_norm": 245.1805877685547, + "learning_rate": 1.2318840579710146e-07, + "logits/chosen": -0.5064246654510498, + "logits/rejected": -0.45240044593811035, + "logps/chosen": -47.72101593017578, + "logps/ref_chosen": -47.7394905090332, + "logps/ref_rejected": -75.4722900390625, + "logps/rejected": -75.51068115234375, + "loss": 1.3715, + "margin_dpo/margin_mean": 0.05686900019645691, + "margin_dpo/margin_std": 0.30619317293167114, + "step": 18 + }, + { + "KL/chosen_KL_mean": 0.057403564453125, + "KL/mean": 0.0009317547082901001, + "KL/rejected_KL_mean": -0.05553436279296875, + "KL/std": 0.27681607007980347, + "epoch": 0.027900146842878122, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.11293384432792664, + "fcm_dpo/q_t": 0.49154412746429443, + "grad_norm": 221.10816955566406, + "learning_rate": 1.3043478260869563e-07, + "logits/chosen": -0.5035191774368286, + "logits/rejected": -0.45468592643737793, + "logps/chosen": -70.14795684814453, + "logps/ref_chosen": -70.20536041259766, + "logps/ref_rejected": -89.7575912475586, + "logps/rejected": -89.81312561035156, + "loss": 1.3562, + "margin_dpo/margin_mean": 0.11293420195579529, + "margin_dpo/margin_std": 0.39282259345054626, + "step": 19 + }, + { + "KL/chosen_KL_mean": 0.002490997314453125, + "KL/mean": -0.047424912452697754, + "KL/rejected_KL_mean": -0.0973358154296875, + "KL/std": 0.25252386927604675, + "epoch": 0.02936857562408223, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.09983032941818237, + "fcm_dpo/q_t": 0.4925253391265869, + "grad_norm": 218.9619903564453, + "learning_rate": 1.3768115942028986e-07, + "logits/chosen": -0.5584119558334351, + "logits/rejected": -0.5027008652687073, + "logps/chosen": -50.800750732421875, + "logps/ref_chosen": -50.80324172973633, + "logps/ref_rejected": -78.82334899902344, + "logps/rejected": -78.92068481445312, + "loss": 1.359, + "margin_dpo/margin_mean": 0.0998302698135376, + "margin_dpo/margin_std": 0.32752203941345215, + "step": 20 + }, + { + "KL/chosen_KL_mean": 0.0040454864501953125, + "KL/mean": -0.04826641082763672, + "KL/rejected_KL_mean": -0.10057830810546875, + "KL/std": 0.28005871176719666, + "epoch": 0.030837004405286344, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.1046256422996521, + "fcm_dpo/q_t": 0.49217915534973145, + "grad_norm": 227.61415100097656, + "learning_rate": 1.4492753623188405e-07, + "logits/chosen": -0.4951311945915222, + "logits/rejected": -0.4713231921195984, + "logps/chosen": -50.0589714050293, + "logps/ref_chosen": -50.063018798828125, + "logps/ref_rejected": -77.86878967285156, + "logps/rejected": -77.96937561035156, + "loss": 1.3583, + "margin_dpo/margin_mean": 0.10462629795074463, + "margin_dpo/margin_std": 0.3697164058685303, + "step": 21 + }, + { + "KL/chosen_KL_mean": 0.0416259765625, + "KL/mean": -0.07057403028011322, + "KL/rejected_KL_mean": -0.18277359008789062, + "KL/std": 0.27579018473625183, + "epoch": 0.032305433186490456, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.22439709305763245, + "fcm_dpo/q_t": 0.48323309421539307, + "grad_norm": 243.1344451904297, + "learning_rate": 1.5217391304347825e-07, + "logits/chosen": -0.46972396969795227, + "logits/rejected": -0.4252376854419708, + "logps/chosen": -59.01601028442383, + "logps/ref_chosen": -59.05763626098633, + "logps/ref_rejected": -97.50466918945312, + "logps/rejected": -97.68744659423828, + "loss": 1.3233, + "margin_dpo/margin_mean": 0.22439703345298767, + "margin_dpo/margin_std": 0.3758489489555359, + "step": 22 + }, + { + "KL/chosen_KL_mean": -0.0016460418701171875, + "KL/mean": -0.07504256069660187, + "KL/rejected_KL_mean": -0.14843368530273438, + "KL/std": 0.2956269383430481, + "epoch": 0.033773861967694566, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.14679017663002014, + "fcm_dpo/q_t": 0.48901820182800293, + "grad_norm": 235.14389038085938, + "learning_rate": 1.5942028985507245e-07, + "logits/chosen": -0.4931301474571228, + "logits/rejected": -0.470862478017807, + "logps/chosen": -60.079341888427734, + "logps/ref_chosen": -60.07769775390625, + "logps/ref_rejected": -81.13955688476562, + "logps/rejected": -81.2879867553711, + "loss": 1.3475, + "margin_dpo/margin_mean": 0.1467902660369873, + "margin_dpo/margin_std": 0.4319424331188202, + "step": 23 + }, + { + "KL/chosen_KL_mean": 0.04685783386230469, + "KL/mean": -0.06474106758832932, + "KL/rejected_KL_mean": -0.1763458251953125, + "KL/std": 0.2938792407512665, + "epoch": 0.03524229074889868, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.2231999784708023, + "fcm_dpo/q_t": 0.4833376705646515, + "grad_norm": 242.87490844726562, + "learning_rate": 1.6666666666666665e-07, + "logits/chosen": -0.5050971508026123, + "logits/rejected": -0.48868709802627563, + "logps/chosen": -44.244178771972656, + "logps/ref_chosen": -44.29103469848633, + "logps/ref_rejected": -99.12521362304688, + "logps/rejected": -99.30155944824219, + "loss": 1.3241, + "margin_dpo/margin_mean": 0.22320020198822021, + "margin_dpo/margin_std": 0.3832412362098694, + "step": 24 + }, + { + "KL/chosen_KL_mean": 0.03863716125488281, + "KL/mean": -0.07194776833057404, + "KL/rejected_KL_mean": -0.18252944946289062, + "KL/std": 0.36078929901123047, + "epoch": 0.03671071953010279, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.22117015719413757, + "fcm_dpo/q_t": 0.48354804515838623, + "grad_norm": 215.2100830078125, + "learning_rate": 1.7391304347826085e-07, + "logits/chosen": -0.5046179294586182, + "logits/rejected": -0.47490301728248596, + "logps/chosen": -52.4984130859375, + "logps/ref_chosen": -52.537052154541016, + "logps/ref_rejected": -89.34219360351562, + "logps/rejected": -89.52471923828125, + "loss": 1.3255, + "margin_dpo/margin_mean": 0.22116953134536743, + "margin_dpo/margin_std": 0.4389370083808899, + "step": 25 + }, + { + "KL/chosen_KL_mean": 0.09479713439941406, + "KL/mean": -0.10598999261856079, + "KL/rejected_KL_mean": -0.3067779541015625, + "KL/std": 0.4495195746421814, + "epoch": 0.0381791483113069, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.40157991647720337, + "fcm_dpo/q_t": 0.470198392868042, + "grad_norm": 240.65769958496094, + "learning_rate": 1.8115942028985507e-07, + "logits/chosen": -0.5313920974731445, + "logits/rejected": -0.49980974197387695, + "logps/chosen": -53.82801055908203, + "logps/ref_chosen": -53.92280578613281, + "logps/ref_rejected": -103.35971069335938, + "logps/rejected": -103.66648864746094, + "loss": 1.277, + "margin_dpo/margin_mean": 0.40158015489578247, + "margin_dpo/margin_std": 0.5663931965827942, + "step": 26 + }, + { + "KL/chosen_KL_mean": 0.09834671020507812, + "KL/mean": -0.15826506912708282, + "KL/rejected_KL_mean": -0.41487884521484375, + "KL/std": 0.47776395082473755, + "epoch": 0.039647577092511016, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.5132265090942383, + "fcm_dpo/q_t": 0.4619525671005249, + "grad_norm": 256.8159484863281, + "learning_rate": 1.8840579710144927e-07, + "logits/chosen": -0.5238237977027893, + "logits/rejected": -0.4873714745044708, + "logps/chosen": -42.8001823425293, + "logps/ref_chosen": -42.898529052734375, + "logps/ref_rejected": -98.72419738769531, + "logps/rejected": -99.13908386230469, + "loss": 1.2455, + "margin_dpo/margin_mean": 0.5132263898849487, + "margin_dpo/margin_std": 0.5646921992301941, + "step": 27 + }, + { + "KL/chosen_KL_mean": 0.015338897705078125, + "KL/mean": -0.14923109114170074, + "KL/rejected_KL_mean": -0.3137969970703125, + "KL/std": 0.4466787576675415, + "epoch": 0.041116005873715125, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.3291375935077667, + "fcm_dpo/q_t": 0.4754894971847534, + "grad_norm": 209.7417755126953, + "learning_rate": 1.9565217391304347e-07, + "logits/chosen": -0.5104295611381531, + "logits/rejected": -0.4556117355823517, + "logps/chosen": -60.541160583496094, + "logps/ref_chosen": -60.55650329589844, + "logps/ref_rejected": -91.40111541748047, + "logps/rejected": -91.71491241455078, + "loss": 1.299, + "margin_dpo/margin_mean": 0.32913774251937866, + "margin_dpo/margin_std": 0.6227332353591919, + "step": 28 + }, + { + "KL/chosen_KL_mean": 0.1466350555419922, + "KL/mean": -0.15072329342365265, + "KL/rejected_KL_mean": -0.44808197021484375, + "KL/std": 0.5040621161460876, + "epoch": 0.042584434654919234, + "fcm_dpo/beta": 0.30000001192092896, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.5947138071060181, + "fcm_dpo/q_t": 0.4558557868003845, + "grad_norm": 247.98696899414062, + "learning_rate": 2.028985507246377e-07, + "logits/chosen": -0.5724257826805115, + "logits/rejected": -0.5275709629058838, + "logps/chosen": -57.66114807128906, + "logps/ref_chosen": -57.80778503417969, + "logps/ref_rejected": -97.39434814453125, + "logps/rejected": -97.84243774414062, + "loss": 1.2226, + "margin_dpo/margin_mean": 0.5947141647338867, + "margin_dpo/margin_std": 0.5515247583389282, + "step": 29 + }, + { + "KL/chosen_KL_mean": 0.13365936279296875, + "KL/mean": -0.17379064857959747, + "KL/rejected_KL_mean": -0.4812431335449219, + "KL/std": 0.5387458801269531, + "epoch": 0.04405286343612335, + "fcm_dpo/beta": 0.30578601360321045, + "fcm_dpo/delta": 0.09551539272069931, + "fcm_dpo/margin": 0.6149008274078369, + "fcm_dpo/q_t": 0.4539734125137329, + "grad_norm": 242.61647033691406, + "learning_rate": 2.1014492753623187e-07, + "logits/chosen": -0.5310481190681458, + "logits/rejected": -0.5018342137336731, + "logps/chosen": -52.44371032714844, + "logps/ref_chosen": -52.577369689941406, + "logps/ref_rejected": -98.48920440673828, + "logps/rejected": -98.97044372558594, + "loss": 1.2141, + "margin_dpo/margin_mean": 0.6149011850357056, + "margin_dpo/margin_std": 0.5445628762245178, + "step": 30 + }, + { + "KL/chosen_KL_mean": 0.1033782958984375, + "KL/mean": -0.14287717640399933, + "KL/rejected_KL_mean": -0.3891334533691406, + "KL/std": 0.5448415279388428, + "epoch": 0.04552129221732746, + "fcm_dpo/beta": 0.30578601360321045, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.4925115406513214, + "fcm_dpo/q_t": 0.46302998065948486, + "grad_norm": 189.3855438232422, + "learning_rate": 2.1739130434782607e-07, + "logits/chosen": -0.4839329719543457, + "logits/rejected": -0.43673014640808105, + "logps/chosen": -63.70354461669922, + "logps/ref_chosen": -63.806922912597656, + "logps/ref_rejected": -72.89400482177734, + "logps/rejected": -73.28314208984375, + "loss": 1.2527, + "margin_dpo/margin_mean": 0.4925113320350647, + "margin_dpo/margin_std": 0.6987070441246033, + "step": 31 + }, + { + "KL/chosen_KL_mean": 0.14173126220703125, + "KL/mean": -0.19788116216659546, + "KL/rejected_KL_mean": -0.5374908447265625, + "KL/std": 0.67319655418396, + "epoch": 0.04698972099853157, + "fcm_dpo/beta": 0.30862361192703247, + "fcm_dpo/delta": 0.0919463187456131, + "fcm_dpo/margin": 0.6792210936546326, + "fcm_dpo/q_t": 0.4495420455932617, + "grad_norm": 225.79928588867188, + "learning_rate": 2.2463768115942027e-07, + "logits/chosen": -0.5124537944793701, + "logits/rejected": -0.4711976647377014, + "logps/chosen": -62.59779357910156, + "logps/ref_chosen": -62.739524841308594, + "logps/ref_rejected": -89.3175048828125, + "logps/rejected": -89.85499572753906, + "loss": 1.2053, + "margin_dpo/margin_mean": 0.6792212724685669, + "margin_dpo/margin_std": 0.8771206140518188, + "step": 32 + }, + { + "KL/chosen_KL_mean": 0.15187644958496094, + "KL/mean": -0.1501408815383911, + "KL/rejected_KL_mean": -0.45215606689453125, + "KL/std": 0.5369387865066528, + "epoch": 0.048458149779735685, + "fcm_dpo/beta": 0.31455251574516296, + "fcm_dpo/delta": 0.09827958792448044, + "fcm_dpo/margin": 0.6040317416191101, + "fcm_dpo/q_t": 0.45361170172691345, + "grad_norm": 205.43460083007812, + "learning_rate": 2.318840579710145e-07, + "logits/chosen": -0.5107744932174683, + "logits/rejected": -0.4852328896522522, + "logps/chosen": -53.10909652709961, + "logps/ref_chosen": -53.26097106933594, + "logps/ref_rejected": -87.8851318359375, + "logps/rejected": -88.33729553222656, + "loss": 1.2149, + "margin_dpo/margin_mean": 0.6040312051773071, + "margin_dpo/margin_std": 0.6277109384536743, + "step": 33 + }, + { + "KL/chosen_KL_mean": 0.06952667236328125, + "KL/mean": -0.3327553868293762, + "KL/rejected_KL_mean": -0.7350387573242188, + "KL/std": 0.7567273378372192, + "epoch": 0.049926578560939794, + "fcm_dpo/beta": 0.3245845437049866, + "fcm_dpo/delta": 0.1425287425518036, + "fcm_dpo/margin": 0.8045636415481567, + "fcm_dpo/q_t": 0.43769991397857666, + "grad_norm": 211.18833923339844, + "learning_rate": 2.391304347826087e-07, + "logits/chosen": -0.48843878507614136, + "logits/rejected": -0.471035897731781, + "logps/chosen": -50.747802734375, + "logps/ref_chosen": -50.81732940673828, + "logps/ref_rejected": -101.92184448242188, + "logps/rejected": -102.6568832397461, + "loss": 1.1632, + "margin_dpo/margin_mean": 0.8045632839202881, + "margin_dpo/margin_std": 0.911353349685669, + "step": 34 + }, + { + "KL/chosen_KL_mean": 0.14473533630371094, + "KL/mean": -0.47121092677116394, + "KL/rejected_KL_mean": -1.087158203125, + "KL/std": 1.0332869291305542, + "epoch": 0.0513950073421439, + "fcm_dpo/beta": 0.32634085416793823, + "fcm_dpo/delta": -0.0021304162219166756, + "fcm_dpo/margin": 1.2318875789642334, + "fcm_dpo/q_t": 0.40531784296035767, + "grad_norm": 206.67367553710938, + "learning_rate": 2.463768115942029e-07, + "logits/chosen": -0.520686149597168, + "logits/rejected": -0.48392248153686523, + "logps/chosen": -50.87975311279297, + "logps/ref_chosen": -51.02449035644531, + "logps/ref_rejected": -106.82443237304688, + "logps/rejected": -107.91159057617188, + "loss": 1.0562, + "margin_dpo/margin_mean": 1.2318875789642334, + "margin_dpo/margin_std": 1.157043218612671, + "step": 35 + }, + { + "KL/chosen_KL_mean": 0.04113960266113281, + "KL/mean": -0.5248871445655823, + "KL/rejected_KL_mean": -1.090911865234375, + "KL/std": 1.0772857666015625, + "epoch": 0.05286343612334802, + "fcm_dpo/beta": 0.3298990726470947, + "fcm_dpo/delta": 0.026729058474302292, + "fcm_dpo/margin": 1.132055640220642, + "fcm_dpo/q_t": 0.41170650720596313, + "grad_norm": 183.64088439941406, + "learning_rate": 2.536231884057971e-07, + "logits/chosen": -0.5615625381469727, + "logits/rejected": -0.5254453420639038, + "logps/chosen": -51.95035171508789, + "logps/ref_chosen": -51.991493225097656, + "logps/ref_rejected": -86.0406265258789, + "logps/rejected": -87.13153839111328, + "loss": 1.0873, + "margin_dpo/margin_mean": 1.1320552825927734, + "margin_dpo/margin_std": 1.191450834274292, + "step": 36 + }, + { + "KL/chosen_KL_mean": -0.014146804809570312, + "KL/mean": -0.48922494053840637, + "KL/rejected_KL_mean": -0.9643096923828125, + "KL/std": 1.0300785303115845, + "epoch": 0.05433186490455213, + "fcm_dpo/beta": 0.3311406373977661, + "fcm_dpo/delta": 0.08806828409433365, + "fcm_dpo/margin": 0.9501617550849915, + "fcm_dpo/q_t": 0.4271436929702759, + "grad_norm": 166.49075317382812, + "learning_rate": 2.6086956521739126e-07, + "logits/chosen": -0.5190207958221436, + "logits/rejected": -0.4760361909866333, + "logps/chosen": -62.82125473022461, + "logps/ref_chosen": -62.807106018066406, + "logps/ref_rejected": -77.89507293701172, + "logps/rejected": -78.85938262939453, + "loss": 1.1429, + "margin_dpo/margin_mean": 0.9501620531082153, + "margin_dpo/margin_std": 1.3070077896118164, + "step": 37 + }, + { + "KL/chosen_KL_mean": 0.10162544250488281, + "KL/mean": -0.5765421390533447, + "KL/rejected_KL_mean": -1.2547111511230469, + "KL/std": 1.3713576793670654, + "epoch": 0.055800293685756244, + "fcm_dpo/beta": 0.332706481218338, + "fcm_dpo/delta": -0.053727779537439346, + "fcm_dpo/margin": 1.3563368320465088, + "fcm_dpo/q_t": 0.3991077244281769, + "grad_norm": 170.35545349121094, + "learning_rate": 2.681159420289855e-07, + "logits/chosen": -0.5162097215652466, + "logits/rejected": -0.483456015586853, + "logps/chosen": -48.28889465332031, + "logps/ref_chosen": -48.39051818847656, + "logps/ref_rejected": -97.91244506835938, + "logps/rejected": -99.16715240478516, + "loss": 1.0542, + "margin_dpo/margin_mean": 1.3563368320465088, + "margin_dpo/margin_std": 1.683530569076538, + "step": 38 + }, + { + "KL/chosen_KL_mean": 0.09050559997558594, + "KL/mean": -0.7401334047317505, + "KL/rejected_KL_mean": -1.5707664489746094, + "KL/std": 1.3007447719573975, + "epoch": 0.05726872246696035, + "fcm_dpo/beta": 0.3225635588169098, + "fcm_dpo/delta": -0.1443103402853012, + "fcm_dpo/margin": 1.6612706184387207, + "fcm_dpo/q_t": 0.37417465448379517, + "grad_norm": 174.84286499023438, + "learning_rate": 2.753623188405797e-07, + "logits/chosen": -0.5401022434234619, + "logits/rejected": -0.49907436966896057, + "logps/chosen": -50.65996551513672, + "logps/ref_chosen": -50.75047302246094, + "logps/ref_rejected": -78.56951141357422, + "logps/rejected": -80.14027404785156, + "loss": 0.9688, + "margin_dpo/margin_mean": 1.6612703800201416, + "margin_dpo/margin_std": 1.4079031944274902, + "step": 39 + }, + { + "KL/chosen_KL_mean": 0.1924877166748047, + "KL/mean": -0.5853748321533203, + "KL/rejected_KL_mean": -1.3632469177246094, + "KL/std": 1.4208192825317383, + "epoch": 0.05873715124816446, + "fcm_dpo/beta": 0.31791430711746216, + "fcm_dpo/delta": -0.09945414215326309, + "fcm_dpo/margin": 1.5557353496551514, + "fcm_dpo/q_t": 0.38887178897857666, + "grad_norm": 139.58270263671875, + "learning_rate": 2.8260869565217386e-07, + "logits/chosen": -0.5097917318344116, + "logits/rejected": -0.4792172312736511, + "logps/chosen": -57.792579650878906, + "logps/ref_chosen": -57.985069274902344, + "logps/ref_rejected": -74.3000717163086, + "logps/rejected": -75.66331481933594, + "loss": 1.0208, + "margin_dpo/margin_mean": 1.5557358264923096, + "margin_dpo/margin_std": 1.7217731475830078, + "step": 40 + }, + { + "KL/chosen_KL_mean": 0.018938064575195312, + "KL/mean": -0.8975176811218262, + "KL/rejected_KL_mean": -1.8139724731445312, + "KL/std": 1.8013949394226074, + "epoch": 0.06020558002936858, + "fcm_dpo/beta": 0.3076004981994629, + "fcm_dpo/delta": -0.17332524061203003, + "fcm_dpo/margin": 1.8329108953475952, + "fcm_dpo/q_t": 0.3731822073459625, + "grad_norm": 151.85443115234375, + "learning_rate": 2.898550724637681e-07, + "logits/chosen": -0.5335030555725098, + "logits/rejected": -0.49662622809410095, + "logps/chosen": -62.6768798828125, + "logps/ref_chosen": -62.69581604003906, + "logps/ref_rejected": -97.02352905273438, + "logps/rejected": -98.8375015258789, + "loss": 0.9812, + "margin_dpo/margin_mean": 1.8329112529754639, + "margin_dpo/margin_std": 1.9542649984359741, + "step": 41 + }, + { + "KL/chosen_KL_mean": 0.2197589874267578, + "KL/mean": -1.0017893314361572, + "KL/rejected_KL_mean": -2.2233352661132812, + "KL/std": 2.0296993255615234, + "epoch": 0.06167400881057269, + "fcm_dpo/beta": 0.2897103428840637, + "fcm_dpo/delta": -0.33123135566711426, + "fcm_dpo/margin": 2.4430980682373047, + "fcm_dpo/q_t": 0.34266549348831177, + "grad_norm": 148.23951721191406, + "learning_rate": 2.971014492753623e-07, + "logits/chosen": -0.549653172492981, + "logits/rejected": -0.5031782984733582, + "logps/chosen": -58.746665954589844, + "logps/ref_chosen": -58.966426849365234, + "logps/ref_rejected": -109.90837097167969, + "logps/rejected": -112.1317138671875, + "loss": 0.8866, + "margin_dpo/margin_mean": 2.4430971145629883, + "margin_dpo/margin_std": 2.242748737335205, + "step": 42 + }, + { + "KL/chosen_KL_mean": 0.5042438507080078, + "KL/mean": -0.717160165309906, + "KL/rejected_KL_mean": -1.938568115234375, + "KL/std": 1.816794991493225, + "epoch": 0.0631424375917768, + "fcm_dpo/beta": 0.2717617154121399, + "fcm_dpo/delta": -0.2843329906463623, + "fcm_dpo/margin": 2.442809581756592, + "fcm_dpo/q_t": 0.3463453948497772, + "grad_norm": 136.68927001953125, + "learning_rate": 3.043478260869565e-07, + "logits/chosen": -0.558872640132904, + "logits/rejected": -0.5347921848297119, + "logps/chosen": -53.65175247192383, + "logps/ref_chosen": -54.15599822998047, + "logps/ref_rejected": -96.48019409179688, + "logps/rejected": -98.41876220703125, + "loss": 0.888, + "margin_dpo/margin_mean": 2.442809581756592, + "margin_dpo/margin_std": 1.9328808784484863, + "step": 43 + }, + { + "KL/chosen_KL_mean": 0.22796630859375, + "KL/mean": -1.16841721534729, + "KL/rejected_KL_mean": -2.5648040771484375, + "KL/std": 2.2068114280700684, + "epoch": 0.06461086637298091, + "fcm_dpo/beta": 0.2552001476287842, + "fcm_dpo/delta": -0.3377786874771118, + "fcm_dpo/margin": 2.792766571044922, + "fcm_dpo/q_t": 0.33637571334838867, + "grad_norm": 138.35983276367188, + "learning_rate": 3.115942028985507e-07, + "logits/chosen": -0.4246031641960144, + "logits/rejected": -0.40571877360343933, + "logps/chosen": -49.85053253173828, + "logps/ref_chosen": -50.07849884033203, + "logps/ref_rejected": -108.78376007080078, + "logps/rejected": -111.34856414794922, + "loss": 0.8594, + "margin_dpo/margin_mean": 2.79276704788208, + "margin_dpo/margin_std": 2.16209077835083, + "step": 44 + }, + { + "KL/chosen_KL_mean": 0.16518402099609375, + "KL/mean": -0.9224708676338196, + "KL/rejected_KL_mean": -2.0101280212402344, + "KL/std": 1.9454594850540161, + "epoch": 0.06607929515418502, + "fcm_dpo/beta": 0.24679788947105408, + "fcm_dpo/delta": -0.14438273012638092, + "fcm_dpo/margin": 2.1753125190734863, + "fcm_dpo/q_t": 0.38011178374290466, + "grad_norm": 111.90202331542969, + "learning_rate": 3.188405797101449e-07, + "logits/chosen": -0.4974350333213806, + "logits/rejected": -0.4851893186569214, + "logps/chosen": -48.2497444152832, + "logps/ref_chosen": -48.4149284362793, + "logps/ref_rejected": -77.93643188476562, + "logps/rejected": -79.9465560913086, + "loss": 0.9987, + "margin_dpo/margin_mean": 2.1753129959106445, + "margin_dpo/margin_std": 2.421452283859253, + "step": 45 + }, + { + "KL/chosen_KL_mean": 0.19502639770507812, + "KL/mean": -1.2299586534500122, + "KL/rejected_KL_mean": -2.6549415588378906, + "KL/std": 2.5399794578552246, + "epoch": 0.06754772393538913, + "fcm_dpo/beta": 0.23457413911819458, + "fcm_dpo/delta": -0.2872818112373352, + "fcm_dpo/margin": 2.849971294403076, + "fcm_dpo/q_t": 0.3544684946537018, + "grad_norm": 118.44244384765625, + "learning_rate": 3.260869565217391e-07, + "logits/chosen": -0.527849555015564, + "logits/rejected": -0.4767192304134369, + "logps/chosen": -55.80439758300781, + "logps/ref_chosen": -55.999427795410156, + "logps/ref_rejected": -95.652587890625, + "logps/rejected": -98.30752563476562, + "loss": 0.9307, + "margin_dpo/margin_mean": 2.8499715328216553, + "margin_dpo/margin_std": 3.0548930168151855, + "step": 46 + }, + { + "KL/chosen_KL_mean": 0.384002685546875, + "KL/mean": -1.0363800525665283, + "KL/rejected_KL_mean": -2.4567604064941406, + "KL/std": 2.471060276031494, + "epoch": 0.06901615271659324, + "fcm_dpo/beta": 0.22347593307495117, + "fcm_dpo/delta": -0.25001367926597595, + "fcm_dpo/margin": 2.840768337249756, + "fcm_dpo/q_t": 0.35581424832344055, + "grad_norm": 111.23075866699219, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -0.5658366680145264, + "logits/rejected": -0.5126087665557861, + "logps/chosen": -57.542076110839844, + "logps/ref_chosen": -57.92607879638672, + "logps/ref_rejected": -94.67920684814453, + "logps/rejected": -97.13597106933594, + "loss": 0.9238, + "margin_dpo/margin_mean": 2.840768575668335, + "margin_dpo/margin_std": 2.5524120330810547, + "step": 47 + }, + { + "KL/chosen_KL_mean": 0.06220054626464844, + "KL/mean": -1.376787781715393, + "KL/rejected_KL_mean": -2.815776824951172, + "KL/std": 2.4476280212402344, + "epoch": 0.07048458149779736, + "fcm_dpo/beta": 0.21101200580596924, + "fcm_dpo/delta": -0.22269634902477264, + "fcm_dpo/margin": 2.8779749870300293, + "fcm_dpo/q_t": 0.36156171560287476, + "grad_norm": 119.95755767822266, + "learning_rate": 3.4057971014492755e-07, + "logits/chosen": -0.611646294593811, + "logits/rejected": -0.5553910732269287, + "logps/chosen": -57.12587356567383, + "logps/ref_chosen": -57.188072204589844, + "logps/ref_rejected": -88.0166015625, + "logps/rejected": -90.83238220214844, + "loss": 0.9519, + "margin_dpo/margin_mean": 2.877974510192871, + "margin_dpo/margin_std": 2.711777448654175, + "step": 48 + }, + { + "KL/chosen_KL_mean": 0.34456825256347656, + "KL/mean": -1.4146552085876465, + "KL/rejected_KL_mean": -3.1738739013671875, + "KL/std": 3.116457939147949, + "epoch": 0.07195301027900147, + "fcm_dpo/beta": 0.20099371671676636, + "fcm_dpo/delta": -0.329367995262146, + "fcm_dpo/margin": 3.5184366703033447, + "fcm_dpo/q_t": 0.3465607166290283, + "grad_norm": 93.63461303710938, + "learning_rate": 3.478260869565217e-07, + "logits/chosen": -0.537588357925415, + "logits/rejected": -0.4779571294784546, + "logps/chosen": -61.340702056884766, + "logps/ref_chosen": -61.685272216796875, + "logps/ref_rejected": -83.76747131347656, + "logps/rejected": -86.94134521484375, + "loss": 0.9077, + "margin_dpo/margin_mean": 3.5184359550476074, + "margin_dpo/margin_std": 3.583613157272339, + "step": 49 + }, + { + "KL/chosen_KL_mean": -0.04874992370605469, + "KL/mean": -1.9753637313842773, + "KL/rejected_KL_mean": -3.9019737243652344, + "KL/std": 3.2308237552642822, + "epoch": 0.07342143906020558, + "fcm_dpo/beta": 0.18617978692054749, + "fcm_dpo/delta": -0.3431151509284973, + "fcm_dpo/margin": 3.8532233238220215, + "fcm_dpo/q_t": 0.34047919511795044, + "grad_norm": 91.7352066040039, + "learning_rate": 3.5507246376811595e-07, + "logits/chosen": -0.5507527589797974, + "logits/rejected": -0.5151888728141785, + "logps/chosen": -58.77288818359375, + "logps/ref_chosen": -58.72413635253906, + "logps/ref_rejected": -96.35814666748047, + "logps/rejected": -100.26011657714844, + "loss": 0.8888, + "margin_dpo/margin_mean": 3.8532235622406006, + "margin_dpo/margin_std": 3.644498825073242, + "step": 50 + }, + { + "KL/chosen_KL_mean": -0.19082260131835938, + "KL/mean": -2.0723307132720947, + "KL/rejected_KL_mean": -3.9538421630859375, + "KL/std": 3.815108299255371, + "epoch": 0.07488986784140969, + "fcm_dpo/beta": 0.17457202076911926, + "fcm_dpo/delta": -0.27861201763153076, + "fcm_dpo/margin": 3.763016700744629, + "fcm_dpo/q_t": 0.3622833490371704, + "grad_norm": 73.21631622314453, + "learning_rate": 3.6231884057971015e-07, + "logits/chosen": -0.5122474431991577, + "logits/rejected": -0.47880104184150696, + "logps/chosen": -61.564491271972656, + "logps/ref_chosen": -61.3736686706543, + "logps/ref_rejected": -76.00199890136719, + "logps/rejected": -79.95584106445312, + "loss": 0.9655, + "margin_dpo/margin_mean": 3.76301646232605, + "margin_dpo/margin_std": 4.63081693649292, + "step": 51 + }, + { + "KL/chosen_KL_mean": 0.4765663146972656, + "KL/mean": -2.4252328872680664, + "KL/rejected_KL_mean": -5.327030181884766, + "KL/std": 4.417823314666748, + "epoch": 0.0763582966226138, + "fcm_dpo/beta": 0.15911118686199188, + "fcm_dpo/delta": -0.5776325464248657, + "fcm_dpo/margin": 5.8035993576049805, + "fcm_dpo/q_t": 0.2984340786933899, + "grad_norm": 73.25430297851562, + "learning_rate": 3.695652173913043e-07, + "logits/chosen": -0.5584224462509155, + "logits/rejected": -0.5033497214317322, + "logps/chosen": -51.86079025268555, + "logps/ref_chosen": -52.33735656738281, + "logps/ref_rejected": -79.97391510009766, + "logps/rejected": -85.30094909667969, + "loss": 0.7706, + "margin_dpo/margin_mean": 5.803599834442139, + "margin_dpo/margin_std": 4.6530866622924805, + "step": 52 + }, + { + "KL/chosen_KL_mean": -0.0835723876953125, + "KL/mean": -3.0129737854003906, + "KL/rejected_KL_mean": -5.942371368408203, + "KL/std": 5.052390098571777, + "epoch": 0.07782672540381791, + "fcm_dpo/beta": 0.14530491828918457, + "fcm_dpo/delta": -0.49047210812568665, + "fcm_dpo/margin": 5.85880708694458, + "fcm_dpo/q_t": 0.32626470923423767, + "grad_norm": 72.15726470947266, + "learning_rate": 3.7681159420289855e-07, + "logits/chosen": -0.606256365776062, + "logits/rejected": -0.5844460725784302, + "logps/chosen": -53.398223876953125, + "logps/ref_chosen": -53.31465148925781, + "logps/ref_rejected": -91.78359985351562, + "logps/rejected": -97.72596740722656, + "loss": 0.8522, + "margin_dpo/margin_mean": 5.858806610107422, + "margin_dpo/margin_std": 5.748600006103516, + "step": 53 + }, + { + "KL/chosen_KL_mean": -0.2327747344970703, + "KL/mean": -2.8028464317321777, + "KL/rejected_KL_mean": -5.372917175292969, + "KL/std": 4.757123947143555, + "epoch": 0.07929515418502203, + "fcm_dpo/beta": 0.13407519459724426, + "fcm_dpo/delta": -0.3100808262825012, + "fcm_dpo/margin": 5.140138626098633, + "fcm_dpo/q_t": 0.3468964397907257, + "grad_norm": 64.02351379394531, + "learning_rate": 3.8405797101449274e-07, + "logits/chosen": -0.5881419777870178, + "logits/rejected": -0.534300684928894, + "logps/chosen": -50.92143630981445, + "logps/ref_chosen": -50.68865966796875, + "logps/ref_rejected": -91.71539306640625, + "logps/rejected": -97.08831024169922, + "loss": 0.8972, + "margin_dpo/margin_mean": 5.140138626098633, + "margin_dpo/margin_std": 4.812758445739746, + "step": 54 + }, + { + "KL/chosen_KL_mean": -0.7527198791503906, + "KL/mean": -3.7856006622314453, + "KL/rejected_KL_mean": -6.8184814453125, + "KL/std": 5.726006507873535, + "epoch": 0.08076358296622614, + "fcm_dpo/beta": 0.12486197054386139, + "fcm_dpo/delta": -0.38554632663726807, + "fcm_dpo/margin": 6.0657572746276855, + "fcm_dpo/q_t": 0.3410576581954956, + "grad_norm": 62.06749725341797, + "learning_rate": 3.9130434782608694e-07, + "logits/chosen": -0.6522265076637268, + "logits/rejected": -0.590487003326416, + "logps/chosen": -63.36795425415039, + "logps/ref_chosen": -62.615234375, + "logps/ref_rejected": -88.99349975585938, + "logps/rejected": -95.81198120117188, + "loss": 0.915, + "margin_dpo/margin_mean": 6.065756797790527, + "margin_dpo/margin_std": 6.8796820640563965, + "step": 55 + }, + { + "KL/chosen_KL_mean": -0.548431396484375, + "KL/mean": -3.4274816513061523, + "KL/rejected_KL_mean": -6.3065338134765625, + "KL/std": 5.56746768951416, + "epoch": 0.08223201174743025, + "fcm_dpo/beta": 0.11680299043655396, + "fcm_dpo/delta": -0.2921079993247986, + "fcm_dpo/margin": 5.758103847503662, + "fcm_dpo/q_t": 0.3566039800643921, + "grad_norm": 53.04601287841797, + "learning_rate": 3.9855072463768114e-07, + "logits/chosen": -0.6132587194442749, + "logits/rejected": -0.5706372261047363, + "logps/chosen": -58.48115921020508, + "logps/ref_chosen": -57.9327278137207, + "logps/ref_rejected": -94.1744384765625, + "logps/rejected": -100.48097229003906, + "loss": 0.9546, + "margin_dpo/margin_mean": 5.758103847503662, + "margin_dpo/margin_std": 6.777911186218262, + "step": 56 + }, + { + "KL/chosen_KL_mean": -0.5712432861328125, + "KL/mean": -3.808412551879883, + "KL/rejected_KL_mean": -7.045585632324219, + "KL/std": 5.480106353759766, + "epoch": 0.08370044052863436, + "fcm_dpo/beta": 0.10994692891836166, + "fcm_dpo/delta": -0.3346494138240814, + "fcm_dpo/margin": 6.474340915679932, + "fcm_dpo/q_t": 0.34009259939193726, + "grad_norm": 57.49006652832031, + "learning_rate": 4.057971014492754e-07, + "logits/chosen": -0.5823420882225037, + "logits/rejected": -0.5546176433563232, + "logps/chosen": -71.0665283203125, + "logps/ref_chosen": -70.49528503417969, + "logps/ref_rejected": -95.56546020507812, + "logps/rejected": -102.61103820800781, + "loss": 0.8905, + "margin_dpo/margin_mean": 6.474340438842773, + "margin_dpo/margin_std": 5.933760643005371, + "step": 57 + }, + { + "KL/chosen_KL_mean": -0.6897735595703125, + "KL/mean": -4.305522918701172, + "KL/rejected_KL_mean": -7.921272277832031, + "KL/std": 6.297882556915283, + "epoch": 0.08516886930983847, + "fcm_dpo/beta": 0.10236389189958572, + "fcm_dpo/delta": -0.36620625853538513, + "fcm_dpo/margin": 7.231494903564453, + "fcm_dpo/q_t": 0.3412542939186096, + "grad_norm": 58.814815521240234, + "learning_rate": 4.1304347826086954e-07, + "logits/chosen": -0.6043993234634399, + "logits/rejected": -0.529456377029419, + "logps/chosen": -62.822715759277344, + "logps/ref_chosen": -62.13294219970703, + "logps/ref_rejected": -84.61729431152344, + "logps/rejected": -92.53856658935547, + "loss": 0.9009, + "margin_dpo/margin_mean": 7.2314958572387695, + "margin_dpo/margin_std": 7.5085673332214355, + "step": 58 + }, + { + "KL/chosen_KL_mean": -1.2114276885986328, + "KL/mean": -5.092733383178711, + "KL/rejected_KL_mean": -8.974040985107422, + "KL/std": 6.752954959869385, + "epoch": 0.08663729809104258, + "fcm_dpo/beta": 0.09383856505155563, + "fcm_dpo/delta": -0.3598101735115051, + "fcm_dpo/margin": 7.762610912322998, + "fcm_dpo/q_t": 0.34148186445236206, + "grad_norm": 54.276611328125, + "learning_rate": 4.2028985507246374e-07, + "logits/chosen": -0.6205647587776184, + "logits/rejected": -0.5793225765228271, + "logps/chosen": -53.143951416015625, + "logps/ref_chosen": -51.932525634765625, + "logps/ref_rejected": -88.88520050048828, + "logps/rejected": -97.85923767089844, + "loss": 0.8996, + "margin_dpo/margin_mean": 7.76261043548584, + "margin_dpo/margin_std": 7.849611282348633, + "step": 59 + }, + { + "KL/chosen_KL_mean": -2.1884403228759766, + "KL/mean": -5.350527286529541, + "KL/rejected_KL_mean": -8.512611389160156, + "KL/std": 6.1919403076171875, + "epoch": 0.0881057268722467, + "fcm_dpo/beta": 0.09018626809120178, + "fcm_dpo/delta": -0.1816769540309906, + "fcm_dpo/margin": 6.3241682052612305, + "fcm_dpo/q_t": 0.36957529187202454, + "grad_norm": 60.4672966003418, + "learning_rate": 4.2753623188405794e-07, + "logits/chosen": -0.5891699194908142, + "logits/rejected": -0.527586042881012, + "logps/chosen": -63.13063049316406, + "logps/ref_chosen": -60.94218826293945, + "logps/ref_rejected": -85.39340209960938, + "logps/rejected": -93.906005859375, + "loss": 0.9876, + "margin_dpo/margin_mean": 6.3241682052612305, + "margin_dpo/margin_std": 7.076349258422852, + "step": 60 + }, + { + "KL/chosen_KL_mean": -1.1373729705810547, + "KL/mean": -5.013064861297607, + "KL/rejected_KL_mean": -8.888755798339844, + "KL/std": 8.38675594329834, + "epoch": 0.08957415565345081, + "fcm_dpo/beta": 0.08559857308864594, + "fcm_dpo/delta": -0.2840117812156677, + "fcm_dpo/margin": 7.751380443572998, + "fcm_dpo/q_t": 0.36387041211128235, + "grad_norm": 49.0460205078125, + "learning_rate": 4.3478260869565214e-07, + "logits/chosen": -0.5909574627876282, + "logits/rejected": -0.5559062361717224, + "logps/chosen": -61.770896911621094, + "logps/ref_chosen": -60.633522033691406, + "logps/ref_rejected": -89.85249328613281, + "logps/rejected": -98.74125671386719, + "loss": 0.9755, + "margin_dpo/margin_mean": 7.751380920410156, + "margin_dpo/margin_std": 10.394891738891602, + "step": 61 + }, + { + "KL/chosen_KL_mean": -1.3573627471923828, + "KL/mean": -4.249211311340332, + "KL/rejected_KL_mean": -7.141059875488281, + "KL/std": 6.042973518371582, + "epoch": 0.09104258443465492, + "fcm_dpo/beta": 0.08392874896526337, + "fcm_dpo/delta": -0.08972346782684326, + "fcm_dpo/margin": 5.783695697784424, + "fcm_dpo/q_t": 0.3917636275291443, + "grad_norm": 47.25103759765625, + "learning_rate": 4.420289855072464e-07, + "logits/chosen": -0.635386049747467, + "logits/rejected": -0.6032891273498535, + "logps/chosen": -57.50813293457031, + "logps/ref_chosen": -56.15077209472656, + "logps/ref_rejected": -75.56619262695312, + "logps/rejected": -82.70726013183594, + "loss": 1.0451, + "margin_dpo/margin_mean": 5.783695697784424, + "margin_dpo/margin_std": 7.337882995605469, + "step": 62 + }, + { + "KL/chosen_KL_mean": -2.206483840942383, + "KL/mean": -6.167753219604492, + "KL/rejected_KL_mean": -10.129024505615234, + "KL/std": 7.77467679977417, + "epoch": 0.09251101321585903, + "fcm_dpo/beta": 0.07972732186317444, + "fcm_dpo/delta": -0.2495255470275879, + "fcm_dpo/margin": 7.922541618347168, + "fcm_dpo/q_t": 0.3571065664291382, + "grad_norm": 47.997623443603516, + "learning_rate": 4.4927536231884053e-07, + "logits/chosen": -0.6038184762001038, + "logits/rejected": -0.5584800243377686, + "logps/chosen": -75.3538818359375, + "logps/ref_chosen": -73.14739227294922, + "logps/ref_rejected": -97.61006164550781, + "logps/rejected": -107.73908996582031, + "loss": 0.9445, + "margin_dpo/margin_mean": 7.922541618347168, + "margin_dpo/margin_std": 8.097877502441406, + "step": 63 + }, + { + "KL/chosen_KL_mean": -0.7482147216796875, + "KL/mean": -5.395984649658203, + "KL/rejected_KL_mean": -10.043754577636719, + "KL/std": 8.429512023925781, + "epoch": 0.09397944199706314, + "fcm_dpo/beta": 0.07469938695430756, + "fcm_dpo/delta": -0.3220548927783966, + "fcm_dpo/margin": 9.295536041259766, + "fcm_dpo/q_t": 0.34917110204696655, + "grad_norm": 44.42738342285156, + "learning_rate": 4.5652173913043473e-07, + "logits/chosen": -0.5919687747955322, + "logits/rejected": -0.5603554248809814, + "logps/chosen": -54.7468147277832, + "logps/ref_chosen": -53.998600006103516, + "logps/ref_rejected": -93.53019714355469, + "logps/rejected": -103.57395935058594, + "loss": 0.9301, + "margin_dpo/margin_mean": 9.295536041259766, + "margin_dpo/margin_std": 10.248291969299316, + "step": 64 + }, + { + "KL/chosen_KL_mean": -2.768260955810547, + "KL/mean": -7.429973602294922, + "KL/rejected_KL_mean": -12.091690063476562, + "KL/std": 9.386064529418945, + "epoch": 0.09544787077826726, + "fcm_dpo/beta": 0.07084572315216064, + "fcm_dpo/delta": -0.2809777855873108, + "fcm_dpo/margin": 9.323431015014648, + "fcm_dpo/q_t": 0.35268324613571167, + "grad_norm": 44.16692352294922, + "learning_rate": 4.63768115942029e-07, + "logits/chosen": -0.6519949436187744, + "logits/rejected": -0.6382172107696533, + "logps/chosen": -67.60426330566406, + "logps/ref_chosen": -64.83599853515625, + "logps/ref_rejected": -109.94645690917969, + "logps/rejected": -122.03814697265625, + "loss": 0.9463, + "margin_dpo/margin_mean": 9.323431015014648, + "margin_dpo/margin_std": 10.194602012634277, + "step": 65 + }, + { + "KL/chosen_KL_mean": -2.5610218048095703, + "KL/mean": -6.84706974029541, + "KL/rejected_KL_mean": -11.133113861083984, + "KL/std": 8.479511260986328, + "epoch": 0.09691629955947137, + "fcm_dpo/beta": 0.06770157068967819, + "fcm_dpo/delta": -0.19393965601921082, + "fcm_dpo/margin": 8.572092056274414, + "fcm_dpo/q_t": 0.37264156341552734, + "grad_norm": 39.67411422729492, + "learning_rate": 4.7101449275362313e-07, + "logits/chosen": -0.6443692445755005, + "logits/rejected": -0.6113446950912476, + "logps/chosen": -54.004547119140625, + "logps/ref_chosen": -51.44352722167969, + "logps/ref_rejected": -75.63629913330078, + "logps/rejected": -86.7694091796875, + "loss": 0.987, + "margin_dpo/margin_mean": 8.572092056274414, + "margin_dpo/margin_std": 10.129063606262207, + "step": 66 + }, + { + "KL/chosen_KL_mean": -2.2398548126220703, + "KL/mean": -6.684027671813965, + "KL/rejected_KL_mean": -11.128204345703125, + "KL/std": 8.494741439819336, + "epoch": 0.09838472834067548, + "fcm_dpo/beta": 0.06574313342571259, + "fcm_dpo/delta": -0.19544380903244019, + "fcm_dpo/margin": 8.888347625732422, + "fcm_dpo/q_t": 0.37113136053085327, + "grad_norm": 38.83026885986328, + "learning_rate": 4.782608695652174e-07, + "logits/chosen": -0.6339254975318909, + "logits/rejected": -0.5946371555328369, + "logps/chosen": -61.580657958984375, + "logps/ref_chosen": -59.34080505371094, + "logps/ref_rejected": -72.78728485107422, + "logps/rejected": -83.91548919677734, + "loss": 0.9797, + "margin_dpo/margin_mean": 8.888347625732422, + "margin_dpo/margin_std": 10.183094024658203, + "step": 67 + }, + { + "KL/chosen_KL_mean": -2.51678466796875, + "KL/mean": -6.736393928527832, + "KL/rejected_KL_mean": -10.956001281738281, + "KL/std": 7.671031951904297, + "epoch": 0.09985315712187959, + "fcm_dpo/beta": 0.06370236724615097, + "fcm_dpo/delta": -0.1451815813779831, + "fcm_dpo/margin": 8.439210891723633, + "fcm_dpo/q_t": 0.3772110342979431, + "grad_norm": 38.64521408081055, + "learning_rate": 4.855072463768116e-07, + "logits/chosen": -0.6282086968421936, + "logits/rejected": -0.5680973529815674, + "logps/chosen": -67.72261810302734, + "logps/ref_chosen": -65.2058334350586, + "logps/ref_rejected": -77.20724487304688, + "logps/rejected": -88.16325378417969, + "loss": 0.986, + "margin_dpo/margin_mean": 8.439210891723633, + "margin_dpo/margin_std": 8.544252395629883, + "step": 68 + }, + { + "KL/chosen_KL_mean": -3.0470409393310547, + "KL/mean": -8.152204513549805, + "KL/rejected_KL_mean": -13.257366180419922, + "KL/std": 9.061971664428711, + "epoch": 0.1013215859030837, + "fcm_dpo/beta": 0.06064834073185921, + "fcm_dpo/delta": -0.23509711027145386, + "fcm_dpo/margin": 10.210319519042969, + "fcm_dpo/q_t": 0.3607165217399597, + "grad_norm": 40.75960159301758, + "learning_rate": 4.927536231884058e-07, + "logits/chosen": -0.5967146158218384, + "logits/rejected": -0.5730553865432739, + "logps/chosen": -62.86627960205078, + "logps/ref_chosen": -59.81924057006836, + "logps/ref_rejected": -103.38886260986328, + "logps/rejected": -116.64622497558594, + "loss": 0.9375, + "margin_dpo/margin_mean": 10.210320472717285, + "margin_dpo/margin_std": 9.960121154785156, + "step": 69 + }, + { + "KL/chosen_KL_mean": -4.498558044433594, + "KL/mean": -10.02988052368164, + "KL/rejected_KL_mean": -15.561203002929688, + "KL/std": 11.078158378601074, + "epoch": 0.1027900146842878, + "fcm_dpo/beta": 0.058067694306373596, + "fcm_dpo/delta": -0.25848639011383057, + "fcm_dpo/margin": 11.062643051147461, + "fcm_dpo/q_t": 0.35891324281692505, + "grad_norm": 40.870914459228516, + "learning_rate": 5e-07, + "logits/chosen": -0.6100128889083862, + "logits/rejected": -0.5743746757507324, + "logps/chosen": -66.42919921875, + "logps/ref_chosen": -61.930641174316406, + "logps/ref_rejected": -91.06078338623047, + "logps/rejected": -106.62198638916016, + "loss": 0.9454, + "margin_dpo/margin_mean": 11.062643051147461, + "margin_dpo/margin_std": 11.850614547729492, + "step": 70 + }, + { + "KL/chosen_KL_mean": -4.099088668823242, + "KL/mean": -10.583032608032227, + "KL/rejected_KL_mean": -17.066974639892578, + "KL/std": 11.512796401977539, + "epoch": 0.10425844346549193, + "fcm_dpo/beta": 0.05431191250681877, + "fcm_dpo/delta": -0.3282097578048706, + "fcm_dpo/margin": 12.967889785766602, + "fcm_dpo/q_t": 0.34613728523254395, + "grad_norm": 37.54128646850586, + "learning_rate": 4.999967061337492e-07, + "logits/chosen": -0.6697078943252563, + "logits/rejected": -0.6301345825195312, + "logps/chosen": -65.84942626953125, + "logps/ref_chosen": -61.750335693359375, + "logps/ref_rejected": -97.33662414550781, + "logps/rejected": -114.40359497070312, + "loss": 0.9003, + "margin_dpo/margin_mean": 12.967889785766602, + "margin_dpo/margin_std": 12.603883743286133, + "step": 71 + }, + { + "KL/chosen_KL_mean": -4.795114517211914, + "KL/mean": -11.703153610229492, + "KL/rejected_KL_mean": -18.611186981201172, + "KL/std": 12.470186233520508, + "epoch": 0.10572687224669604, + "fcm_dpo/beta": 0.05085095018148422, + "fcm_dpo/delta": -0.32645586133003235, + "fcm_dpo/margin": 13.816070556640625, + "fcm_dpo/q_t": 0.3455986976623535, + "grad_norm": 37.724822998046875, + "learning_rate": 4.999868246217933e-07, + "logits/chosen": -0.6510441303253174, + "logits/rejected": -0.6148891448974609, + "logps/chosen": -70.84852600097656, + "logps/ref_chosen": -66.05341339111328, + "logps/ref_rejected": -95.2869873046875, + "logps/rejected": -113.89817810058594, + "loss": 0.9125, + "margin_dpo/margin_mean": 13.816070556640625, + "margin_dpo/margin_std": 14.364995956420898, + "step": 72 + }, + { + "KL/chosen_KL_mean": -6.796857833862305, + "KL/mean": -13.687841415405273, + "KL/rejected_KL_mean": -20.57882308959961, + "KL/std": 16.359634399414062, + "epoch": 0.10719530102790015, + "fcm_dpo/beta": 0.047877371311187744, + "fcm_dpo/delta": -0.2798731029033661, + "fcm_dpo/margin": 13.781963348388672, + "fcm_dpo/q_t": 0.3676333427429199, + "grad_norm": 36.75204849243164, + "learning_rate": 4.999703557245192e-07, + "logits/chosen": -0.6967453956604004, + "logits/rejected": -0.6562691926956177, + "logps/chosen": -73.05313110351562, + "logps/ref_chosen": -66.25627136230469, + "logps/ref_rejected": -90.45613098144531, + "logps/rejected": -111.03495788574219, + "loss": 1.0192, + "margin_dpo/margin_mean": 13.781963348388672, + "margin_dpo/margin_std": 20.299331665039062, + "step": 73 + }, + { + "KL/chosen_KL_mean": -7.430627822875977, + "KL/mean": -14.996770858764648, + "KL/rejected_KL_mean": -22.562911987304688, + "KL/std": 18.265613555908203, + "epoch": 0.10866372980910426, + "fcm_dpo/beta": 0.04516391456127167, + "fcm_dpo/delta": -0.3053804337978363, + "fcm_dpo/margin": 15.132284164428711, + "fcm_dpo/q_t": 0.3618400990962982, + "grad_norm": 37.91171646118164, + "learning_rate": 4.999472998758977e-07, + "logits/chosen": -0.6205891370773315, + "logits/rejected": -0.6105706691741943, + "logps/chosen": -60.85551071166992, + "logps/ref_chosen": -53.42488098144531, + "logps/ref_rejected": -95.94693756103516, + "logps/rejected": -118.50984954833984, + "loss": 0.9871, + "margin_dpo/margin_mean": 15.132284164428711, + "margin_dpo/margin_std": 22.55142593383789, + "step": 74 + }, + { + "KL/chosen_KL_mean": -7.307668685913086, + "KL/mean": -18.042144775390625, + "KL/rejected_KL_mean": -28.776607513427734, + "KL/std": 18.98027801513672, + "epoch": 0.11013215859030837, + "fcm_dpo/beta": 0.041482701897621155, + "fcm_dpo/delta": -0.5359930396080017, + "fcm_dpo/margin": 21.468948364257812, + "fcm_dpo/q_t": 0.3164390027523041, + "grad_norm": 33.1515998840332, + "learning_rate": 4.999176576834721e-07, + "logits/chosen": -0.6864483952522278, + "logits/rejected": -0.6774381399154663, + "logps/chosen": -59.16933059692383, + "logps/ref_chosen": -51.861663818359375, + "logps/ref_rejected": -111.25398254394531, + "logps/rejected": -140.0305938720703, + "loss": 0.8343, + "margin_dpo/margin_mean": 21.468948364257812, + "margin_dpo/margin_std": 21.272823333740234, + "step": 75 + }, + { + "KL/chosen_KL_mean": -9.035161972045898, + "KL/mean": -15.948980331420898, + "KL/rejected_KL_mean": -22.862796783447266, + "KL/std": 15.473119735717773, + "epoch": 0.11160058737151249, + "fcm_dpo/beta": 0.039188824594020844, + "fcm_dpo/delta": -0.14997366070747375, + "fcm_dpo/margin": 13.827640533447266, + "fcm_dpo/q_t": 0.3768173158168793, + "grad_norm": 32.834896087646484, + "learning_rate": 4.998814299283415e-07, + "logits/chosen": -0.714850664138794, + "logits/rejected": -0.6740258932113647, + "logps/chosen": -62.30120086669922, + "logps/ref_chosen": -53.26603698730469, + "logps/ref_rejected": -78.21662902832031, + "logps/rejected": -101.07942199707031, + "loss": 1.0119, + "margin_dpo/margin_mean": 13.827640533447266, + "margin_dpo/margin_std": 16.90443229675293, + "step": 76 + }, + { + "KL/chosen_KL_mean": -8.176126480102539, + "KL/mean": -19.34077262878418, + "KL/rejected_KL_mean": -30.505416870117188, + "KL/std": 21.328655242919922, + "epoch": 0.1130690161527166, + "fcm_dpo/beta": 0.036142949014902115, + "fcm_dpo/delta": -0.4468532204627991, + "fcm_dpo/margin": 22.32929039001465, + "fcm_dpo/q_t": 0.3258803188800812, + "grad_norm": 34.206050872802734, + "learning_rate": 4.998386175651409e-07, + "logits/chosen": -0.6671018004417419, + "logits/rejected": -0.6263935565948486, + "logps/chosen": -66.2728042602539, + "logps/ref_chosen": -58.0966796875, + "logps/ref_rejected": -93.77361297607422, + "logps/rejected": -124.2790298461914, + "loss": 0.882, + "margin_dpo/margin_mean": 22.329288482666016, + "margin_dpo/margin_std": 23.451766967773438, + "step": 77 + }, + { + "KL/chosen_KL_mean": -8.268457412719727, + "KL/mean": -16.85858917236328, + "KL/rejected_KL_mean": -25.448719024658203, + "KL/std": 17.313419342041016, + "epoch": 0.1145374449339207, + "fcm_dpo/beta": 0.03458146005868912, + "fcm_dpo/delta": -0.2059612274169922, + "fcm_dpo/margin": 17.180259704589844, + "fcm_dpo/q_t": 0.3691740036010742, + "grad_norm": 30.835861206054688, + "learning_rate": 4.997892217220159e-07, + "logits/chosen": -0.6544767618179321, + "logits/rejected": -0.6291429996490479, + "logps/chosen": -63.882240295410156, + "logps/ref_chosen": -55.61378479003906, + "logps/ref_rejected": -84.93436431884766, + "logps/rejected": -110.38308715820312, + "loss": 0.9801, + "margin_dpo/margin_mean": 17.180259704589844, + "margin_dpo/margin_std": 19.697816848754883, + "step": 78 + }, + { + "KL/chosen_KL_mean": -8.609933853149414, + "KL/mean": -18.018735885620117, + "KL/rejected_KL_mean": -27.427539825439453, + "KL/std": 19.773313522338867, + "epoch": 0.11600587371512482, + "fcm_dpo/beta": 0.032929353415966034, + "fcm_dpo/delta": -0.2343355119228363, + "fcm_dpo/margin": 18.817596435546875, + "fcm_dpo/q_t": 0.3699612617492676, + "grad_norm": 27.264328002929688, + "learning_rate": 4.997332437005931e-07, + "logits/chosen": -0.6396697163581848, + "logits/rejected": -0.6080412268638611, + "logps/chosen": -64.0604248046875, + "logps/ref_chosen": -55.45048522949219, + "logps/ref_rejected": -87.64756774902344, + "logps/rejected": -115.07510375976562, + "loss": 0.9939, + "margin_dpo/margin_mean": 18.81760025024414, + "margin_dpo/margin_std": 24.226604461669922, + "step": 79 + }, + { + "KL/chosen_KL_mean": -11.600616455078125, + "KL/mean": -20.326831817626953, + "KL/rejected_KL_mean": -29.053043365478516, + "KL/std": 20.81574058532715, + "epoch": 0.11747430249632893, + "fcm_dpo/beta": 0.03151794895529747, + "fcm_dpo/delta": -0.16158056259155273, + "fcm_dpo/margin": 17.452417373657227, + "fcm_dpo/q_t": 0.38502687215805054, + "grad_norm": 29.68206214904785, + "learning_rate": 4.996706849759452e-07, + "logits/chosen": -0.723793625831604, + "logits/rejected": -0.681006669998169, + "logps/chosen": -70.11990356445312, + "logps/ref_chosen": -58.519290924072266, + "logps/ref_rejected": -87.54750061035156, + "logps/rejected": -116.60054016113281, + "loss": 1.039, + "margin_dpo/margin_mean": 17.45241928100586, + "margin_dpo/margin_std": 24.047962188720703, + "step": 80 + }, + { + "KL/chosen_KL_mean": -10.693958282470703, + "KL/mean": -22.869586944580078, + "KL/rejected_KL_mean": -35.04521942138672, + "KL/std": 24.97785186767578, + "epoch": 0.11894273127753303, + "fcm_dpo/beta": 0.029724348336458206, + "fcm_dpo/delta": -0.3522689640522003, + "fcm_dpo/margin": 24.351259231567383, + "fcm_dpo/q_t": 0.34924542903900146, + "grad_norm": 30.155729293823242, + "learning_rate": 4.996015471965529e-07, + "logits/chosen": -0.6988470554351807, + "logits/rejected": -0.6684309244155884, + "logps/chosen": -77.142822265625, + "logps/ref_chosen": -66.44886779785156, + "logps/ref_rejected": -129.66270446777344, + "logps/rejected": -164.70791625976562, + "loss": 0.9381, + "margin_dpo/margin_mean": 24.35125732421875, + "margin_dpo/margin_std": 29.631593704223633, + "step": 81 + }, + { + "KL/chosen_KL_mean": -13.060468673706055, + "KL/mean": -22.69424057006836, + "KL/rejected_KL_mean": -32.3280143737793, + "KL/std": 22.624094009399414, + "epoch": 0.12041116005873716, + "fcm_dpo/beta": 0.028949948027729988, + "fcm_dpo/delta": -0.16753321886062622, + "fcm_dpo/margin": 19.267545700073242, + "fcm_dpo/q_t": 0.3837572932243347, + "grad_norm": 32.10773849487305, + "learning_rate": 4.995258321842611e-07, + "logits/chosen": -0.6523622274398804, + "logits/rejected": -0.6406021118164062, + "logps/chosen": -65.29285430908203, + "logps/ref_chosen": -52.232383728027344, + "logps/ref_rejected": -90.74325561523438, + "logps/rejected": -123.0712661743164, + "loss": 1.0786, + "margin_dpo/margin_mean": 19.267547607421875, + "margin_dpo/margin_std": 31.091327667236328, + "step": 82 + }, + { + "KL/chosen_KL_mean": -13.508848190307617, + "KL/mean": -24.929195404052734, + "KL/rejected_KL_mean": -36.349544525146484, + "KL/std": 23.749820709228516, + "epoch": 0.12187958883994127, + "fcm_dpo/beta": 0.027322106063365936, + "fcm_dpo/delta": -0.2400093972682953, + "fcm_dpo/margin": 22.84069061279297, + "fcm_dpo/q_t": 0.3655874729156494, + "grad_norm": 31.6568660736084, + "learning_rate": 4.994435419342304e-07, + "logits/chosen": -0.6658318042755127, + "logits/rejected": -0.6298344135284424, + "logps/chosen": -69.33623504638672, + "logps/ref_chosen": -55.82738494873047, + "logps/ref_rejected": -103.71589660644531, + "logps/rejected": -140.06544494628906, + "loss": 0.9834, + "margin_dpo/margin_mean": 22.84069061279297, + "margin_dpo/margin_std": 28.148263931274414, + "step": 83 + }, + { + "KL/chosen_KL_mean": -12.589178085327148, + "KL/mean": -22.418010711669922, + "KL/rejected_KL_mean": -32.24684524536133, + "KL/std": 20.389741897583008, + "epoch": 0.12334801762114538, + "fcm_dpo/beta": 0.026361385360360146, + "fcm_dpo/delta": -0.12693113088607788, + "fcm_dpo/margin": 19.657663345336914, + "fcm_dpo/q_t": 0.38021910190582275, + "grad_norm": 26.903989791870117, + "learning_rate": 4.993546786148857e-07, + "logits/chosen": -0.6667696237564087, + "logits/rejected": -0.6284007430076599, + "logps/chosen": -79.76535034179688, + "logps/ref_chosen": -67.1761703491211, + "logps/ref_rejected": -87.29859924316406, + "logps/rejected": -119.54544830322266, + "loss": 1.0067, + "margin_dpo/margin_mean": 19.657665252685547, + "margin_dpo/margin_std": 20.888431549072266, + "step": 84 + }, + { + "KL/chosen_KL_mean": -13.00227165222168, + "KL/mean": -23.157577514648438, + "KL/rejected_KL_mean": -33.31288146972656, + "KL/std": 20.965476989746094, + "epoch": 0.12481644640234948, + "fcm_dpo/beta": 0.02606545016169548, + "fcm_dpo/delta": -0.13678425550460815, + "fcm_dpo/margin": 20.310611724853516, + "fcm_dpo/q_t": 0.3828505277633667, + "grad_norm": 27.330554962158203, + "learning_rate": 4.992592445678582e-07, + "logits/chosen": -0.6378351449966431, + "logits/rejected": -0.6047541499137878, + "logps/chosen": -71.40888977050781, + "logps/ref_chosen": -58.4066162109375, + "logps/ref_rejected": -78.63880157470703, + "logps/rejected": -111.9516830444336, + "loss": 1.0268, + "margin_dpo/margin_mean": 20.310611724853516, + "margin_dpo/margin_std": 25.04244613647461, + "step": 85 + }, + { + "KL/chosen_KL_mean": -17.00665855407715, + "KL/mean": -27.735424041748047, + "KL/rejected_KL_mean": -38.46418380737305, + "KL/std": 28.39218521118164, + "epoch": 0.1262848751835536, + "fcm_dpo/beta": 0.025230124592781067, + "fcm_dpo/delta": -0.14920490980148315, + "fcm_dpo/margin": 21.457521438598633, + "fcm_dpo/q_t": 0.3947563171386719, + "grad_norm": 30.875211715698242, + "learning_rate": 4.991572423079235e-07, + "logits/chosen": -0.6909030675888062, + "logits/rejected": -0.6786030530929565, + "logps/chosen": -73.14411926269531, + "logps/ref_chosen": -56.13746643066406, + "logps/ref_rejected": -88.12165069580078, + "logps/rejected": -126.58583068847656, + "loss": 1.11, + "margin_dpo/margin_mean": 21.457523345947266, + "margin_dpo/margin_std": 39.128604888916016, + "step": 86 + }, + { + "KL/chosen_KL_mean": -16.64571189880371, + "KL/mean": -29.247547149658203, + "KL/rejected_KL_mean": -41.84938430786133, + "KL/std": 28.30339813232422, + "epoch": 0.1277533039647577, + "fcm_dpo/beta": 0.023992381989955902, + "fcm_dpo/delta": -0.21979403495788574, + "fcm_dpo/margin": 25.20366859436035, + "fcm_dpo/q_t": 0.36907005310058594, + "grad_norm": 26.347061157226562, + "learning_rate": 4.990486745229364e-07, + "logits/chosen": -0.719502329826355, + "logits/rejected": -0.6944303512573242, + "logps/chosen": -72.28180694580078, + "logps/ref_chosen": -55.63609313964844, + "logps/ref_rejected": -95.46757507324219, + "logps/rejected": -137.31695556640625, + "loss": 1.0182, + "margin_dpo/margin_mean": 25.20366859436035, + "margin_dpo/margin_std": 34.2125244140625, + "step": 87 + }, + { + "KL/chosen_KL_mean": -21.046295166015625, + "KL/mean": -30.57300567626953, + "KL/rejected_KL_mean": -40.09971618652344, + "KL/std": 29.50307846069336, + "epoch": 0.12922173274596183, + "fcm_dpo/beta": 0.023543458431959152, + "fcm_dpo/delta": -0.051779814064502716, + "fcm_dpo/margin": 19.053417205810547, + "fcm_dpo/q_t": 0.4041179418563843, + "grad_norm": 27.308347702026367, + "learning_rate": 4.989335440737586e-07, + "logits/chosen": -0.6762702465057373, + "logits/rejected": -0.6694661378860474, + "logps/chosen": -94.71744537353516, + "logps/ref_chosen": -73.67115020751953, + "logps/ref_rejected": -106.70849609375, + "logps/rejected": -146.80821228027344, + "loss": 1.1201, + "margin_dpo/margin_mean": 19.053417205810547, + "margin_dpo/margin_std": 30.958572387695312, + "step": 88 + }, + { + "KL/chosen_KL_mean": -12.778741836547852, + "KL/mean": -23.465646743774414, + "KL/rejected_KL_mean": -34.15255355834961, + "KL/std": 24.849987030029297, + "epoch": 0.13069016152716592, + "fcm_dpo/beta": 0.02335914969444275, + "fcm_dpo/delta": -0.10445674508810043, + "fcm_dpo/margin": 21.37381362915039, + "fcm_dpo/q_t": 0.38910990953445435, + "grad_norm": 24.864566802978516, + "learning_rate": 4.988118539941847e-07, + "logits/chosen": -0.7264094352722168, + "logits/rejected": -0.6940090656280518, + "logps/chosen": -73.40365600585938, + "logps/ref_chosen": -60.624916076660156, + "logps/ref_rejected": -82.08354949951172, + "logps/rejected": -116.23609924316406, + "loss": 1.042, + "margin_dpo/margin_mean": 21.37381362915039, + "margin_dpo/margin_std": 28.21473503112793, + "step": 89 + }, + { + "KL/chosen_KL_mean": -16.536136627197266, + "KL/mean": -31.133424758911133, + "KL/rejected_KL_mean": -45.730709075927734, + "KL/std": 34.05305480957031, + "epoch": 0.13215859030837004, + "fcm_dpo/beta": 0.02243289351463318, + "fcm_dpo/delta": -0.2720962464809418, + "fcm_dpo/margin": 29.194570541381836, + "fcm_dpo/q_t": 0.3703291416168213, + "grad_norm": 27.197731018066406, + "learning_rate": 4.986836074908615e-07, + "logits/chosen": -0.6625027656555176, + "logits/rejected": -0.6746160984039307, + "logps/chosen": -69.82144927978516, + "logps/ref_chosen": -53.285308837890625, + "logps/ref_rejected": -111.54470825195312, + "logps/rejected": -157.27542114257812, + "loss": 1.0204, + "margin_dpo/margin_mean": 29.194570541381836, + "margin_dpo/margin_std": 42.260841369628906, + "step": 90 + }, + { + "KL/chosen_KL_mean": -17.2109432220459, + "KL/mean": -29.20108413696289, + "KL/rejected_KL_mean": -41.191226959228516, + "KL/std": 27.21971321105957, + "epoch": 0.13362701908957417, + "fcm_dpo/beta": 0.02157766930758953, + "fcm_dpo/delta": -0.12366719543933868, + "fcm_dpo/margin": 23.98028564453125, + "fcm_dpo/q_t": 0.3894466459751129, + "grad_norm": 24.908628463745117, + "learning_rate": 4.985488079432037e-07, + "logits/chosen": -0.6959347724914551, + "logits/rejected": -0.6632735729217529, + "logps/chosen": -79.01390075683594, + "logps/ref_chosen": -61.802955627441406, + "logps/ref_rejected": -87.87395477294922, + "logps/rejected": -129.065185546875, + "loss": 1.061, + "margin_dpo/margin_mean": 23.98028564453125, + "margin_dpo/margin_std": 35.235958099365234, + "step": 91 + }, + { + "KL/chosen_KL_mean": -15.647247314453125, + "KL/mean": -27.800127029418945, + "KL/rejected_KL_mean": -39.95301055908203, + "KL/std": 28.11497688293457, + "epoch": 0.13509544787077826, + "fcm_dpo/beta": 0.021103451028466225, + "fcm_dpo/delta": -0.11899492889642715, + "fcm_dpo/margin": 24.30576515197754, + "fcm_dpo/q_t": 0.3890799880027771, + "grad_norm": 23.367460250854492, + "learning_rate": 4.984074589033043e-07, + "logits/chosen": -0.7003687620162964, + "logits/rejected": -0.676365852355957, + "logps/chosen": -67.28801727294922, + "logps/ref_chosen": -51.640769958496094, + "logps/ref_rejected": -77.88117980957031, + "logps/rejected": -117.83419799804688, + "loss": 1.0536, + "margin_dpo/margin_mean": 24.305763244628906, + "margin_dpo/margin_std": 34.140499114990234, + "step": 92 + }, + { + "KL/chosen_KL_mean": -17.547260284423828, + "KL/mean": -29.79717254638672, + "KL/rejected_KL_mean": -42.04708480834961, + "KL/std": 26.565616607666016, + "epoch": 0.13656387665198239, + "fcm_dpo/beta": 0.020545653998851776, + "fcm_dpo/delta": -0.1087617427110672, + "fcm_dpo/margin": 24.499828338623047, + "fcm_dpo/q_t": 0.3881436884403229, + "grad_norm": 23.9678897857666, + "learning_rate": 4.982595640958425e-07, + "logits/chosen": -0.751114547252655, + "logits/rejected": -0.7044565081596375, + "logps/chosen": -70.07649993896484, + "logps/ref_chosen": -52.529239654541016, + "logps/ref_rejected": -77.16075134277344, + "logps/rejected": -119.20783233642578, + "loss": 1.0292, + "margin_dpo/margin_mean": 24.499828338623047, + "margin_dpo/margin_std": 30.640230178833008, + "step": 93 + }, + { + "KL/chosen_KL_mean": -18.9477596282959, + "KL/mean": -33.051692962646484, + "KL/rejected_KL_mean": -47.1556282043457, + "KL/std": 30.412057876586914, + "epoch": 0.13803230543318648, + "fcm_dpo/beta": 0.019795160740613937, + "fcm_dpo/delta": -0.16931986808776855, + "fcm_dpo/margin": 28.20786476135254, + "fcm_dpo/q_t": 0.3750844895839691, + "grad_norm": 23.683237075805664, + "learning_rate": 4.98105127417984e-07, + "logits/chosen": -0.6706228256225586, + "logits/rejected": -0.6556359529495239, + "logps/chosen": -80.17037200927734, + "logps/ref_chosen": -61.22261047363281, + "logps/ref_rejected": -99.59902954101562, + "logps/rejected": -146.75466918945312, + "loss": 0.9929, + "margin_dpo/margin_mean": 28.207866668701172, + "margin_dpo/margin_std": 31.745136260986328, + "step": 94 + }, + { + "KL/chosen_KL_mean": -17.981779098510742, + "KL/mean": -29.374893188476562, + "KL/rejected_KL_mean": -40.76800537109375, + "KL/std": 29.04880142211914, + "epoch": 0.1395007342143906, + "fcm_dpo/beta": 0.019524898380041122, + "fcm_dpo/delta": -0.04741118103265762, + "fcm_dpo/margin": 22.786224365234375, + "fcm_dpo/q_t": 0.39812785387039185, + "grad_norm": 22.271825790405273, + "learning_rate": 4.979441529392784e-07, + "logits/chosen": -0.7048947811126709, + "logits/rejected": -0.6773319244384766, + "logps/chosen": -70.50542449951172, + "logps/ref_chosen": -52.523643493652344, + "logps/ref_rejected": -75.8803482055664, + "logps/rejected": -116.64836120605469, + "loss": 1.072, + "margin_dpo/margin_mean": 22.786224365234375, + "margin_dpo/margin_std": 30.41301727294922, + "step": 95 + }, + { + "KL/chosen_KL_mean": -17.554834365844727, + "KL/mean": -32.99760437011719, + "KL/rejected_KL_mean": -48.44038391113281, + "KL/std": 33.0831298828125, + "epoch": 0.14096916299559473, + "fcm_dpo/beta": 0.01885131560266018, + "fcm_dpo/delta": -0.19593745470046997, + "fcm_dpo/margin": 30.885547637939453, + "fcm_dpo/q_t": 0.3727257251739502, + "grad_norm": 22.89360237121582, + "learning_rate": 4.977766449015534e-07, + "logits/chosen": -0.7279924154281616, + "logits/rejected": -0.7011754512786865, + "logps/chosen": -79.71180725097656, + "logps/ref_chosen": -62.15697479248047, + "logps/ref_rejected": -96.59601593017578, + "logps/rejected": -145.03640747070312, + "loss": 0.986, + "margin_dpo/margin_mean": 30.88555145263672, + "margin_dpo/margin_std": 36.464759826660156, + "step": 96 + }, + { + "KL/chosen_KL_mean": -18.827903747558594, + "KL/mean": -30.793764114379883, + "KL/rejected_KL_mean": -42.75962829589844, + "KL/std": 27.565874099731445, + "epoch": 0.14243759177679882, + "fcm_dpo/beta": 0.01880602166056633, + "fcm_dpo/delta": -0.0534333810210228, + "fcm_dpo/margin": 23.93172836303711, + "fcm_dpo/q_t": 0.39575350284576416, + "grad_norm": 23.5416202545166, + "learning_rate": 4.976026077188012e-07, + "logits/chosen": -0.6459416151046753, + "logits/rejected": -0.6031548976898193, + "logps/chosen": -73.47427368164062, + "logps/ref_chosen": -54.646366119384766, + "logps/ref_rejected": -76.96475219726562, + "logps/rejected": -119.72438049316406, + "loss": 1.0555, + "margin_dpo/margin_mean": 23.93172836303711, + "margin_dpo/margin_std": 27.58646011352539, + "step": 97 + }, + { + "KL/chosen_KL_mean": -23.348102569580078, + "KL/mean": -36.9785270690918, + "KL/rejected_KL_mean": -50.60894775390625, + "KL/std": 31.48232650756836, + "epoch": 0.14390602055800295, + "fcm_dpo/beta": 0.0182771235704422, + "fcm_dpo/delta": -0.1036653220653534, + "fcm_dpo/margin": 27.26085662841797, + "fcm_dpo/q_t": 0.3863416314125061, + "grad_norm": 24.430879592895508, + "learning_rate": 4.974220459770639e-07, + "logits/chosen": -0.6633949875831604, + "logits/rejected": -0.6469439268112183, + "logps/chosen": -88.60673522949219, + "logps/ref_chosen": -65.25862884521484, + "logps/ref_rejected": -96.5274887084961, + "logps/rejected": -147.13644409179688, + "loss": 1.0592, + "margin_dpo/margin_mean": 27.260852813720703, + "margin_dpo/margin_std": 37.74567794799805, + "step": 98 + }, + { + "KL/chosen_KL_mean": -18.30929183959961, + "KL/mean": -35.074058532714844, + "KL/rejected_KL_mean": -51.838829040527344, + "KL/std": 34.02536392211914, + "epoch": 0.14537444933920704, + "fcm_dpo/beta": 0.017622604966163635, + "fcm_dpo/delta": -0.20359688997268677, + "fcm_dpo/margin": 33.529537200927734, + "fcm_dpo/q_t": 0.37370553612709045, + "grad_norm": 21.330432891845703, + "learning_rate": 4.972349644343108e-07, + "logits/chosen": -0.6809293627738953, + "logits/rejected": -0.6816772222518921, + "logps/chosen": -63.947776794433594, + "logps/ref_chosen": -45.638484954833984, + "logps/ref_rejected": -86.43793487548828, + "logps/rejected": -138.27676391601562, + "loss": 0.9921, + "margin_dpo/margin_mean": 33.529541015625, + "margin_dpo/margin_std": 42.3082389831543, + "step": 99 + }, + { + "KL/chosen_KL_mean": -20.69596290588379, + "KL/mean": -30.469200134277344, + "KL/rejected_KL_mean": -40.242435455322266, + "KL/std": 27.422863006591797, + "epoch": 0.14684287812041116, + "fcm_dpo/beta": 0.01770273968577385, + "fcm_dpo/delta": 0.05536198988556862, + "fcm_dpo/margin": 19.546470642089844, + "fcm_dpo/q_t": 0.4218849539756775, + "grad_norm": 23.772842407226562, + "learning_rate": 4.970413680203148e-07, + "logits/chosen": -0.620718240737915, + "logits/rejected": -0.5739752650260925, + "logps/chosen": -78.28994750976562, + "logps/ref_chosen": -57.59397888183594, + "logps/ref_rejected": -74.06021118164062, + "logps/rejected": -114.30264282226562, + "loss": 1.1659, + "margin_dpo/margin_mean": 19.546470642089844, + "margin_dpo/margin_std": 35.299861907958984, + "step": 100 + }, + { + "KL/chosen_KL_mean": -25.458805084228516, + "KL/mean": -37.322052001953125, + "KL/rejected_KL_mean": -49.18529510498047, + "KL/std": 33.75147247314453, + "epoch": 0.14831130690161526, + "fcm_dpo/beta": 0.01753612421452999, + "fcm_dpo/delta": -0.01752624288201332, + "fcm_dpo/margin": 23.726482391357422, + "fcm_dpo/q_t": 0.41101598739624023, + "grad_norm": 23.58587646484375, + "learning_rate": 4.968412618365215e-07, + "logits/chosen": -0.6659466028213501, + "logits/rejected": -0.6346107721328735, + "logps/chosen": -87.10765838623047, + "logps/ref_chosen": -61.64885330200195, + "logps/ref_rejected": -83.18968200683594, + "logps/rejected": -132.37498474121094, + "loss": 1.1286, + "margin_dpo/margin_mean": 23.726482391357422, + "margin_dpo/margin_std": 40.702640533447266, + "step": 101 + }, + { + "KL/chosen_KL_mean": -27.646360397338867, + "KL/mean": -36.379215240478516, + "KL/rejected_KL_mean": -45.112064361572266, + "KL/std": 31.945594787597656, + "epoch": 0.14977973568281938, + "fcm_dpo/beta": 0.01757633686065674, + "fcm_dpo/delta": -0.024831483140587807, + "fcm_dpo/margin": 17.465709686279297, + "fcm_dpo/q_t": 0.4317839741706848, + "grad_norm": 26.59412384033203, + "learning_rate": 4.966346511559149e-07, + "logits/chosen": -0.6938978433609009, + "logits/rejected": -0.6497205495834351, + "logps/chosen": -91.72523498535156, + "logps/ref_chosen": -64.0788803100586, + "logps/ref_rejected": -68.18707275390625, + "logps/rejected": -113.29914093017578, + "loss": 1.2103, + "margin_dpo/margin_mean": 17.46571159362793, + "margin_dpo/margin_std": 37.897613525390625, + "step": 102 + }, + { + "KL/chosen_KL_mean": -21.812721252441406, + "KL/mean": -39.324649810791016, + "KL/rejected_KL_mean": -56.83657455444336, + "KL/std": 35.332679748535156, + "epoch": 0.1512481644640235, + "fcm_dpo/beta": 0.016969915479421616, + "fcm_dpo/delta": -0.20710483193397522, + "fcm_dpo/margin": 35.02384948730469, + "fcm_dpo/q_t": 0.3709907531738281, + "grad_norm": 22.75851058959961, + "learning_rate": 4.964215414228785e-07, + "logits/chosen": -0.6656967997550964, + "logits/rejected": -0.6312940120697021, + "logps/chosen": -83.11199951171875, + "logps/ref_chosen": -61.299278259277344, + "logps/ref_rejected": -93.57270812988281, + "logps/rejected": -150.40928649902344, + "loss": 0.9857, + "margin_dpo/margin_mean": 35.02384948730469, + "margin_dpo/margin_std": 42.5711669921875, + "step": 103 + }, + { + "KL/chosen_KL_mean": -23.92180824279785, + "KL/mean": -40.524253845214844, + "KL/rejected_KL_mean": -57.12669372558594, + "KL/std": 39.19900131225586, + "epoch": 0.1527165932452276, + "fcm_dpo/beta": 0.01646982505917549, + "fcm_dpo/delta": -0.15515577793121338, + "fcm_dpo/margin": 33.20488357543945, + "fcm_dpo/q_t": 0.38579535484313965, + "grad_norm": 22.30910301208496, + "learning_rate": 4.96201938253052e-07, + "logits/chosen": -0.668786883354187, + "logits/rejected": -0.6362247467041016, + "logps/chosen": -78.29458618164062, + "logps/ref_chosen": -54.372772216796875, + "logps/ref_rejected": -89.5647201538086, + "logps/rejected": -146.69140625, + "loss": 1.0453, + "margin_dpo/margin_mean": 33.20488357543945, + "margin_dpo/margin_std": 47.9078369140625, + "step": 104 + }, + { + "KL/chosen_KL_mean": -22.438573837280273, + "KL/mean": -46.06147766113281, + "KL/rejected_KL_mean": -69.68439483642578, + "KL/std": 38.165252685546875, + "epoch": 0.15418502202643172, + "fcm_dpo/beta": 0.015482816845178604, + "fcm_dpo/delta": -0.3573678731918335, + "fcm_dpo/margin": 47.245811462402344, + "fcm_dpo/q_t": 0.3350944519042969, + "grad_norm": 22.623994827270508, + "learning_rate": 4.959758474331832e-07, + "logits/chosen": -0.6759747862815857, + "logits/rejected": -0.653762936592102, + "logps/chosen": -77.0775146484375, + "logps/ref_chosen": -54.638946533203125, + "logps/ref_rejected": -97.97351837158203, + "logps/rejected": -167.6579132080078, + "loss": 0.8737, + "margin_dpo/margin_mean": 47.245811462402344, + "margin_dpo/margin_std": 42.06477355957031, + "step": 105 + }, + { + "KL/chosen_KL_mean": -25.158750534057617, + "KL/mean": -39.978302001953125, + "KL/rejected_KL_mean": -54.79785919189453, + "KL/std": 33.347076416015625, + "epoch": 0.15565345080763582, + "fcm_dpo/beta": 0.015045535750687122, + "fcm_dpo/delta": -0.04824310541152954, + "fcm_dpo/margin": 29.63909912109375, + "fcm_dpo/q_t": 0.39749810099601746, + "grad_norm": 21.77722930908203, + "learning_rate": 4.957432749209755e-07, + "logits/chosen": -0.6268042325973511, + "logits/rejected": -0.5954272747039795, + "logps/chosen": -79.99164581298828, + "logps/ref_chosen": -54.83289337158203, + "logps/ref_rejected": -85.22461700439453, + "logps/rejected": -140.02247619628906, + "loss": 1.0594, + "margin_dpo/margin_mean": 29.63909912109375, + "margin_dpo/margin_std": 36.35613250732422, + "step": 106 + }, + { + "KL/chosen_KL_mean": -29.925743103027344, + "KL/mean": -46.62507629394531, + "KL/rejected_KL_mean": -63.32440185546875, + "KL/std": 40.514007568359375, + "epoch": 0.15712187958883994, + "fcm_dpo/beta": 0.014755118638277054, + "fcm_dpo/delta": -0.09840479493141174, + "fcm_dpo/margin": 33.398658752441406, + "fcm_dpo/q_t": 0.3882708251476288, + "grad_norm": 21.028383255004883, + "learning_rate": 4.955042268449307e-07, + "logits/chosen": -0.670194149017334, + "logits/rejected": -0.6239144802093506, + "logps/chosen": -99.63356018066406, + "logps/ref_chosen": -69.70780944824219, + "logps/ref_rejected": -94.73950958251953, + "logps/rejected": -158.06390380859375, + "loss": 1.0453, + "margin_dpo/margin_mean": 33.398658752441406, + "margin_dpo/margin_std": 42.718177795410156, + "step": 107 + }, + { + "KL/chosen_KL_mean": -27.031885147094727, + "KL/mean": -47.079811096191406, + "KL/rejected_KL_mean": -67.12774658203125, + "KL/std": 44.77525329589844, + "epoch": 0.15859030837004406, + "fcm_dpo/beta": 0.014345895498991013, + "fcm_dpo/delta": -0.18586613237857819, + "fcm_dpo/margin": 40.095855712890625, + "fcm_dpo/q_t": 0.38066431879997253, + "grad_norm": 21.29493522644043, + "learning_rate": 4.952587095041881e-07, + "logits/chosen": -0.669190526008606, + "logits/rejected": -0.6483861207962036, + "logps/chosen": -83.0417709350586, + "logps/ref_chosen": -56.0098876953125, + "logps/ref_rejected": -95.79601287841797, + "logps/rejected": -162.92376708984375, + "loss": 1.0383, + "margin_dpo/margin_mean": 40.095855712890625, + "margin_dpo/margin_std": 57.993988037109375, + "step": 108 + }, + { + "KL/chosen_KL_mean": -25.21091651916504, + "KL/mean": -46.66911315917969, + "KL/rejected_KL_mean": -68.12731170654297, + "KL/std": 43.04130554199219, + "epoch": 0.16005873715124816, + "fcm_dpo/beta": 0.013737066648900509, + "fcm_dpo/delta": -0.20238548517227173, + "fcm_dpo/margin": 42.91639709472656, + "fcm_dpo/q_t": 0.3687817454338074, + "grad_norm": 21.884559631347656, + "learning_rate": 4.95006729368358e-07, + "logits/chosen": -0.6145904660224915, + "logits/rejected": -0.59392911195755, + "logps/chosen": -88.09640502929688, + "logps/ref_chosen": -62.88549041748047, + "logps/ref_rejected": -98.68573760986328, + "logps/rejected": -166.81304931640625, + "loss": 0.9884, + "margin_dpo/margin_mean": 42.91639709472656, + "margin_dpo/margin_std": 50.632591247558594, + "step": 109 + }, + { + "KL/chosen_KL_mean": -26.089508056640625, + "KL/mean": -45.36097717285156, + "KL/rejected_KL_mean": -64.63245391845703, + "KL/std": 42.92705154418945, + "epoch": 0.16152716593245228, + "fcm_dpo/beta": 0.013237670063972473, + "fcm_dpo/delta": -0.12065520882606506, + "fcm_dpo/margin": 38.542945861816406, + "fcm_dpo/q_t": 0.3875874876976013, + "grad_norm": 19.124555587768555, + "learning_rate": 4.947482930773511e-07, + "logits/chosen": -0.5867836475372314, + "logits/rejected": -0.5484417676925659, + "logps/chosen": -84.84319305419922, + "logps/ref_chosen": -58.753684997558594, + "logps/ref_rejected": -79.75001525878906, + "logps/rejected": -144.38247680664062, + "loss": 1.0537, + "margin_dpo/margin_mean": 38.542945861816406, + "margin_dpo/margin_std": 51.59848403930664, + "step": 110 + }, + { + "KL/chosen_KL_mean": -29.633333206176758, + "KL/mean": -51.53802490234375, + "KL/rejected_KL_mean": -73.4427261352539, + "KL/std": 48.50222396850586, + "epoch": 0.16299559471365638, + "fcm_dpo/beta": 0.01289959717541933, + "fcm_dpo/delta": -0.17664864659309387, + "fcm_dpo/margin": 43.809391021728516, + "fcm_dpo/q_t": 0.3767807185649872, + "grad_norm": 21.609487533569336, + "learning_rate": 4.944834074412042e-07, + "logits/chosen": -0.669287919998169, + "logits/rejected": -0.6483087539672852, + "logps/chosen": -98.25743865966797, + "logps/ref_chosen": -68.62410736083984, + "logps/ref_rejected": -98.42886352539062, + "logps/rejected": -171.87158203125, + "loss": 1.0324, + "margin_dpo/margin_mean": 43.80938720703125, + "margin_dpo/margin_std": 59.64892578125, + "step": 111 + }, + { + "KL/chosen_KL_mean": -27.373397827148438, + "KL/mean": -41.31971740722656, + "KL/rejected_KL_mean": -55.26603698730469, + "KL/std": 33.80635070800781, + "epoch": 0.1644640234948605, + "fcm_dpo/beta": 0.01288105733692646, + "fcm_dpo/delta": 0.04204365238547325, + "fcm_dpo/margin": 27.89263916015625, + "fcm_dpo/q_t": 0.4176030158996582, + "grad_norm": 19.551979064941406, + "learning_rate": 4.942120794399002e-07, + "logits/chosen": -0.6326720118522644, + "logits/rejected": -0.5948728322982788, + "logps/chosen": -77.62303924560547, + "logps/ref_chosen": -50.24964141845703, + "logps/ref_rejected": -64.77442932128906, + "logps/rejected": -120.04046630859375, + "loss": 1.1257, + "margin_dpo/margin_mean": 27.892641067504883, + "margin_dpo/margin_std": 40.78398895263672, + "step": 112 + }, + { + "KL/chosen_KL_mean": -34.30276870727539, + "KL/mean": -49.319786071777344, + "KL/rejected_KL_mean": -64.33680725097656, + "KL/std": 35.23823928833008, + "epoch": 0.16593245227606462, + "fcm_dpo/beta": 0.012986140325665474, + "fcm_dpo/delta": 0.01037517748773098, + "fcm_dpo/margin": 30.034034729003906, + "fcm_dpo/q_t": 0.4100501537322998, + "grad_norm": 20.03278160095215, + "learning_rate": 4.939343162231841e-07, + "logits/chosen": -0.5970338582992554, + "logits/rejected": -0.5535135269165039, + "logps/chosen": -101.01571655273438, + "logps/ref_chosen": -66.71295166015625, + "logps/ref_rejected": -77.96870422363281, + "logps/rejected": -142.30551147460938, + "loss": 1.0938, + "margin_dpo/margin_mean": 30.034034729003906, + "margin_dpo/margin_std": 38.99578094482422, + "step": 113 + }, + { + "KL/chosen_KL_mean": -31.6552791595459, + "KL/mean": -55.58210754394531, + "KL/rejected_KL_mean": -79.50894165039062, + "KL/std": 53.05522918701172, + "epoch": 0.16740088105726872, + "fcm_dpo/beta": 0.01250369194895029, + "fcm_dpo/delta": -0.21323440968990326, + "fcm_dpo/margin": 47.85365295410156, + "fcm_dpo/q_t": 0.3746366500854492, + "grad_norm": 21.371883392333984, + "learning_rate": 4.936501251103751e-07, + "logits/chosen": -0.594833493232727, + "logits/rejected": -0.5611605048179626, + "logps/chosen": -89.44036102294922, + "logps/ref_chosen": -57.78507995605469, + "logps/ref_rejected": -87.10966491699219, + "logps/rejected": -166.61859130859375, + "loss": 1.0003, + "margin_dpo/margin_mean": 47.85365295410156, + "margin_dpo/margin_std": 64.31402587890625, + "step": 114 + }, + { + "KL/chosen_KL_mean": -41.24885559082031, + "KL/mean": -57.65170669555664, + "KL/rejected_KL_mean": -74.05455017089844, + "KL/std": 50.606632232666016, + "epoch": 0.16886930983847284, + "fcm_dpo/beta": 0.012465628795325756, + "fcm_dpo/delta": -0.009462913498282433, + "fcm_dpo/margin": 32.80569076538086, + "fcm_dpo/q_t": 0.414449542760849, + "grad_norm": 26.58994483947754, + "learning_rate": 4.933595135901732e-07, + "logits/chosen": -0.6483290195465088, + "logits/rejected": -0.6295895576477051, + "logps/chosen": -106.83149719238281, + "logps/ref_chosen": -65.5826416015625, + "logps/ref_rejected": -98.56552124023438, + "logps/rejected": -172.6200714111328, + "loss": 1.1668, + "margin_dpo/margin_mean": 32.80569076538086, + "margin_dpo/margin_std": 66.59864807128906, + "step": 115 + }, + { + "KL/chosen_KL_mean": -30.92531394958496, + "KL/mean": -49.26237487792969, + "KL/rejected_KL_mean": -67.59944152832031, + "KL/std": 42.78678894042969, + "epoch": 0.17033773861967694, + "fcm_dpo/beta": 0.012374404817819595, + "fcm_dpo/delta": -0.05652306228876114, + "fcm_dpo/margin": 36.67411804199219, + "fcm_dpo/q_t": 0.39768484234809875, + "grad_norm": 21.639448165893555, + "learning_rate": 4.930624893204624e-07, + "logits/chosen": -0.6038175821304321, + "logits/rejected": -0.5935859680175781, + "logps/chosen": -82.32562255859375, + "logps/ref_chosen": -51.40031433105469, + "logps/ref_rejected": -80.5218505859375, + "logps/rejected": -148.1212921142578, + "loss": 1.0582, + "margin_dpo/margin_mean": 36.67411804199219, + "margin_dpo/margin_std": 46.316162109375, + "step": 116 + }, + { + "KL/chosen_KL_mean": -39.578678131103516, + "KL/mean": -54.53247833251953, + "KL/rejected_KL_mean": -69.48628234863281, + "KL/std": 45.36625671386719, + "epoch": 0.17180616740088106, + "fcm_dpo/beta": 0.012346116825938225, + "fcm_dpo/delta": 0.031927622854709625, + "fcm_dpo/margin": 29.907602310180664, + "fcm_dpo/q_t": 0.41722893714904785, + "grad_norm": 28.117990493774414, + "learning_rate": 4.927590601281083e-07, + "logits/chosen": -0.6056150197982788, + "logits/rejected": -0.5710107088088989, + "logps/chosen": -108.87709045410156, + "logps/ref_chosen": -69.29840850830078, + "logps/ref_rejected": -66.583984375, + "logps/rejected": -136.0702667236328, + "loss": 1.1431, + "margin_dpo/margin_mean": 29.907604217529297, + "margin_dpo/margin_std": 51.51899719238281, + "step": 117 + }, + { + "KL/chosen_KL_mean": -31.513439178466797, + "KL/mean": -49.722755432128906, + "KL/rejected_KL_mean": -67.93206024169922, + "KL/std": 41.247047424316406, + "epoch": 0.17327459618208516, + "fcm_dpo/beta": 0.01227930560708046, + "fcm_dpo/delta": -0.04938432201743126, + "fcm_dpo/margin": 36.41863250732422, + "fcm_dpo/q_t": 0.3994569778442383, + "grad_norm": 20.659421920776367, + "learning_rate": 4.924492340087524e-07, + "logits/chosen": -0.6473113298416138, + "logits/rejected": -0.6298643350601196, + "logps/chosen": -87.1544189453125, + "logps/ref_chosen": -55.6409797668457, + "logps/ref_rejected": -75.66905975341797, + "logps/rejected": -143.6011199951172, + "loss": 1.0642, + "margin_dpo/margin_mean": 36.41863250732422, + "margin_dpo/margin_std": 47.30088424682617, + "step": 118 + }, + { + "KL/chosen_KL_mean": -43.098350524902344, + "KL/mean": -61.04634475708008, + "KL/rejected_KL_mean": -78.99433898925781, + "KL/std": 46.14408874511719, + "epoch": 0.17474302496328928, + "fcm_dpo/beta": 0.012094875797629356, + "fcm_dpo/delta": -0.036839861422777176, + "fcm_dpo/margin": 35.89598846435547, + "fcm_dpo/q_t": 0.40563011169433594, + "grad_norm": 23.585227966308594, + "learning_rate": 4.92133019126601e-07, + "logits/chosen": -0.6196680068969727, + "logits/rejected": -0.6074246168136597, + "logps/chosen": -116.60855102539062, + "logps/ref_chosen": -73.51019287109375, + "logps/ref_rejected": -102.977294921875, + "logps/rejected": -181.97161865234375, + "loss": 1.1036, + "margin_dpo/margin_mean": 35.895992279052734, + "margin_dpo/margin_std": 56.17529296875, + "step": 119 + }, + { + "KL/chosen_KL_mean": -44.474021911621094, + "KL/mean": -69.42485046386719, + "KL/rejected_KL_mean": -94.37568664550781, + "KL/std": 58.080665588378906, + "epoch": 0.1762114537444934, + "fcm_dpo/beta": 0.011788450181484222, + "fcm_dpo/delta": -0.2001763880252838, + "fcm_dpo/margin": 49.901649475097656, + "fcm_dpo/q_t": 0.37107378244400024, + "grad_norm": 21.931350708007812, + "learning_rate": 4.918104238142103e-07, + "logits/chosen": -0.6279151439666748, + "logits/rejected": -0.5975610017776489, + "logps/chosen": -121.25485229492188, + "logps/ref_chosen": -76.78083801269531, + "logps/ref_rejected": -108.02374267578125, + "logps/rejected": -202.3994140625, + "loss": 0.9934, + "margin_dpo/margin_mean": 49.901649475097656, + "margin_dpo/margin_std": 60.89421081542969, + "step": 120 + }, + { + "KL/chosen_KL_mean": -42.93914794921875, + "KL/mean": -70.19830322265625, + "KL/rejected_KL_mean": -97.45744323730469, + "KL/std": 56.19927978515625, + "epoch": 0.1776798825256975, + "fcm_dpo/beta": 0.011228121817111969, + "fcm_dpo/delta": -0.22773230075836182, + "fcm_dpo/margin": 54.51830291748047, + "fcm_dpo/q_t": 0.3683249354362488, + "grad_norm": 24.322509765625, + "learning_rate": 4.91481456572267e-07, + "logits/chosen": -0.5546694993972778, + "logits/rejected": -0.5499193072319031, + "logps/chosen": -104.72904968261719, + "logps/ref_chosen": -61.789894104003906, + "logps/ref_rejected": -109.99456787109375, + "logps/rejected": -207.4520263671875, + "loss": 0.9991, + "margin_dpo/margin_mean": 54.51830291748047, + "margin_dpo/margin_std": 70.47315216064453, + "step": 121 + }, + { + "KL/chosen_KL_mean": -39.022762298583984, + "KL/mean": -73.1162109375, + "KL/rejected_KL_mean": -107.20967102050781, + "KL/std": 64.33946990966797, + "epoch": 0.17914831130690162, + "fcm_dpo/beta": 0.010600419715046883, + "fcm_dpo/delta": -0.34783935546875, + "fcm_dpo/margin": 68.18690490722656, + "fcm_dpo/q_t": 0.34132951498031616, + "grad_norm": 23.660940170288086, + "learning_rate": 4.911461260693638e-07, + "logits/chosen": -0.5362130403518677, + "logits/rejected": -0.5523936748504639, + "logps/chosen": -85.92497253417969, + "logps/ref_chosen": -46.9022102355957, + "logps/ref_rejected": -106.71418762207031, + "logps/rejected": -213.92385864257812, + "loss": 0.8959, + "margin_dpo/margin_mean": 68.18690490722656, + "margin_dpo/margin_std": 67.38003540039062, + "step": 122 + }, + { + "KL/chosen_KL_mean": -42.91706848144531, + "KL/mean": -65.22838592529297, + "KL/rejected_KL_mean": -87.53971862792969, + "KL/std": 54.896278381347656, + "epoch": 0.18061674008810572, + "fcm_dpo/beta": 0.010254621505737305, + "fcm_dpo/delta": -0.06130140274763107, + "fcm_dpo/margin": 44.62263870239258, + "fcm_dpo/q_t": 0.4016492962837219, + "grad_norm": 20.85264778137207, + "learning_rate": 4.908044411417711e-07, + "logits/chosen": -0.5527976751327515, + "logits/rejected": -0.535463273525238, + "logps/chosen": -104.25570678710938, + "logps/ref_chosen": -61.33863830566406, + "logps/ref_rejected": -87.775390625, + "logps/rejected": -175.3151092529297, + "loss": 1.1084, + "margin_dpo/margin_mean": 44.62263488769531, + "margin_dpo/margin_std": 73.6215591430664, + "step": 123 + }, + { + "KL/chosen_KL_mean": -48.867706298828125, + "KL/mean": -81.8544921875, + "KL/rejected_KL_mean": -114.84127044677734, + "KL/std": 75.15878295898438, + "epoch": 0.18208516886930984, + "fcm_dpo/beta": 0.00984976440668106, + "fcm_dpo/delta": -0.26814186573028564, + "fcm_dpo/margin": 65.97357177734375, + "fcm_dpo/q_t": 0.36925771832466125, + "grad_norm": 22.487119674682617, + "learning_rate": 4.904564107932048e-07, + "logits/chosen": -0.5741191506385803, + "logits/rejected": -0.5771223306655884, + "logps/chosen": -120.3160400390625, + "logps/ref_chosen": -71.44833374023438, + "logps/ref_rejected": -117.58056640625, + "logps/rejected": -232.42184448242188, + "loss": 1.0146, + "margin_dpo/margin_mean": 65.97357177734375, + "margin_dpo/margin_std": 94.4095458984375, + "step": 124 + }, + { + "KL/chosen_KL_mean": -40.33204650878906, + "KL/mean": -69.6327896118164, + "KL/rejected_KL_mean": -98.93354797363281, + "KL/std": 63.41231918334961, + "epoch": 0.18355359765051396, + "fcm_dpo/beta": 0.009521868079900742, + "fcm_dpo/delta": -0.16708803176879883, + "fcm_dpo/margin": 58.601497650146484, + "fcm_dpo/q_t": 0.3802080452442169, + "grad_norm": 18.873462677001953, + "learning_rate": 4.90102044194588e-07, + "logits/chosen": -0.4996240735054016, + "logits/rejected": -0.5013130903244019, + "logps/chosen": -90.46898651123047, + "logps/ref_chosen": -50.136940002441406, + "logps/ref_rejected": -83.98861694335938, + "logps/rejected": -182.92214965820312, + "loss": 1.0267, + "margin_dpo/margin_mean": 58.60149383544922, + "margin_dpo/margin_std": 78.49264526367188, + "step": 125 + }, + { + "KL/chosen_KL_mean": -43.697837829589844, + "KL/mean": -70.89157104492188, + "KL/rejected_KL_mean": -98.08531188964844, + "KL/std": 56.95924377441406, + "epoch": 0.18502202643171806, + "fcm_dpo/beta": 0.009252631105482578, + "fcm_dpo/delta": -0.10898162424564362, + "fcm_dpo/margin": 54.387474060058594, + "fcm_dpo/q_t": 0.38932526111602783, + "grad_norm": 20.310638427734375, + "learning_rate": 4.897413506838102e-07, + "logits/chosen": -0.543870210647583, + "logits/rejected": -0.5395331382751465, + "logps/chosen": -99.36490631103516, + "logps/ref_chosen": -55.66706848144531, + "logps/ref_rejected": -98.1297607421875, + "logps/rejected": -196.21507263183594, + "loss": 1.0441, + "margin_dpo/margin_mean": 54.387474060058594, + "margin_dpo/margin_std": 72.31570434570312, + "step": 126 + }, + { + "KL/chosen_KL_mean": -42.63111114501953, + "KL/mean": -62.53132629394531, + "KL/rejected_KL_mean": -82.43153381347656, + "KL/std": 50.20075225830078, + "epoch": 0.18649045521292218, + "fcm_dpo/beta": 0.009267613291740417, + "fcm_dpo/delta": 0.03231769800186157, + "fcm_dpo/margin": 39.8004264831543, + "fcm_dpo/q_t": 0.414898157119751, + "grad_norm": 20.918685913085938, + "learning_rate": 4.89374339765481e-07, + "logits/chosen": -0.5204076170921326, + "logits/rejected": -0.5004839897155762, + "logps/chosen": -99.185791015625, + "logps/ref_chosen": -56.55467987060547, + "logps/ref_rejected": -76.7957763671875, + "logps/rejected": -159.22732543945312, + "loss": 1.1319, + "margin_dpo/margin_mean": 39.80043029785156, + "margin_dpo/margin_std": 63.22393798828125, + "step": 127 + }, + { + "KL/chosen_KL_mean": -45.72355270385742, + "KL/mean": -66.75888061523438, + "KL/rejected_KL_mean": -87.79420471191406, + "KL/std": 59.13935852050781, + "epoch": 0.18795888399412627, + "fcm_dpo/beta": 0.009327895939350128, + "fcm_dpo/delta": 0.007488146424293518, + "fcm_dpo/margin": 42.07066345214844, + "fcm_dpo/q_t": 0.4129607379436493, + "grad_norm": 29.327892303466797, + "learning_rate": 4.890010211106795e-07, + "logits/chosen": -0.5080424547195435, + "logits/rejected": -0.4870242476463318, + "logps/chosen": -103.84451293945312, + "logps/ref_chosen": -58.12095642089844, + "logps/ref_rejected": -76.43896484375, + "logps/rejected": -164.23316955566406, + "loss": 1.1428, + "margin_dpo/margin_mean": 42.07066345214844, + "margin_dpo/margin_std": 74.08981323242188, + "step": 128 + }, + { + "KL/chosen_KL_mean": -55.38201904296875, + "KL/mean": -77.5055923461914, + "KL/rejected_KL_mean": -99.62916564941406, + "KL/std": 65.07495880126953, + "epoch": 0.1894273127753304, + "fcm_dpo/beta": 0.009284512139856815, + "fcm_dpo/delta": -0.01128113642334938, + "fcm_dpo/margin": 44.24713134765625, + "fcm_dpo/q_t": 0.4140230417251587, + "grad_norm": 20.87249183654785, + "learning_rate": 4.88621404556699e-07, + "logits/chosen": -0.5257991552352905, + "logits/rejected": -0.5148609280586243, + "logps/chosen": -122.29839324951172, + "logps/ref_chosen": -66.91637420654297, + "logps/ref_rejected": -96.6422119140625, + "logps/rejected": -196.27137756347656, + "loss": 1.1488, + "margin_dpo/margin_mean": 44.24713134765625, + "margin_dpo/margin_std": 84.46028900146484, + "step": 129 + }, + { + "KL/chosen_KL_mean": -41.156402587890625, + "KL/mean": -74.48085021972656, + "KL/rejected_KL_mean": -107.8052978515625, + "KL/std": 66.90983581542969, + "epoch": 0.19089574155653452, + "fcm_dpo/beta": 0.00907239317893982, + "fcm_dpo/delta": -0.2178019881248474, + "fcm_dpo/margin": 66.64888763427734, + "fcm_dpo/q_t": 0.37017908692359924, + "grad_norm": 21.265871047973633, + "learning_rate": 4.882355001067891e-07, + "logits/chosen": -0.5009858012199402, + "logits/rejected": -0.4956481158733368, + "logps/chosen": -85.82325744628906, + "logps/ref_chosen": -44.66685104370117, + "logps/ref_rejected": -82.78165435791016, + "logps/rejected": -190.58694458007812, + "loss": 0.9976, + "margin_dpo/margin_mean": 66.64889526367188, + "margin_dpo/margin_std": 81.45700073242188, + "step": 130 + }, + { + "KL/chosen_KL_mean": -36.80657196044922, + "KL/mean": -70.24964904785156, + "KL/rejected_KL_mean": -103.69271850585938, + "KL/std": 66.65603637695312, + "epoch": 0.19236417033773862, + "fcm_dpo/beta": 0.008619595319032669, + "fcm_dpo/delta": -0.18765899538993835, + "fcm_dpo/margin": 66.88614654541016, + "fcm_dpo/q_t": 0.3690122663974762, + "grad_norm": 28.770198822021484, + "learning_rate": 4.878433179298909e-07, + "logits/chosen": -0.498441219329834, + "logits/rejected": -0.5051707625389099, + "logps/chosen": -81.73116302490234, + "logps/ref_chosen": -44.924591064453125, + "logps/ref_rejected": -88.44401550292969, + "logps/rejected": -192.13671875, + "loss": 0.9805, + "margin_dpo/margin_mean": 66.88614654541016, + "margin_dpo/margin_std": 73.10858154296875, + "step": 131 + }, + { + "KL/chosen_KL_mean": -48.89440155029297, + "KL/mean": -76.34346008300781, + "KL/rejected_KL_mean": -103.79251098632812, + "KL/std": 66.48584747314453, + "epoch": 0.19383259911894274, + "fcm_dpo/beta": 0.00844726525247097, + "fcm_dpo/delta": -0.06705770641565323, + "fcm_dpo/margin": 54.898109436035156, + "fcm_dpo/q_t": 0.40113556385040283, + "grad_norm": 19.942279815673828, + "learning_rate": 4.874448683603694e-07, + "logits/chosen": -0.48032820224761963, + "logits/rejected": -0.47630518674850464, + "logps/chosen": -107.89549255371094, + "logps/ref_chosen": -59.00108337402344, + "logps/ref_rejected": -87.89215087890625, + "logps/rejected": -191.68466186523438, + "loss": 1.0886, + "margin_dpo/margin_mean": 54.89811325073242, + "margin_dpo/margin_std": 86.42204284667969, + "step": 132 + }, + { + "KL/chosen_KL_mean": -57.482398986816406, + "KL/mean": -81.73624420166016, + "KL/rejected_KL_mean": -105.99009704589844, + "KL/std": 59.815887451171875, + "epoch": 0.19530102790014683, + "fcm_dpo/beta": 0.008408504538238049, + "fcm_dpo/delta": -0.00830613262951374, + "fcm_dpo/margin": 48.5077018737793, + "fcm_dpo/q_t": 0.41032248735427856, + "grad_norm": 27.46077537536621, + "learning_rate": 4.870401618977415e-07, + "logits/chosen": -0.5145904421806335, + "logits/rejected": -0.5010430812835693, + "logps/chosen": -124.08689880371094, + "logps/ref_chosen": -66.60449981689453, + "logps/ref_rejected": -96.33355712890625, + "logps/rejected": -202.32366943359375, + "loss": 1.11, + "margin_dpo/margin_mean": 48.50770568847656, + "margin_dpo/margin_std": 75.81759643554688, + "step": 133 + }, + { + "KL/chosen_KL_mean": -45.88512420654297, + "KL/mean": -71.80236053466797, + "KL/rejected_KL_mean": -97.7196044921875, + "KL/std": 58.62601852416992, + "epoch": 0.19676945668135096, + "fcm_dpo/beta": 0.008402526378631592, + "fcm_dpo/delta": -0.037258490920066833, + "fcm_dpo/margin": 51.83448028564453, + "fcm_dpo/q_t": 0.4022940993309021, + "grad_norm": 18.916580200195312, + "learning_rate": 4.866292092063986e-07, + "logits/chosen": -0.4667087197303772, + "logits/rejected": -0.4526156187057495, + "logps/chosen": -97.95437622070312, + "logps/ref_chosen": -52.06925582885742, + "logps/ref_rejected": -87.6545181274414, + "logps/rejected": -185.37411499023438, + "loss": 1.0693, + "margin_dpo/margin_mean": 51.83448028564453, + "margin_dpo/margin_std": 67.91160583496094, + "step": 134 + }, + { + "KL/chosen_KL_mean": -50.227142333984375, + "KL/mean": -87.61117553710938, + "KL/rejected_KL_mean": -124.99522399902344, + "KL/std": 77.6145248413086, + "epoch": 0.19823788546255505, + "fcm_dpo/beta": 0.008090103045105934, + "fcm_dpo/delta": -0.21815121173858643, + "fcm_dpo/margin": 74.76806640625, + "fcm_dpo/q_t": 0.3705596625804901, + "grad_norm": 22.08445167541504, + "learning_rate": 4.862120211153265e-07, + "logits/chosen": -0.48219579458236694, + "logits/rejected": -0.5165150165557861, + "logps/chosen": -100.58100128173828, + "logps/ref_chosen": -50.353858947753906, + "logps/ref_rejected": -115.97975158691406, + "logps/rejected": -240.9749755859375, + "loss": 0.9939, + "margin_dpo/margin_mean": 74.76806640625, + "margin_dpo/margin_std": 93.64501953125, + "step": 135 + }, + { + "KL/chosen_KL_mean": -59.928977966308594, + "KL/mean": -84.67167663574219, + "KL/rejected_KL_mean": -109.41439819335938, + "KL/std": 70.4333724975586, + "epoch": 0.19970631424375918, + "fcm_dpo/beta": 0.007961141876876354, + "fcm_dpo/delta": 0.005593650043010712, + "fcm_dpo/margin": 49.48542404174805, + "fcm_dpo/q_t": 0.41935813426971436, + "grad_norm": 20.334075927734375, + "learning_rate": 4.857886086178193e-07, + "logits/chosen": -0.481515109539032, + "logits/rejected": -0.4732978343963623, + "logps/chosen": -125.0014877319336, + "logps/ref_chosen": -65.072509765625, + "logps/ref_rejected": -96.32122802734375, + "logps/rejected": -205.73562622070312, + "loss": 1.1443, + "margin_dpo/margin_mean": 49.48542022705078, + "margin_dpo/margin_std": 90.12128448486328, + "step": 136 + }, + { + "KL/chosen_KL_mean": -57.55767822265625, + "KL/mean": -96.53668212890625, + "KL/rejected_KL_mean": -135.51568603515625, + "KL/std": 93.2874984741211, + "epoch": 0.2011747430249633, + "fcm_dpo/beta": 0.00774747971445322, + "fcm_dpo/delta": -0.21738505363464355, + "fcm_dpo/margin": 77.95802307128906, + "fcm_dpo/q_t": 0.3767836093902588, + "grad_norm": 19.739362716674805, + "learning_rate": 4.853589828711902e-07, + "logits/chosen": -0.42566192150115967, + "logits/rejected": -0.4528757333755493, + "logps/chosen": -106.31678771972656, + "logps/ref_chosen": -48.759117126464844, + "logps/ref_rejected": -113.86376953125, + "logps/rejected": -249.37945556640625, + "loss": 1.0251, + "margin_dpo/margin_mean": 77.95802307128906, + "margin_dpo/margin_std": 111.74757385253906, + "step": 137 + }, + { + "KL/chosen_KL_mean": -59.95167922973633, + "KL/mean": -89.62110137939453, + "KL/rejected_KL_mean": -119.29052734375, + "KL/std": 71.20696258544922, + "epoch": 0.2026431718061674, + "fcm_dpo/beta": 0.007621297147125006, + "fcm_dpo/delta": -0.054680272936820984, + "fcm_dpo/margin": 59.33884811401367, + "fcm_dpo/q_t": 0.3964976966381073, + "grad_norm": 21.295473098754883, + "learning_rate": 4.849231551964771e-07, + "logits/chosen": -0.4297477602958679, + "logits/rejected": -0.41819727420806885, + "logps/chosen": -120.47132873535156, + "logps/ref_chosen": -60.519649505615234, + "logps/ref_rejected": -93.19694519042969, + "logps/rejected": -212.4874725341797, + "loss": 1.0552, + "margin_dpo/margin_mean": 59.33884811401367, + "margin_dpo/margin_std": 72.82606506347656, + "step": 138 + }, + { + "KL/chosen_KL_mean": -50.222434997558594, + "KL/mean": -85.49241638183594, + "KL/rejected_KL_mean": -120.76240539550781, + "KL/std": 67.446044921875, + "epoch": 0.20411160058737152, + "fcm_dpo/beta": 0.007455192506313324, + "fcm_dpo/delta": -0.13268427550792694, + "fcm_dpo/margin": 70.53996276855469, + "fcm_dpo/q_t": 0.38338446617126465, + "grad_norm": 18.57466697692871, + "learning_rate": 4.844811370781446e-07, + "logits/chosen": -0.44361281394958496, + "logits/rejected": -0.4341086149215698, + "logps/chosen": -97.11381530761719, + "logps/ref_chosen": -46.89138412475586, + "logps/ref_rejected": -79.72798156738281, + "logps/rejected": -200.49038696289062, + "loss": 1.0198, + "margin_dpo/margin_mean": 70.53996276855469, + "margin_dpo/margin_std": 87.36215209960938, + "step": 139 + }, + { + "KL/chosen_KL_mean": -60.796875, + "KL/mean": -92.86795806884766, + "KL/rejected_KL_mean": -124.93904113769531, + "KL/std": 75.12921905517578, + "epoch": 0.2055800293685756, + "fcm_dpo/beta": 0.007304832339286804, + "fcm_dpo/delta": -0.07196947187185287, + "fcm_dpo/margin": 64.14215850830078, + "fcm_dpo/q_t": 0.39591526985168457, + "grad_norm": 21.425811767578125, + "learning_rate": 4.840329401637809e-07, + "logits/chosen": -0.4486401081085205, + "logits/rejected": -0.4356744587421417, + "logps/chosen": -119.77159118652344, + "logps/ref_chosen": -58.97471618652344, + "logps/ref_rejected": -83.28410339355469, + "logps/rejected": -208.22314453125, + "loss": 1.0689, + "margin_dpo/margin_mean": 64.14215850830078, + "margin_dpo/margin_std": 90.35142517089844, + "step": 140 + }, + { + "KL/chosen_KL_mean": -69.00209045410156, + "KL/mean": -99.4312744140625, + "KL/rejected_KL_mean": -129.86044311523438, + "KL/std": 83.01104736328125, + "epoch": 0.20704845814977973, + "fcm_dpo/beta": 0.007242698222398758, + "fcm_dpo/delta": -0.04263737052679062, + "fcm_dpo/margin": 60.85835266113281, + "fcm_dpo/q_t": 0.4011520743370056, + "grad_norm": 27.03215980529785, + "learning_rate": 4.83578576263792e-07, + "logits/chosen": -0.4251963496208191, + "logits/rejected": -0.41217079758644104, + "logps/chosen": -144.0777587890625, + "logps/ref_chosen": -75.07566833496094, + "logps/ref_rejected": -98.1922607421875, + "logps/rejected": -228.05270385742188, + "loss": 1.1024, + "margin_dpo/margin_mean": 60.85835266113281, + "margin_dpo/margin_std": 95.99069213867188, + "step": 141 + }, + { + "KL/chosen_KL_mean": -69.8545150756836, + "KL/mean": -104.672607421875, + "KL/rejected_KL_mean": -139.49070739746094, + "KL/std": 90.1983642578125, + "epoch": 0.20851688693098386, + "fcm_dpo/beta": 0.007142849266529083, + "fcm_dpo/delta": -0.10244297236204147, + "fcm_dpo/margin": 69.63619995117188, + "fcm_dpo/q_t": 0.3925698399543762, + "grad_norm": 26.979690551757812, + "learning_rate": 4.83118057351089e-07, + "logits/chosen": -0.40249842405319214, + "logits/rejected": -0.40109604597091675, + "logps/chosen": -127.8824462890625, + "logps/ref_chosen": -58.027931213378906, + "logps/ref_rejected": -94.58222961425781, + "logps/rejected": -234.07293701171875, + "loss": 1.0826, + "margin_dpo/margin_mean": 69.63619995117188, + "margin_dpo/margin_std": 106.22422790527344, + "step": 142 + }, + { + "KL/chosen_KL_mean": -74.11309051513672, + "KL/mean": -96.2786865234375, + "KL/rejected_KL_mean": -118.44427490234375, + "KL/std": 79.39483642578125, + "epoch": 0.20998531571218795, + "fcm_dpo/beta": 0.007136983796954155, + "fcm_dpo/delta": 0.08635500073432922, + "fcm_dpo/margin": 44.3311882019043, + "fcm_dpo/q_t": 0.4322276711463928, + "grad_norm": 23.613080978393555, + "learning_rate": 4.826513955607734e-07, + "logits/chosen": -0.38653671741485596, + "logits/rejected": -0.3791394829750061, + "logps/chosen": -131.70953369140625, + "logps/ref_chosen": -57.59645080566406, + "logps/ref_rejected": -78.99957275390625, + "logps/rejected": -197.44384765625, + "loss": 1.1961, + "margin_dpo/margin_mean": 44.3311882019043, + "margin_dpo/margin_std": 92.59246826171875, + "step": 143 + }, + { + "KL/chosen_KL_mean": -65.83393859863281, + "KL/mean": -93.10847473144531, + "KL/rejected_KL_mean": -120.38301086425781, + "KL/std": 67.2380142211914, + "epoch": 0.21145374449339208, + "fcm_dpo/beta": 0.007198760285973549, + "fcm_dpo/delta": 0.007602264638990164, + "fcm_dpo/margin": 54.54907989501953, + "fcm_dpo/q_t": 0.4110908508300781, + "grad_norm": 21.00301170349121, + "learning_rate": 4.821786031898176e-07, + "logits/chosen": -0.38446202874183655, + "logits/rejected": -0.368974506855011, + "logps/chosen": -125.74029541015625, + "logps/ref_chosen": -59.90636444091797, + "logps/ref_rejected": -82.00025939941406, + "logps/rejected": -202.38327026367188, + "loss": 1.1073, + "margin_dpo/margin_mean": 54.54907989501953, + "margin_dpo/margin_std": 79.00935363769531, + "step": 144 + }, + { + "KL/chosen_KL_mean": -62.50547409057617, + "KL/mean": -91.89143371582031, + "KL/rejected_KL_mean": -121.27738952636719, + "KL/std": 67.18325805664062, + "epoch": 0.21292217327459617, + "fcm_dpo/beta": 0.007178094238042831, + "fcm_dpo/delta": -0.02286495827138424, + "fcm_dpo/margin": 58.771915435791016, + "fcm_dpo/q_t": 0.40436333417892456, + "grad_norm": 23.93907356262207, + "learning_rate": 4.816996926967401e-07, + "logits/chosen": -0.43855080008506775, + "logits/rejected": -0.42247945070266724, + "logps/chosen": -119.10614013671875, + "logps/ref_chosen": -56.60066604614258, + "logps/ref_rejected": -77.86631774902344, + "logps/rejected": -199.14370727539062, + "loss": 1.0876, + "margin_dpo/margin_mean": 58.771915435791016, + "margin_dpo/margin_std": 81.85527038574219, + "step": 145 + }, + { + "KL/chosen_KL_mean": -85.47122192382812, + "KL/mean": -108.09506225585938, + "KL/rejected_KL_mean": -130.71890258789062, + "KL/std": 72.80694580078125, + "epoch": 0.2143906020558003, + "fcm_dpo/beta": 0.007236181758344173, + "fcm_dpo/delta": 0.07508739829063416, + "fcm_dpo/margin": 45.2476806640625, + "fcm_dpo/q_t": 0.4256265461444855, + "grad_norm": 26.226016998291016, + "learning_rate": 4.812146767012779e-07, + "logits/chosen": -0.3779621720314026, + "logits/rejected": -0.34679633378982544, + "logps/chosen": -151.4716796875, + "logps/ref_chosen": -66.00045013427734, + "logps/ref_rejected": -81.70278930664062, + "logps/rejected": -212.42169189453125, + "loss": 1.183, + "margin_dpo/margin_mean": 45.2476806640625, + "margin_dpo/margin_std": 87.81689453125, + "step": 146 + }, + { + "KL/chosen_KL_mean": -62.50874328613281, + "KL/mean": -93.20917510986328, + "KL/rejected_KL_mean": -123.90959930419922, + "KL/std": 73.37457275390625, + "epoch": 0.21585903083700442, + "fcm_dpo/beta": 0.007220801897346973, + "fcm_dpo/delta": -0.045540180057287216, + "fcm_dpo/margin": 61.40085983276367, + "fcm_dpo/q_t": 0.4018627405166626, + "grad_norm": 19.40831184387207, + "learning_rate": 4.807235679840536e-07, + "logits/chosen": -0.44671040773391724, + "logits/rejected": -0.42650818824768066, + "logps/chosen": -115.91423034667969, + "logps/ref_chosen": -53.405487060546875, + "logps/ref_rejected": -71.39060974121094, + "logps/rejected": -195.3002166748047, + "loss": 1.0886, + "margin_dpo/margin_mean": 61.40085983276367, + "margin_dpo/margin_std": 90.92599487304688, + "step": 147 + }, + { + "KL/chosen_KL_mean": -61.455265045166016, + "KL/mean": -87.85798645019531, + "KL/rejected_KL_mean": -114.26071166992188, + "KL/std": 73.19024658203125, + "epoch": 0.2173274596182085, + "fcm_dpo/beta": 0.007157785817980766, + "fcm_dpo/delta": -0.0840882733464241, + "fcm_dpo/margin": 52.805450439453125, + "fcm_dpo/q_t": 0.4160994589328766, + "grad_norm": 19.22397804260254, + "learning_rate": 4.802263794862384e-07, + "logits/chosen": -0.47541412711143494, + "logits/rejected": -0.46777063608169556, + "logps/chosen": -126.39234924316406, + "logps/ref_chosen": -64.93708038330078, + "logps/ref_rejected": -103.09384155273438, + "logps/rejected": -217.35455322265625, + "loss": 1.1233, + "margin_dpo/margin_mean": 52.80545425415039, + "margin_dpo/margin_std": 77.12681579589844, + "step": 148 + }, + { + "KL/chosen_KL_mean": -59.069393157958984, + "KL/mean": -91.8096694946289, + "KL/rejected_KL_mean": -124.54994201660156, + "KL/std": 66.26725006103516, + "epoch": 0.21879588839941264, + "fcm_dpo/beta": 0.00697628129273653, + "fcm_dpo/delta": -0.06137773394584656, + "fcm_dpo/margin": 65.48056030273438, + "fcm_dpo/q_t": 0.3954726457595825, + "grad_norm": 18.177705764770508, + "learning_rate": 4.797231243092118e-07, + "logits/chosen": -0.4999982714653015, + "logits/rejected": -0.48564597964286804, + "logps/chosen": -117.54315185546875, + "logps/ref_chosen": -58.47376251220703, + "logps/ref_rejected": -99.31474304199219, + "logps/rejected": -223.86468505859375, + "loss": 1.0551, + "margin_dpo/margin_mean": 65.48056030273438, + "margin_dpo/margin_std": 78.40389251708984, + "step": 149 + }, + { + "KL/chosen_KL_mean": -52.26180648803711, + "KL/mean": -84.42105865478516, + "KL/rejected_KL_mean": -116.58030700683594, + "KL/std": 78.1861343383789, + "epoch": 0.22026431718061673, + "fcm_dpo/beta": 0.006911845877766609, + "fcm_dpo/delta": -0.04800789803266525, + "fcm_dpo/margin": 64.3185043334961, + "fcm_dpo/q_t": 0.40420806407928467, + "grad_norm": 18.062509536743164, + "learning_rate": 4.792138157142157e-07, + "logits/chosen": -0.46106863021850586, + "logits/rejected": -0.4648742079734802, + "logps/chosen": -97.96762084960938, + "logps/ref_chosen": -45.705810546875, + "logps/ref_rejected": -83.34759521484375, + "logps/rejected": -199.9279022216797, + "loss": 1.0812, + "margin_dpo/margin_mean": 64.3185043334961, + "margin_dpo/margin_std": 93.56321716308594, + "step": 150 + }, + { + "KL/chosen_KL_mean": -65.14360046386719, + "KL/mean": -97.421142578125, + "KL/rejected_KL_mean": -129.69869995117188, + "KL/std": 73.59419250488281, + "epoch": 0.22173274596182085, + "fcm_dpo/beta": 0.006893502548336983, + "fcm_dpo/delta": -0.047122225165367126, + "fcm_dpo/margin": 64.55509185791016, + "fcm_dpo/q_t": 0.3980643153190613, + "grad_norm": 20.905559539794922, + "learning_rate": 4.786984671220053e-07, + "logits/chosen": -0.541815996170044, + "logits/rejected": -0.5158591866493225, + "logps/chosen": -135.7144317626953, + "logps/ref_chosen": -70.57083129882812, + "logps/ref_rejected": -100.46382141113281, + "logps/rejected": -230.16250610351562, + "loss": 1.061, + "margin_dpo/margin_mean": 64.55509185791016, + "margin_dpo/margin_std": 80.68389892578125, + "step": 151 + }, + { + "KL/chosen_KL_mean": -57.33415603637695, + "KL/mean": -96.5593032836914, + "KL/rejected_KL_mean": -135.78445434570312, + "KL/std": 76.12808990478516, + "epoch": 0.22320117474302498, + "fcm_dpo/beta": 0.006756227929145098, + "fcm_dpo/delta": -0.13709712028503418, + "fcm_dpo/margin": 78.45030212402344, + "fcm_dpo/q_t": 0.38215482234954834, + "grad_norm": 19.958600997924805, + "learning_rate": 4.78177092112495e-07, + "logits/chosen": -0.48123008012771606, + "logits/rejected": -0.47946709394454956, + "logps/chosen": -117.49854278564453, + "logps/ref_chosen": -60.16438674926758, + "logps/ref_rejected": -106.14045715332031, + "logps/rejected": -241.92491149902344, + "loss": 1.0171, + "margin_dpo/margin_mean": 78.45030212402344, + "margin_dpo/margin_std": 93.28910827636719, + "step": 152 + }, + { + "KL/chosen_KL_mean": -57.76191711425781, + "KL/mean": -91.28083801269531, + "KL/rejected_KL_mean": -124.79976654052734, + "KL/std": 82.61441040039062, + "epoch": 0.22466960352422907, + "fcm_dpo/beta": 0.006657836027443409, + "fcm_dpo/delta": -0.04847495257854462, + "fcm_dpo/margin": 67.03783416748047, + "fcm_dpo/q_t": 0.403054416179657, + "grad_norm": 15.512747764587402, + "learning_rate": 4.776497044244016e-07, + "logits/chosen": -0.48859214782714844, + "logits/rejected": -0.48414355516433716, + "logps/chosen": -114.07719421386719, + "logps/ref_chosen": -56.315277099609375, + "logps/ref_rejected": -85.65583801269531, + "logps/rejected": -210.45559692382812, + "loss": 1.0887, + "margin_dpo/margin_mean": 67.037841796875, + "margin_dpo/margin_std": 101.445068359375, + "step": 153 + }, + { + "KL/chosen_KL_mean": -69.54912567138672, + "KL/mean": -101.59078979492188, + "KL/rejected_KL_mean": -133.6324462890625, + "KL/std": 83.2306137084961, + "epoch": 0.2261380323054332, + "fcm_dpo/beta": 0.006629183888435364, + "fcm_dpo/delta": -0.026146577671170235, + "fcm_dpo/margin": 64.08331298828125, + "fcm_dpo/q_t": 0.4066181182861328, + "grad_norm": 19.13957977294922, + "learning_rate": 4.771163179548808e-07, + "logits/chosen": -0.4480747580528259, + "logits/rejected": -0.4504152834415436, + "logps/chosen": -132.29168701171875, + "logps/ref_chosen": -62.74256896972656, + "logps/ref_rejected": -104.24420166015625, + "logps/rejected": -237.87664794921875, + "loss": 1.1201, + "margin_dpo/margin_mean": 64.08331298828125, + "margin_dpo/margin_std": 105.86033630371094, + "step": 154 + }, + { + "KL/chosen_KL_mean": -63.659236907958984, + "KL/mean": -95.98409271240234, + "KL/rejected_KL_mean": -128.3089599609375, + "KL/std": 76.96090698242188, + "epoch": 0.2276064610866373, + "fcm_dpo/beta": 0.006584943272173405, + "fcm_dpo/delta": -0.026932524517178535, + "fcm_dpo/margin": 64.64971923828125, + "fcm_dpo/q_t": 0.4042484164237976, + "grad_norm": 19.22228240966797, + "learning_rate": 4.7657694675916247e-07, + "logits/chosen": -0.49922215938568115, + "logits/rejected": -0.4822632670402527, + "logps/chosen": -124.31242370605469, + "logps/ref_chosen": -60.65318298339844, + "logps/ref_rejected": -77.49220275878906, + "logps/rejected": -205.80116271972656, + "loss": 1.0942, + "margin_dpo/margin_mean": 64.64971923828125, + "margin_dpo/margin_std": 95.12773132324219, + "step": 155 + }, + { + "KL/chosen_KL_mean": -89.18508911132812, + "KL/mean": -107.76153564453125, + "KL/rejected_KL_mean": -126.33798217773438, + "KL/std": 82.31591796875, + "epoch": 0.2290748898678414, + "fcm_dpo/beta": 0.006626888178288937, + "fcm_dpo/delta": 0.05205275118350983, + "fcm_dpo/margin": 37.15288543701172, + "fcm_dpo/q_t": 0.44431304931640625, + "grad_norm": 29.14635467529297, + "learning_rate": 4.7603160505017893e-07, + "logits/chosen": -0.4069097638130188, + "logits/rejected": -0.3994802236557007, + "logps/chosen": -158.67697143554688, + "logps/ref_chosen": -69.49188232421875, + "logps/ref_rejected": -77.16929626464844, + "logps/rejected": -203.50726318359375, + "loss": 1.2754, + "margin_dpo/margin_mean": 37.15288543701172, + "margin_dpo/margin_std": 108.07014465332031, + "step": 156 + }, + { + "KL/chosen_KL_mean": -80.44374084472656, + "KL/mean": -121.49053192138672, + "KL/rejected_KL_mean": -162.53732299804688, + "KL/std": 89.35894775390625, + "epoch": 0.2305433186490455, + "fcm_dpo/beta": 0.006456049624830484, + "fcm_dpo/delta": -0.13866297900676727, + "fcm_dpo/margin": 82.09356689453125, + "fcm_dpo/q_t": 0.37898433208465576, + "grad_norm": 23.95264434814453, + "learning_rate": 4.7548030719819154e-07, + "logits/chosen": -0.3959600329399109, + "logits/rejected": -0.4036720395088196, + "logps/chosen": -141.8121795654297, + "logps/ref_chosen": -61.368438720703125, + "logps/ref_rejected": -107.64636993408203, + "logps/rejected": -270.1837158203125, + "loss": 1.0292, + "margin_dpo/margin_mean": 82.09357452392578, + "margin_dpo/margin_std": 101.68392944335938, + "step": 157 + }, + { + "KL/chosen_KL_mean": -80.78021240234375, + "KL/mean": -124.482666015625, + "KL/rejected_KL_mean": -168.18511962890625, + "KL/std": 110.5858154296875, + "epoch": 0.23201174743024963, + "fcm_dpo/beta": 0.006296713836491108, + "fcm_dpo/delta": -0.15908576548099518, + "fcm_dpo/margin": 87.40489959716797, + "fcm_dpo/q_t": 0.3867127597332001, + "grad_norm": 19.55266761779785, + "learning_rate": 4.7492306773041136e-07, + "logits/chosen": -0.33809971809387207, + "logits/rejected": -0.35502055287361145, + "logps/chosen": -138.39312744140625, + "logps/ref_chosen": -57.612918853759766, + "logps/ref_rejected": -113.6946792602539, + "logps/rejected": -281.8797912597656, + "loss": 1.0552, + "margin_dpo/margin_mean": 87.4049072265625, + "margin_dpo/margin_std": 133.116943359375, + "step": 158 + }, + { + "KL/chosen_KL_mean": -90.86648559570312, + "KL/mean": -120.76834106445312, + "KL/rejected_KL_mean": -150.67018127441406, + "KL/std": 97.11602783203125, + "epoch": 0.23348017621145375, + "fcm_dpo/beta": 0.006294050253927708, + "fcm_dpo/delta": 0.02400752529501915, + "fcm_dpo/margin": 59.80369567871094, + "fcm_dpo/q_t": 0.41609764099121094, + "grad_norm": 22.071809768676758, + "learning_rate": 4.743599013306165e-07, + "logits/chosen": -0.4063527286052704, + "logits/rejected": -0.37675607204437256, + "logps/chosen": -172.4268341064453, + "logps/ref_chosen": -81.56034851074219, + "logps/ref_rejected": -88.89871215820312, + "logps/rejected": -239.5688934326172, + "loss": 1.1473, + "margin_dpo/margin_mean": 59.80369567871094, + "margin_dpo/margin_std": 104.75639343261719, + "step": 159 + }, + { + "KL/chosen_KL_mean": -93.1180419921875, + "KL/mean": -133.37435913085938, + "KL/rejected_KL_mean": -173.6306610107422, + "KL/std": 104.18497467041016, + "epoch": 0.23494860499265785, + "fcm_dpo/beta": 0.006151704117655754, + "fcm_dpo/delta": -0.10115846991539001, + "fcm_dpo/margin": 80.51261901855469, + "fcm_dpo/q_t": 0.3962337076663971, + "grad_norm": 23.225406646728516, + "learning_rate": 4.737908228387656e-07, + "logits/chosen": -0.3705775737762451, + "logits/rejected": -0.362305611371994, + "logps/chosen": -158.8489227294922, + "logps/ref_chosen": -65.73088073730469, + "logps/ref_rejected": -97.21781921386719, + "logps/rejected": -270.8484802246094, + "loss": 1.0912, + "margin_dpo/margin_mean": 80.51261901855469, + "margin_dpo/margin_std": 130.29788208007812, + "step": 160 + }, + { + "KL/chosen_KL_mean": -79.38202667236328, + "KL/mean": -114.66146850585938, + "KL/rejected_KL_mean": -149.94090270996094, + "KL/std": 82.19270324707031, + "epoch": 0.23641703377386197, + "fcm_dpo/beta": 0.00611657090485096, + "fcm_dpo/delta": -0.03301185369491577, + "fcm_dpo/margin": 70.55889129638672, + "fcm_dpo/q_t": 0.4046275019645691, + "grad_norm": 21.588083267211914, + "learning_rate": 4.7321584725060594e-07, + "logits/chosen": -0.3816624879837036, + "logits/rejected": -0.3820039629936218, + "logps/chosen": -131.81849670410156, + "logps/ref_chosen": -52.43647003173828, + "logps/ref_rejected": -83.43095397949219, + "logps/rejected": -233.37185668945312, + "loss": 1.0935, + "margin_dpo/margin_mean": 70.55888366699219, + "margin_dpo/margin_std": 104.3523941040039, + "step": 161 + }, + { + "KL/chosen_KL_mean": -76.32833862304688, + "KL/mean": -111.31201171875, + "KL/rejected_KL_mean": -146.29568481445312, + "KL/std": 91.16246032714844, + "epoch": 0.23788546255506607, + "fcm_dpo/beta": 0.0060338219627738, + "fcm_dpo/delta": -0.02483561635017395, + "fcm_dpo/margin": 69.96736145019531, + "fcm_dpo/q_t": 0.407100111246109, + "grad_norm": 21.74049186706543, + "learning_rate": 4.7263498971727905e-07, + "logits/chosen": -0.4195418953895569, + "logits/rejected": -0.4026295840740204, + "logps/chosen": -138.9389190673828, + "logps/ref_chosen": -62.6105842590332, + "logps/ref_rejected": -89.39057922363281, + "logps/rejected": -235.686279296875, + "loss": 1.109, + "margin_dpo/margin_mean": 69.96736145019531, + "margin_dpo/margin_std": 108.229248046875, + "step": 162 + }, + { + "KL/chosen_KL_mean": -85.92386627197266, + "KL/mean": -120.09257507324219, + "KL/rejected_KL_mean": -154.26129150390625, + "KL/std": 91.9381103515625, + "epoch": 0.2393538913362702, + "fcm_dpo/beta": 0.006065480876713991, + "fcm_dpo/delta": -0.015123652294278145, + "fcm_dpo/margin": 68.33741760253906, + "fcm_dpo/q_t": 0.4095137119293213, + "grad_norm": 21.437828063964844, + "learning_rate": 4.720482655449212e-07, + "logits/chosen": -0.3672639727592468, + "logits/rejected": -0.3495738208293915, + "logps/chosen": -140.94549560546875, + "logps/ref_chosen": -55.021629333496094, + "logps/ref_rejected": -75.418212890625, + "logps/rejected": -229.67950439453125, + "loss": 1.1152, + "margin_dpo/margin_mean": 68.33741760253906, + "margin_dpo/margin_std": 110.58999633789062, + "step": 163 + }, + { + "KL/chosen_KL_mean": -77.3841323852539, + "KL/mean": -119.36503601074219, + "KL/rejected_KL_mean": -161.34591674804688, + "KL/std": 89.17874908447266, + "epoch": 0.24082232011747431, + "fcm_dpo/beta": 0.0059239305555820465, + "fcm_dpo/delta": -0.10423934459686279, + "fcm_dpo/margin": 83.9617919921875, + "fcm_dpo/q_t": 0.3878824710845947, + "grad_norm": 21.113449096679688, + "learning_rate": 4.714556901942599e-07, + "logits/chosen": -0.3516240119934082, + "logits/rejected": -0.33663517236709595, + "logps/chosen": -133.02481079101562, + "logps/ref_chosen": -55.64066696166992, + "logps/ref_rejected": -79.66463470458984, + "logps/rejected": -241.01055908203125, + "loss": 1.035, + "margin_dpo/margin_mean": 83.9617919921875, + "margin_dpo/margin_std": 102.90313720703125, + "step": 164 + }, + { + "KL/chosen_KL_mean": -83.97947692871094, + "KL/mean": -110.91374969482422, + "KL/rejected_KL_mean": -137.8480224609375, + "KL/std": 75.39066314697266, + "epoch": 0.2422907488986784, + "fcm_dpo/beta": 0.005989417899399996, + "fcm_dpo/delta": 0.08000632375478745, + "fcm_dpo/margin": 53.86854553222656, + "fcm_dpo/q_t": 0.42767125368118286, + "grad_norm": 23.085264205932617, + "learning_rate": 4.708572792802069e-07, + "logits/chosen": -0.3876940608024597, + "logits/rejected": -0.36072492599487305, + "logps/chosen": -145.2901611328125, + "logps/ref_chosen": -61.310691833496094, + "logps/ref_rejected": -73.67060852050781, + "logps/rejected": -211.51861572265625, + "loss": 1.1749, + "margin_dpo/margin_mean": 53.86854553222656, + "margin_dpo/margin_std": 100.26142883300781, + "step": 165 + }, + { + "KL/chosen_KL_mean": -73.80902099609375, + "KL/mean": -123.69305419921875, + "KL/rejected_KL_mean": -173.57708740234375, + "KL/std": 109.76763916015625, + "epoch": 0.24375917767988253, + "fcm_dpo/beta": 0.0058315591886639595, + "fcm_dpo/delta": -0.19435712695121765, + "fcm_dpo/margin": 99.76808166503906, + "fcm_dpo/q_t": 0.3807521462440491, + "grad_norm": 17.283048629760742, + "learning_rate": 4.702530485714461e-07, + "logits/chosen": -0.36310431361198425, + "logits/rejected": -0.37374886870384216, + "logps/chosen": -124.79261779785156, + "logps/ref_chosen": -50.98360061645508, + "logps/ref_rejected": -98.09512329101562, + "logps/rejected": -271.6722106933594, + "loss": 1.0185, + "margin_dpo/margin_mean": 99.76808166503906, + "margin_dpo/margin_std": 138.61410522460938, + "step": 166 + }, + { + "KL/chosen_KL_mean": -75.02628326416016, + "KL/mean": -127.5509033203125, + "KL/rejected_KL_mean": -180.07553100585938, + "KL/std": 100.14985656738281, + "epoch": 0.24522760646108663, + "fcm_dpo/beta": 0.005625586491078138, + "fcm_dpo/delta": -0.20304620265960693, + "fcm_dpo/margin": 105.04924011230469, + "fcm_dpo/q_t": 0.36813193559646606, + "grad_norm": 21.618406295776367, + "learning_rate": 4.6964301399001877e-07, + "logits/chosen": -0.3545036017894745, + "logits/rejected": -0.35761505365371704, + "logps/chosen": -125.45037841796875, + "logps/ref_chosen": -50.424095153808594, + "logps/ref_rejected": -96.03042602539062, + "logps/rejected": -276.10595703125, + "loss": 0.9744, + "margin_dpo/margin_mean": 105.04924011230469, + "margin_dpo/margin_std": 115.94286346435547, + "step": 167 + }, + { + "KL/chosen_KL_mean": -81.31709289550781, + "KL/mean": -120.00129699707031, + "KL/rejected_KL_mean": -158.6855010986328, + "KL/std": 93.61595153808594, + "epoch": 0.24669603524229075, + "fcm_dpo/beta": 0.0055332607589662075, + "fcm_dpo/delta": -0.029504312202334404, + "fcm_dpo/margin": 77.36842346191406, + "fcm_dpo/q_t": 0.40392887592315674, + "grad_norm": 19.52683448791504, + "learning_rate": 4.690271916109034e-07, + "logits/chosen": -0.349258691072464, + "logits/rejected": -0.339669793844223, + "logps/chosen": -130.7799072265625, + "logps/ref_chosen": -49.462825775146484, + "logps/ref_rejected": -75.30855560302734, + "logps/rejected": -233.99404907226562, + "loss": 1.079, + "margin_dpo/margin_mean": 77.36842346191406, + "margin_dpo/margin_std": 104.69574737548828, + "step": 168 + }, + { + "KL/chosen_KL_mean": -83.92520904541016, + "KL/mean": -117.55094909667969, + "KL/rejected_KL_mean": -151.1767120361328, + "KL/std": 92.2286605834961, + "epoch": 0.24816446402349487, + "fcm_dpo/beta": 0.005457356106489897, + "fcm_dpo/delta": -0.07184266299009323, + "fcm_dpo/margin": 67.25149536132812, + "fcm_dpo/q_t": 0.420589804649353, + "grad_norm": 20.000539779663086, + "learning_rate": 4.6840559766159235e-07, + "logits/chosen": -0.3642885386943817, + "logits/rejected": -0.34793075919151306, + "logps/chosen": -143.72865295410156, + "logps/ref_chosen": -59.803443908691406, + "logps/ref_rejected": -83.34574890136719, + "logps/rejected": -234.5224609375, + "loss": 1.162, + "margin_dpo/margin_mean": 67.25149536132812, + "margin_dpo/margin_std": 126.57770538330078, + "step": 169 + }, + { + "KL/chosen_KL_mean": -74.95732116699219, + "KL/mean": -114.7613296508789, + "KL/rejected_KL_mean": -154.56533813476562, + "KL/std": 88.03938293457031, + "epoch": 0.24963289280469897, + "fcm_dpo/beta": 0.005398896988481283, + "fcm_dpo/delta": -0.03206340968608856, + "fcm_dpo/margin": 79.6080093383789, + "fcm_dpo/q_t": 0.4014880359172821, + "grad_norm": 17.664331436157227, + "learning_rate": 4.6777824852166437e-07, + "logits/chosen": -0.298395574092865, + "logits/rejected": -0.2869154214859009, + "logps/chosen": -124.42909240722656, + "logps/ref_chosen": -49.471771240234375, + "logps/ref_rejected": -75.91734313964844, + "logps/rejected": -230.482666015625, + "loss": 1.0798, + "margin_dpo/margin_mean": 79.6080093383789, + "margin_dpo/margin_std": 105.32583618164062, + "step": 170 + }, + { + "KL/chosen_KL_mean": -110.30496215820312, + "KL/mean": -142.41079711914062, + "KL/rejected_KL_mean": -174.51663208007812, + "KL/std": 103.6309585571289, + "epoch": 0.2511013215859031, + "fcm_dpo/beta": 0.005459581036120653, + "fcm_dpo/delta": 0.051255661994218826, + "fcm_dpo/margin": 64.2116470336914, + "fcm_dpo/q_t": 0.4263428747653961, + "grad_norm": 28.27412223815918, + "learning_rate": 4.6714516072235273e-07, + "logits/chosen": -0.3548741340637207, + "logits/rejected": -0.3387761116027832, + "logps/chosen": -194.8042755126953, + "logps/ref_chosen": -84.49931335449219, + "logps/ref_rejected": -109.38209533691406, + "logps/rejected": -283.89874267578125, + "loss": 1.1816, + "margin_dpo/margin_mean": 64.2116470336914, + "margin_dpo/margin_std": 133.7387237548828, + "step": 171 + }, + { + "KL/chosen_KL_mean": -95.93238830566406, + "KL/mean": -130.29452514648438, + "KL/rejected_KL_mean": -164.65667724609375, + "KL/std": 100.20172882080078, + "epoch": 0.2525697503671072, + "fcm_dpo/beta": 0.005491352174431086, + "fcm_dpo/delta": 0.02349797450006008, + "fcm_dpo/margin": 68.72428131103516, + "fcm_dpo/q_t": 0.41575637459754944, + "grad_norm": 18.535226821899414, + "learning_rate": 4.6650635094610966e-07, + "logits/chosen": -0.3598722219467163, + "logits/rejected": -0.3389941453933716, + "logps/chosen": -164.5863037109375, + "logps/ref_chosen": -68.65391540527344, + "logps/ref_rejected": -85.43667602539062, + "logps/rejected": -250.0933380126953, + "loss": 1.132, + "margin_dpo/margin_mean": 68.72427368164062, + "margin_dpo/margin_std": 113.38480377197266, + "step": 172 + }, + { + "KL/chosen_KL_mean": -88.23031616210938, + "KL/mean": -122.02497100830078, + "KL/rejected_KL_mean": -155.81961059570312, + "KL/std": 92.91819763183594, + "epoch": 0.2540381791483113, + "fcm_dpo/beta": 0.005545733496546745, + "fcm_dpo/delta": 0.025776570662856102, + "fcm_dpo/margin": 67.58930969238281, + "fcm_dpo/q_t": 0.4147086441516876, + "grad_norm": 20.111751556396484, + "learning_rate": 4.6586183602616687e-07, + "logits/chosen": -0.3795207440853119, + "logits/rejected": -0.3491283059120178, + "logps/chosen": -151.28118896484375, + "logps/ref_chosen": -63.050880432128906, + "logps/ref_rejected": -78.68392181396484, + "logps/rejected": -234.5035400390625, + "loss": 1.1112, + "margin_dpo/margin_mean": 67.58930969238281, + "margin_dpo/margin_std": 95.4912109375, + "step": 173 + }, + { + "KL/chosen_KL_mean": -82.88648223876953, + "KL/mean": -122.82292175292969, + "KL/rejected_KL_mean": -162.75936889648438, + "KL/std": 97.02500915527344, + "epoch": 0.2555066079295154, + "fcm_dpo/beta": 0.005529084708541632, + "fcm_dpo/delta": -0.04399598762392998, + "fcm_dpo/margin": 79.87288665771484, + "fcm_dpo/q_t": 0.402817964553833, + "grad_norm": 28.604568481445312, + "learning_rate": 4.652116329460919e-07, + "logits/chosen": -0.30759066343307495, + "logits/rejected": -0.3249150216579437, + "logps/chosen": -136.24945068359375, + "logps/ref_chosen": -53.36296844482422, + "logps/ref_rejected": -101.91120910644531, + "logps/rejected": -264.67059326171875, + "loss": 1.0882, + "margin_dpo/margin_mean": 79.87288665771484, + "margin_dpo/margin_std": 115.7405014038086, + "step": 174 + }, + { + "KL/chosen_KL_mean": -76.46298217773438, + "KL/mean": -131.42095947265625, + "KL/rejected_KL_mean": -186.37893676757812, + "KL/std": 104.40403747558594, + "epoch": 0.25697503671071953, + "fcm_dpo/beta": 0.005318961106240749, + "fcm_dpo/delta": -0.1964312642812729, + "fcm_dpo/margin": 109.91595458984375, + "fcm_dpo/q_t": 0.36687812209129333, + "grad_norm": 29.169300079345703, + "learning_rate": 4.645557588393406e-07, + "logits/chosen": -0.32927554845809937, + "logits/rejected": -0.31611427664756775, + "logps/chosen": -121.88074493408203, + "logps/ref_chosen": -45.417762756347656, + "logps/ref_rejected": -89.50579833984375, + "logps/rejected": -275.884765625, + "loss": 0.9593, + "margin_dpo/margin_mean": 109.91596221923828, + "margin_dpo/margin_std": 109.20188903808594, + "step": 175 + }, + { + "KL/chosen_KL_mean": -81.6014404296875, + "KL/mean": -127.35952758789062, + "KL/rejected_KL_mean": -173.11761474609375, + "KL/std": 102.09504699707031, + "epoch": 0.25844346549192365, + "fcm_dpo/beta": 0.0052184974774718285, + "fcm_dpo/delta": -0.08138823509216309, + "fcm_dpo/margin": 91.51618957519531, + "fcm_dpo/q_t": 0.394927978515625, + "grad_norm": 20.01445770263672, + "learning_rate": 4.638942309888058e-07, + "logits/chosen": -0.28535836935043335, + "logits/rejected": -0.3025384843349457, + "logps/chosen": -132.0542755126953, + "logps/ref_chosen": -50.452842712402344, + "logps/ref_rejected": -95.5589599609375, + "logps/rejected": -268.67657470703125, + "loss": 1.0499, + "margin_dpo/margin_mean": 91.51618957519531, + "margin_dpo/margin_std": 118.59428405761719, + "step": 176 + }, + { + "KL/chosen_KL_mean": -94.85406494140625, + "KL/mean": -140.81088256835938, + "KL/rejected_KL_mean": -186.76768493652344, + "KL/std": 111.390869140625, + "epoch": 0.2599118942731278, + "fcm_dpo/beta": 0.005144456867128611, + "fcm_dpo/delta": -0.07641495764255524, + "fcm_dpo/margin": 91.91362762451172, + "fcm_dpo/q_t": 0.3949311375617981, + "grad_norm": 27.786762237548828, + "learning_rate": 4.6322706682636137e-07, + "logits/chosen": -0.3724118173122406, + "logits/rejected": -0.364002525806427, + "logps/chosen": -156.07052612304688, + "logps/ref_chosen": -61.216468811035156, + "logps/ref_rejected": -95.89378356933594, + "logps/rejected": -282.6614685058594, + "loss": 1.0495, + "margin_dpo/margin_mean": 91.91362762451172, + "margin_dpo/margin_std": 118.17066955566406, + "step": 177 + }, + { + "KL/chosen_KL_mean": -104.51988220214844, + "KL/mean": -162.33509826660156, + "KL/rejected_KL_mean": -220.1503143310547, + "KL/std": 131.26687622070312, + "epoch": 0.26138032305433184, + "fcm_dpo/beta": 0.004952050745487213, + "fcm_dpo/delta": -0.18411573767662048, + "fcm_dpo/margin": 115.63043975830078, + "fcm_dpo/q_t": 0.37578919529914856, + "grad_norm": 27.795106887817383, + "learning_rate": 4.6255428393240354e-07, + "logits/chosen": -0.2641046941280365, + "logits/rejected": -0.2551937997341156, + "logps/chosen": -162.78466796875, + "logps/ref_chosen": -58.26478958129883, + "logps/ref_rejected": -105.3653335571289, + "logps/rejected": -325.5156555175781, + "loss": 1.0002, + "margin_dpo/margin_mean": 115.63044738769531, + "margin_dpo/margin_std": 143.41700744628906, + "step": 178 + }, + { + "KL/chosen_KL_mean": -109.52047729492188, + "KL/mean": -149.59457397460938, + "KL/rejected_KL_mean": -189.66867065429688, + "KL/std": 112.11015319824219, + "epoch": 0.26284875183553597, + "fcm_dpo/beta": 0.004905564710497856, + "fcm_dpo/delta": 0.006664544343948364, + "fcm_dpo/margin": 80.14815521240234, + "fcm_dpo/q_t": 0.41320013999938965, + "grad_norm": 34.54417419433594, + "learning_rate": 4.6187590003538724e-07, + "logits/chosen": -0.30266761779785156, + "logits/rejected": -0.3117542266845703, + "logps/chosen": -170.57879638671875, + "logps/ref_chosen": -61.05832290649414, + "logps/ref_rejected": -90.52782440185547, + "logps/rejected": -280.19647216796875, + "loss": 1.1404, + "margin_dpo/margin_mean": 80.14815521240234, + "margin_dpo/margin_std": 139.17221069335938, + "step": 179 + }, + { + "KL/chosen_KL_mean": -94.91496276855469, + "KL/mean": -146.8360595703125, + "KL/rejected_KL_mean": -198.7571563720703, + "KL/std": 101.62055969238281, + "epoch": 0.2643171806167401, + "fcm_dpo/beta": 0.004845252260565758, + "fcm_dpo/delta": -0.10879069566726685, + "fcm_dpo/margin": 103.84219360351562, + "fcm_dpo/q_t": 0.3857002854347229, + "grad_norm": 19.25888442993164, + "learning_rate": 4.611919330113591e-07, + "logits/chosen": -0.29693859815597534, + "logits/rejected": -0.29173195362091064, + "logps/chosen": -149.2576904296875, + "logps/ref_chosen": -54.34272003173828, + "logps/ref_rejected": -98.21183776855469, + "logps/rejected": -296.968994140625, + "loss": 1.0305, + "margin_dpo/margin_mean": 103.84219360351562, + "margin_dpo/margin_std": 126.80170440673828, + "step": 180 + }, + { + "KL/chosen_KL_mean": -83.34645080566406, + "KL/mean": -115.71342468261719, + "KL/rejected_KL_mean": -148.08038330078125, + "KL/std": 93.42445373535156, + "epoch": 0.2657856093979442, + "fcm_dpo/beta": 0.004894108511507511, + "fcm_dpo/delta": 0.08573634922504425, + "fcm_dpo/margin": 64.73393249511719, + "fcm_dpo/q_t": 0.4280344247817993, + "grad_norm": 20.40754508972168, + "learning_rate": 4.605024008834863e-07, + "logits/chosen": -0.3203880190849304, + "logits/rejected": -0.2962578535079956, + "logps/chosen": -138.34690856933594, + "logps/ref_chosen": -55.000457763671875, + "logps/ref_rejected": -61.656166076660156, + "logps/rejected": -209.73655700683594, + "loss": 1.1713, + "margin_dpo/margin_mean": 64.73393249511719, + "margin_dpo/margin_std": 117.01361083984375, + "step": 181 + }, + { + "KL/chosen_KL_mean": -79.22171020507812, + "KL/mean": -136.55117797851562, + "KL/rejected_KL_mean": -193.88064575195312, + "KL/std": 114.58843994140625, + "epoch": 0.26725403817914833, + "fcm_dpo/beta": 0.004775552079081535, + "fcm_dpo/delta": -0.15708649158477783, + "fcm_dpo/margin": 114.658935546875, + "fcm_dpo/q_t": 0.37630826234817505, + "grad_norm": 18.048755645751953, + "learning_rate": 4.598073218215817e-07, + "logits/chosen": -0.2832631766796112, + "logits/rejected": -0.29323720932006836, + "logps/chosen": -120.32955932617188, + "logps/ref_chosen": -41.107852935791016, + "logps/ref_rejected": -89.5215835571289, + "logps/rejected": -283.4022216796875, + "loss": 1.0117, + "margin_dpo/margin_mean": 114.658935546875, + "margin_dpo/margin_std": 138.28912353515625, + "step": 182 + }, + { + "KL/chosen_KL_mean": -116.80380249023438, + "KL/mean": -147.58697509765625, + "KL/rejected_KL_mean": -178.3701629638672, + "KL/std": 93.99075317382812, + "epoch": 0.2687224669603524, + "fcm_dpo/beta": 0.004714460577815771, + "fcm_dpo/delta": -0.04429354518651962, + "fcm_dpo/margin": 61.56635665893555, + "fcm_dpo/q_t": 0.4325304627418518, + "grad_norm": 21.687788009643555, + "learning_rate": 4.5910671414162484e-07, + "logits/chosen": -0.29588770866394043, + "logits/rejected": -0.28640466928482056, + "logps/chosen": -174.328369140625, + "logps/ref_chosen": -57.52456283569336, + "logps/ref_rejected": -75.97572326660156, + "logps/rejected": -254.34588623046875, + "loss": 1.1828, + "margin_dpo/margin_mean": 61.56635665893555, + "margin_dpo/margin_std": 104.21000671386719, + "step": 183 + }, + { + "KL/chosen_KL_mean": -95.97406005859375, + "KL/mean": -128.724609375, + "KL/rejected_KL_mean": -161.4751434326172, + "KL/std": 88.82809448242188, + "epoch": 0.2701908957415565, + "fcm_dpo/beta": 0.0047124335542321205, + "fcm_dpo/delta": -0.004301935900002718, + "fcm_dpo/margin": 65.50109100341797, + "fcm_dpo/q_t": 0.4299464225769043, + "grad_norm": 18.115541458129883, + "learning_rate": 4.5840059630527985e-07, + "logits/chosen": -0.3457328975200653, + "logits/rejected": -0.33615928888320923, + "logps/chosen": -154.51901245117188, + "logps/ref_chosen": -58.544952392578125, + "logps/ref_rejected": -76.63406372070312, + "logps/rejected": -238.1092071533203, + "loss": 1.1687, + "margin_dpo/margin_mean": 65.50109100341797, + "margin_dpo/margin_std": 111.95549011230469, + "step": 184 + }, + { + "KL/chosen_KL_mean": -104.63207244873047, + "KL/mean": -130.31781005859375, + "KL/rejected_KL_mean": -156.0035400390625, + "KL/std": 102.3460693359375, + "epoch": 0.27165932452276065, + "fcm_dpo/beta": 0.0048194690607488155, + "fcm_dpo/delta": 0.15635941922664642, + "fcm_dpo/margin": 51.37147521972656, + "fcm_dpo/q_t": 0.44680285453796387, + "grad_norm": 20.880599975585938, + "learning_rate": 4.5768898691940836e-07, + "logits/chosen": -0.3066332936286926, + "logits/rejected": -0.2832027077674866, + "logps/chosen": -166.65792846679688, + "logps/ref_chosen": -62.025848388671875, + "logps/ref_rejected": -73.7625961303711, + "logps/rejected": -229.76614379882812, + "loss": 1.2341, + "margin_dpo/margin_mean": 51.37147521972656, + "margin_dpo/margin_std": 122.99656677246094, + "step": 185 + }, + { + "KL/chosen_KL_mean": -95.6789779663086, + "KL/mean": -144.47451782226562, + "KL/rejected_KL_mean": -193.27001953125, + "KL/std": 103.38729858398438, + "epoch": 0.27312775330396477, + "fcm_dpo/beta": 0.004802432842552662, + "fcm_dpo/delta": -0.07216604053974152, + "fcm_dpo/margin": 97.591064453125, + "fcm_dpo/q_t": 0.393841028213501, + "grad_norm": 26.266706466674805, + "learning_rate": 4.5697190473557947e-07, + "logits/chosen": -0.3480488061904907, + "logits/rejected": -0.32328087091445923, + "logps/chosen": -165.03244018554688, + "logps/ref_chosen": -69.35346984863281, + "logps/ref_rejected": -88.07244873046875, + "logps/rejected": -281.34246826171875, + "loss": 1.045, + "margin_dpo/margin_mean": 97.591064453125, + "margin_dpo/margin_std": 118.66375732421875, + "step": 186 + }, + { + "KL/chosen_KL_mean": -88.34507751464844, + "KL/mean": -128.76791381835938, + "KL/rejected_KL_mean": -169.19076538085938, + "KL/std": 97.03087615966797, + "epoch": 0.2745961820851689, + "fcm_dpo/beta": 0.004818159155547619, + "fcm_dpo/delta": 0.010491464287042618, + "fcm_dpo/margin": 80.8456802368164, + "fcm_dpo/q_t": 0.41053086519241333, + "grad_norm": 22.043073654174805, + "learning_rate": 4.5624936864957555e-07, + "logits/chosen": -0.3333667516708374, + "logits/rejected": -0.3270256221294403, + "logps/chosen": -141.10153198242188, + "logps/ref_chosen": -52.7564582824707, + "logps/ref_rejected": -81.96910095214844, + "logps/rejected": -251.15985107421875, + "loss": 1.0959, + "margin_dpo/margin_mean": 80.8456802368164, + "margin_dpo/margin_std": 105.46454620361328, + "step": 187 + }, + { + "KL/chosen_KL_mean": -83.22930908203125, + "KL/mean": -131.79873657226562, + "KL/rejected_KL_mean": -180.36813354492188, + "KL/std": 107.7387466430664, + "epoch": 0.27606461086637296, + "fcm_dpo/beta": 0.004757707007229328, + "fcm_dpo/delta": -0.06513302028179169, + "fcm_dpo/margin": 97.13882446289062, + "fcm_dpo/q_t": 0.3954910933971405, + "grad_norm": 28.16905975341797, + "learning_rate": 4.5552139770089454e-07, + "logits/chosen": -0.3342798352241516, + "logits/rejected": -0.3404528498649597, + "logps/chosen": -132.64480590820312, + "logps/ref_chosen": -49.415489196777344, + "logps/ref_rejected": -89.54043579101562, + "logps/rejected": -269.9085693359375, + "loss": 1.0479, + "margin_dpo/margin_mean": 97.13883209228516, + "margin_dpo/margin_std": 117.88801574707031, + "step": 188 + }, + { + "KL/chosen_KL_mean": -96.34205627441406, + "KL/mean": -136.79183959960938, + "KL/rejected_KL_mean": -177.24160766601562, + "KL/std": 108.5394287109375, + "epoch": 0.2775330396475771, + "fcm_dpo/beta": 0.004754100926220417, + "fcm_dpo/delta": 0.015977924689650536, + "fcm_dpo/margin": 80.89956665039062, + "fcm_dpo/q_t": 0.41550976037979126, + "grad_norm": 23.41521644592285, + "learning_rate": 4.5478801107224794e-07, + "logits/chosen": -0.3520697355270386, + "logits/rejected": -0.3348464369773865, + "logps/chosen": -148.7410125732422, + "logps/ref_chosen": -52.39896011352539, + "logps/ref_rejected": -72.16735076904297, + "logps/rejected": -249.40896606445312, + "loss": 1.1258, + "margin_dpo/margin_mean": 80.89956665039062, + "margin_dpo/margin_std": 133.14503479003906, + "step": 189 + }, + { + "KL/chosen_KL_mean": -103.06121826171875, + "KL/mean": -150.68637084960938, + "KL/rejected_KL_mean": -198.3115234375, + "KL/std": 115.74911499023438, + "epoch": 0.2790014684287812, + "fcm_dpo/beta": 0.004754353780299425, + "fcm_dpo/delta": -0.05636203661561012, + "fcm_dpo/margin": 95.25030517578125, + "fcm_dpo/q_t": 0.39939507842063904, + "grad_norm": 18.363422393798828, + "learning_rate": 4.5404922808905543e-07, + "logits/chosen": -0.38547688722610474, + "logits/rejected": -0.375651478767395, + "logps/chosen": -167.74429321289062, + "logps/ref_chosen": -64.68305969238281, + "logps/ref_rejected": -102.55052185058594, + "logps/rejected": -300.862060546875, + "loss": 1.0822, + "margin_dpo/margin_mean": 95.25030517578125, + "margin_dpo/margin_std": 133.5958251953125, + "step": 190 + }, + { + "KL/chosen_KL_mean": -95.35700988769531, + "KL/mean": -163.53025817871094, + "KL/rejected_KL_mean": -231.70352172851562, + "KL/std": 133.92214965820312, + "epoch": 0.28046989720998533, + "fcm_dpo/beta": 0.004521770402789116, + "fcm_dpo/delta": -0.23212674260139465, + "fcm_dpo/margin": 136.34649658203125, + "fcm_dpo/q_t": 0.3637212812900543, + "grad_norm": 20.231264114379883, + "learning_rate": 4.5330506821893565e-07, + "logits/chosen": -0.3467414379119873, + "logits/rejected": -0.3258952498435974, + "logps/chosen": -164.015869140625, + "logps/ref_chosen": -68.65887451171875, + "logps/ref_rejected": -110.1396713256836, + "logps/rejected": -341.84320068359375, + "loss": 0.9565, + "margin_dpo/margin_mean": 136.34649658203125, + "margin_dpo/margin_std": 147.54470825195312, + "step": 191 + }, + { + "KL/chosen_KL_mean": -124.43778991699219, + "KL/mean": -170.04360961914062, + "KL/rejected_KL_mean": -215.6494140625, + "KL/std": 117.56196594238281, + "epoch": 0.28193832599118945, + "fcm_dpo/beta": 0.0044925631955266, + "fcm_dpo/delta": -0.010320080444216728, + "fcm_dpo/margin": 91.21162414550781, + "fcm_dpo/q_t": 0.4096482992172241, + "grad_norm": 25.52708625793457, + "learning_rate": 4.5255555107119336e-07, + "logits/chosen": -0.32894307374954224, + "logits/rejected": -0.328900545835495, + "logps/chosen": -194.16470336914062, + "logps/ref_chosen": -69.72691345214844, + "logps/ref_rejected": -103.32135009765625, + "logps/rejected": -318.97076416015625, + "loss": 1.1135, + "margin_dpo/margin_mean": 91.21162414550781, + "margin_dpo/margin_std": 144.74786376953125, + "step": 192 + }, + { + "KL/chosen_KL_mean": -124.83224487304688, + "KL/mean": -151.44308471679688, + "KL/rejected_KL_mean": -178.053955078125, + "KL/std": 107.97267150878906, + "epoch": 0.2834067547723935, + "fcm_dpo/beta": 0.004495399538427591, + "fcm_dpo/delta": 0.0392833836376667, + "fcm_dpo/margin": 53.221702575683594, + "fcm_dpo/q_t": 0.44323813915252686, + "grad_norm": 26.372344970703125, + "learning_rate": 4.5180069639630236e-07, + "logits/chosen": -0.3519429564476013, + "logits/rejected": -0.34495627880096436, + "logps/chosen": -185.02273559570312, + "logps/ref_chosen": -60.19049835205078, + "logps/ref_rejected": -76.40755462646484, + "logps/rejected": -254.4615020751953, + "loss": 1.2529, + "margin_dpo/margin_mean": 53.221702575683594, + "margin_dpo/margin_std": 137.95343017578125, + "step": 193 + }, + { + "KL/chosen_KL_mean": -78.5848388671875, + "KL/mean": -121.88168334960938, + "KL/rejected_KL_mean": -165.17852783203125, + "KL/std": 90.56858825683594, + "epoch": 0.28487518355359764, + "fcm_dpo/beta": 0.004506401717662811, + "fcm_dpo/delta": 0.010020148009061813, + "fcm_dpo/margin": 86.59368896484375, + "fcm_dpo/q_t": 0.4085754156112671, + "grad_norm": 18.025230407714844, + "learning_rate": 4.510405240853854e-07, + "logits/chosen": -0.2157665491104126, + "logits/rejected": -0.1980063021183014, + "logps/chosen": -116.42521667480469, + "logps/ref_chosen": -37.84037399291992, + "logps/ref_rejected": -60.684783935546875, + "logps/rejected": -225.86331176757812, + "loss": 1.082, + "margin_dpo/margin_mean": 86.59367370605469, + "margin_dpo/margin_std": 99.3104019165039, + "step": 194 + }, + { + "KL/chosen_KL_mean": -124.42152404785156, + "KL/mean": -171.287353515625, + "KL/rejected_KL_mean": -218.1531982421875, + "KL/std": 112.64602661132812, + "epoch": 0.28634361233480177, + "fcm_dpo/beta": 0.004506120923906565, + "fcm_dpo/delta": -0.023354141041636467, + "fcm_dpo/margin": 93.73165893554688, + "fcm_dpo/q_t": 0.4031534194946289, + "grad_norm": 22.234222412109375, + "learning_rate": 4.5027505416968985e-07, + "logits/chosen": -0.24858853220939636, + "logits/rejected": -0.2673921287059784, + "logps/chosen": -179.31309509277344, + "logps/ref_chosen": -54.891571044921875, + "logps/ref_rejected": -96.77095794677734, + "logps/rejected": -314.9241638183594, + "loss": 1.0714, + "margin_dpo/margin_mean": 93.73165893554688, + "margin_dpo/margin_std": 116.69031524658203, + "step": 195 + }, + { + "KL/chosen_KL_mean": -97.50921630859375, + "KL/mean": -150.7244415283203, + "KL/rejected_KL_mean": -203.93966674804688, + "KL/std": 114.79684448242188, + "epoch": 0.2878120411160059, + "fcm_dpo/beta": 0.004426237195730209, + "fcm_dpo/delta": -0.07563818991184235, + "fcm_dpo/margin": 106.43045043945312, + "fcm_dpo/q_t": 0.3946911692619324, + "grad_norm": 18.23614501953125, + "learning_rate": 4.495043068200599e-07, + "logits/chosen": -0.30258023738861084, + "logits/rejected": -0.288103848695755, + "logps/chosen": -150.75445556640625, + "logps/ref_chosen": -53.245243072509766, + "logps/ref_rejected": -76.05294799804688, + "logps/rejected": -279.99261474609375, + "loss": 1.0592, + "margin_dpo/margin_mean": 106.43045043945312, + "margin_dpo/margin_std": 137.8130645751953, + "step": 196 + }, + { + "KL/chosen_KL_mean": -101.94883728027344, + "KL/mean": -143.18423461914062, + "KL/rejected_KL_mean": -184.41961669921875, + "KL/std": 101.37451171875, + "epoch": 0.28928046989721, + "fcm_dpo/beta": 0.004469497129321098, + "fcm_dpo/delta": 0.03227302059531212, + "fcm_dpo/margin": 82.47077941894531, + "fcm_dpo/q_t": 0.41608455777168274, + "grad_norm": 18.144241333007812, + "learning_rate": 4.4872830234640493e-07, + "logits/chosen": -0.2958967983722687, + "logits/rejected": -0.290219783782959, + "logps/chosen": -162.36917114257812, + "logps/ref_chosen": -60.42033386230469, + "logps/ref_rejected": -77.20890808105469, + "logps/rejected": -261.6285400390625, + "loss": 1.1138, + "margin_dpo/margin_mean": 82.47077941894531, + "margin_dpo/margin_std": 115.41438293457031, + "step": 197 + }, + { + "KL/chosen_KL_mean": -114.83041381835938, + "KL/mean": -167.80075073242188, + "KL/rejected_KL_mean": -220.7711181640625, + "KL/std": 126.27465057373047, + "epoch": 0.2907488986784141, + "fcm_dpo/beta": 0.004416568670421839, + "fcm_dpo/delta": -0.07120651751756668, + "fcm_dpo/margin": 105.94068145751953, + "fcm_dpo/q_t": 0.3967708349227905, + "grad_norm": 22.24930191040039, + "learning_rate": 4.479470611971645e-07, + "logits/chosen": -0.3203980028629303, + "logits/rejected": -0.3210110068321228, + "logps/chosen": -169.86660766601562, + "logps/ref_chosen": -55.03618621826172, + "logps/ref_rejected": -97.24325561523438, + "logps/rejected": -318.0143737792969, + "loss": 1.0591, + "margin_dpo/margin_mean": 105.94068145751953, + "margin_dpo/margin_std": 143.1464080810547, + "step": 198 + }, + { + "KL/chosen_KL_mean": -110.44184875488281, + "KL/mean": -162.89865112304688, + "KL/rejected_KL_mean": -215.35546875, + "KL/std": 114.6693115234375, + "epoch": 0.2922173274596182, + "fcm_dpo/beta": 0.004335303790867329, + "fcm_dpo/delta": -0.058502815663814545, + "fcm_dpo/margin": 104.91361999511719, + "fcm_dpo/q_t": 0.3972257673740387, + "grad_norm": 23.503461837768555, + "learning_rate": 4.471606039587695e-07, + "logits/chosen": -0.27863985300064087, + "logits/rejected": -0.2598820924758911, + "logps/chosen": -167.2706756591797, + "logps/ref_chosen": -56.828826904296875, + "logps/ref_rejected": -84.64820861816406, + "logps/rejected": -300.003662109375, + "loss": 1.0675, + "margin_dpo/margin_mean": 104.91361999511719, + "margin_dpo/margin_std": 138.49346923828125, + "step": 199 + }, + { + "KL/chosen_KL_mean": -108.3734130859375, + "KL/mean": -160.2718963623047, + "KL/rejected_KL_mean": -212.17037963867188, + "KL/std": 123.57206726074219, + "epoch": 0.2936857562408223, + "fcm_dpo/beta": 0.004300840198993683, + "fcm_dpo/delta": -0.04902205243706703, + "fcm_dpo/margin": 103.79698181152344, + "fcm_dpo/q_t": 0.40177974104881287, + "grad_norm": 22.9044246673584, + "learning_rate": 4.4636895135509966e-07, + "logits/chosen": -0.26905137300491333, + "logits/rejected": -0.25207480788230896, + "logps/chosen": -161.44046020507812, + "logps/ref_chosen": -53.06706237792969, + "logps/ref_rejected": -80.60843658447266, + "logps/rejected": -292.77880859375, + "loss": 1.0929, + "margin_dpo/margin_mean": 103.7969741821289, + "margin_dpo/margin_std": 158.15789794921875, + "step": 200 + }, + { + "KL/chosen_KL_mean": -113.9289321899414, + "KL/mean": -165.15493774414062, + "KL/rejected_KL_mean": -216.38092041015625, + "KL/std": 129.3989715576172, + "epoch": 0.29515418502202645, + "fcm_dpo/beta": 0.004290143959224224, + "fcm_dpo/delta": -0.041380785405635834, + "fcm_dpo/margin": 102.45198822021484, + "fcm_dpo/q_t": 0.40112942457199097, + "grad_norm": 20.798912048339844, + "learning_rate": 4.455721242469372e-07, + "logits/chosen": -0.3590313792228699, + "logits/rejected": -0.3559607267379761, + "logps/chosen": -189.33114624023438, + "logps/ref_chosen": -75.4022216796875, + "logps/ref_rejected": -114.80821990966797, + "logps/rejected": -331.18914794921875, + "loss": 1.0829, + "margin_dpo/margin_mean": 102.45198059082031, + "margin_dpo/margin_std": 144.92611694335938, + "step": 201 + }, + { + "KL/chosen_KL_mean": -116.26347351074219, + "KL/mean": -152.79412841796875, + "KL/rejected_KL_mean": -189.32476806640625, + "KL/std": 111.22699737548828, + "epoch": 0.2966226138032305, + "fcm_dpo/beta": 0.0043277074582874775, + "fcm_dpo/delta": 0.08649900555610657, + "fcm_dpo/margin": 73.06129455566406, + "fcm_dpo/q_t": 0.42985087633132935, + "grad_norm": 20.812585830688477, + "learning_rate": 4.4477014363141755e-07, + "logits/chosen": -0.2794426679611206, + "logits/rejected": -0.293861985206604, + "logps/chosen": -166.3647918701172, + "logps/ref_chosen": -50.101318359375, + "logps/ref_rejected": -86.98503112792969, + "logps/rejected": -276.309814453125, + "loss": 1.1841, + "margin_dpo/margin_mean": 73.0613021850586, + "margin_dpo/margin_std": 143.23988342285156, + "step": 202 + }, + { + "KL/chosen_KL_mean": -114.866455078125, + "KL/mean": -160.2359619140625, + "KL/rejected_KL_mean": -205.60543823242188, + "KL/std": 114.47230529785156, + "epoch": 0.29809104258443464, + "fcm_dpo/beta": 0.004343975335359573, + "fcm_dpo/delta": 0.00605600792914629, + "fcm_dpo/margin": 90.73900604248047, + "fcm_dpo/q_t": 0.4100680649280548, + "grad_norm": 21.901674270629883, + "learning_rate": 4.439630306414758e-07, + "logits/chosen": -0.3357563614845276, + "logits/rejected": -0.32634925842285156, + "logps/chosen": -175.4761505126953, + "logps/ref_chosen": -60.60969543457031, + "logps/ref_rejected": -85.89596557617188, + "logps/rejected": -291.50140380859375, + "loss": 1.0993, + "margin_dpo/margin_mean": 90.73899841308594, + "margin_dpo/margin_std": 125.68807983398438, + "step": 203 + }, + { + "KL/chosen_KL_mean": -128.02561950683594, + "KL/mean": -169.0902557373047, + "KL/rejected_KL_mean": -210.15489196777344, + "KL/std": 124.49624633789062, + "epoch": 0.29955947136563876, + "fcm_dpo/beta": 0.00437512993812561, + "fcm_dpo/delta": 0.04220545291900635, + "fcm_dpo/margin": 82.12925720214844, + "fcm_dpo/q_t": 0.42096078395843506, + "grad_norm": 22.241016387939453, + "learning_rate": 4.431508065452897e-07, + "logits/chosen": -0.4248543977737427, + "logits/rejected": -0.38815587759017944, + "logps/chosen": -208.19058227539062, + "logps/ref_chosen": -80.16496276855469, + "logps/ref_rejected": -87.69590759277344, + "logps/rejected": -297.8507995605469, + "loss": 1.1522, + "margin_dpo/margin_mean": 82.12925720214844, + "margin_dpo/margin_std": 144.77645874023438, + "step": 204 + }, + { + "KL/chosen_KL_mean": -120.78949737548828, + "KL/mean": -176.00003051757812, + "KL/rejected_KL_mean": -231.2105712890625, + "KL/std": 126.26949310302734, + "epoch": 0.3010279001468429, + "fcm_dpo/beta": 0.004297832027077675, + "fcm_dpo/delta": -0.08044849336147308, + "fcm_dpo/margin": 110.42106628417969, + "fcm_dpo/q_t": 0.39113306999206543, + "grad_norm": 21.1467342376709, + "learning_rate": 4.4233349274571974e-07, + "logits/chosen": -0.32940664887428284, + "logits/rejected": -0.29995858669281006, + "logps/chosen": -180.17422485351562, + "logps/ref_chosen": -59.384735107421875, + "logps/ref_rejected": -85.12505340576172, + "logps/rejected": -316.33563232421875, + "loss": 1.0534, + "margin_dpo/margin_mean": 110.42106628417969, + "margin_dpo/margin_std": 136.45323181152344, + "step": 205 + }, + { + "KL/chosen_KL_mean": -110.37324523925781, + "KL/mean": -169.74008178710938, + "KL/rejected_KL_mean": -229.10691833496094, + "KL/std": 117.97074127197266, + "epoch": 0.302496328928047, + "fcm_dpo/beta": 0.004232403822243214, + "fcm_dpo/delta": -0.10859975218772888, + "fcm_dpo/margin": 118.73365783691406, + "fcm_dpo/q_t": 0.38320374488830566, + "grad_norm": 25.72849464416504, + "learning_rate": 4.415111107797445e-07, + "logits/chosen": -0.26661020517349243, + "logits/rejected": -0.2699154019355774, + "logps/chosen": -157.33773803710938, + "logps/ref_chosen": -46.964500427246094, + "logps/ref_rejected": -98.9534912109375, + "logps/rejected": -328.0604248046875, + "loss": 1.0145, + "margin_dpo/margin_mean": 118.73365783691406, + "margin_dpo/margin_std": 128.0810089111328, + "step": 206 + }, + { + "KL/chosen_KL_mean": -100.90605163574219, + "KL/mean": -168.05532836914062, + "KL/rejected_KL_mean": -235.20462036132812, + "KL/std": 134.08450317382812, + "epoch": 0.3039647577092511, + "fcm_dpo/beta": 0.004127143882215023, + "fcm_dpo/delta": -0.1631755232810974, + "fcm_dpo/margin": 134.29855346679688, + "fcm_dpo/q_t": 0.3774099349975586, + "grad_norm": 22.681591033935547, + "learning_rate": 4.4068368231789365e-07, + "logits/chosen": -0.35407179594039917, + "logits/rejected": -0.32842785120010376, + "logps/chosen": -156.96231079101562, + "logps/ref_chosen": -56.05625915527344, + "logps/ref_rejected": -84.44779968261719, + "logps/rejected": -319.65240478515625, + "loss": 0.9974, + "margin_dpo/margin_mean": 134.298583984375, + "margin_dpo/margin_std": 156.59857177734375, + "step": 207 + }, + { + "KL/chosen_KL_mean": -154.8147735595703, + "KL/mean": -207.43914794921875, + "KL/rejected_KL_mean": -260.06353759765625, + "KL/std": 128.05979919433594, + "epoch": 0.3054331864904552, + "fcm_dpo/beta": 0.004062125459313393, + "fcm_dpo/delta": -0.02891511656343937, + "fcm_dpo/margin": 105.24872589111328, + "fcm_dpo/q_t": 0.40312352776527405, + "grad_norm": 23.56682014465332, + "learning_rate": 4.398512291636768e-07, + "logits/chosen": -0.38881534337997437, + "logits/rejected": -0.37188804149627686, + "logps/chosen": -221.88238525390625, + "logps/ref_chosen": -67.06761169433594, + "logps/ref_rejected": -94.28689575195312, + "logps/rejected": -354.35040283203125, + "loss": 1.096, + "margin_dpo/margin_mean": 105.24872589111328, + "margin_dpo/margin_std": 155.59713745117188, + "step": 208 + }, + { + "KL/chosen_KL_mean": -129.39630126953125, + "KL/mean": -176.29495239257812, + "KL/rejected_KL_mean": -223.19363403320312, + "KL/std": 115.95198059082031, + "epoch": 0.3069016152716593, + "fcm_dpo/beta": 0.004076983779668808, + "fcm_dpo/delta": 0.018282007426023483, + "fcm_dpo/margin": 93.79732513427734, + "fcm_dpo/q_t": 0.41346555948257446, + "grad_norm": 26.791549682617188, + "learning_rate": 4.3901377325300857e-07, + "logits/chosen": -0.26864010095596313, + "logits/rejected": -0.2571912109851837, + "logps/chosen": -185.5780029296875, + "logps/ref_chosen": -56.18169403076172, + "logps/ref_rejected": -80.94152069091797, + "logps/rejected": -304.1351623535156, + "loss": 1.1292, + "margin_dpo/margin_mean": 93.79731750488281, + "margin_dpo/margin_std": 149.582763671875, + "step": 209 + }, + { + "KL/chosen_KL_mean": -116.94548034667969, + "KL/mean": -171.28390502929688, + "KL/rejected_KL_mean": -225.622314453125, + "KL/std": 119.73749542236328, + "epoch": 0.30837004405286345, + "fcm_dpo/beta": 0.004069700837135315, + "fcm_dpo/delta": -0.04440900310873985, + "fcm_dpo/margin": 108.67684936523438, + "fcm_dpo/q_t": 0.40055060386657715, + "grad_norm": 23.223583221435547, + "learning_rate": 4.381713366536311e-07, + "logits/chosen": -0.2933782935142517, + "logits/rejected": -0.2853144705295563, + "logps/chosen": -163.31729125976562, + "logps/ref_chosen": -46.371822357177734, + "logps/ref_rejected": -76.68162536621094, + "logps/rejected": -302.303955078125, + "loss": 1.0747, + "margin_dpo/margin_mean": 108.67683410644531, + "margin_dpo/margin_std": 146.201904296875, + "step": 210 + }, + { + "KL/chosen_KL_mean": -168.60812377929688, + "KL/mean": -214.71722412109375, + "KL/rejected_KL_mean": -260.8263244628906, + "KL/std": 136.29385375976562, + "epoch": 0.30983847283406757, + "fcm_dpo/beta": 0.004061352461576462, + "fcm_dpo/delta": 0.02644379436969757, + "fcm_dpo/margin": 92.21820831298828, + "fcm_dpo/q_t": 0.41933655738830566, + "grad_norm": 30.78042221069336, + "learning_rate": 4.373239415645323e-07, + "logits/chosen": -0.3214316964149475, + "logits/rejected": -0.2823808193206787, + "logps/chosen": -247.54046630859375, + "logps/ref_chosen": -78.93235778808594, + "logps/ref_rejected": -86.82098388671875, + "logps/rejected": -347.6473083496094, + "loss": 1.1462, + "margin_dpo/margin_mean": 92.21821594238281, + "margin_dpo/margin_std": 160.97451782226562, + "step": 211 + }, + { + "KL/chosen_KL_mean": -139.83131408691406, + "KL/mean": -206.15283203125, + "KL/rejected_KL_mean": -272.474365234375, + "KL/std": 148.2513427734375, + "epoch": 0.31130690161527164, + "fcm_dpo/beta": 0.003954698797315359, + "fcm_dpo/delta": -0.13367314636707306, + "fcm_dpo/margin": 132.64306640625, + "fcm_dpo/q_t": 0.3826107978820801, + "grad_norm": 24.799522399902344, + "learning_rate": 4.3647161031536086e-07, + "logits/chosen": -0.3196195363998413, + "logits/rejected": -0.31085437536239624, + "logps/chosen": -198.0283203125, + "logps/ref_chosen": -58.19701385498047, + "logps/ref_rejected": -103.05785369873047, + "logps/rejected": -375.5322265625, + "loss": 1.0319, + "margin_dpo/margin_mean": 132.64306640625, + "margin_dpo/margin_std": 163.62814331054688, + "step": 212 + }, + { + "KL/chosen_KL_mean": -132.14027404785156, + "KL/mean": -195.88795471191406, + "KL/rejected_KL_mean": -259.6356506347656, + "KL/std": 129.16009521484375, + "epoch": 0.31277533039647576, + "fcm_dpo/beta": 0.0038848065305501223, + "fcm_dpo/delta": -0.10088707506656647, + "fcm_dpo/margin": 127.49536895751953, + "fcm_dpo/q_t": 0.3872129023075104, + "grad_norm": 29.145305633544922, + "learning_rate": 4.3561436536583774e-07, + "logits/chosen": -0.3360249698162079, + "logits/rejected": -0.31101077795028687, + "logps/chosen": -199.65298461914062, + "logps/ref_chosen": -67.51271057128906, + "logps/ref_rejected": -93.91471862792969, + "logps/rejected": -353.55035400390625, + "loss": 1.0333, + "margin_dpo/margin_mean": 127.49537658691406, + "margin_dpo/margin_std": 153.2450408935547, + "step": 213 + }, + { + "KL/chosen_KL_mean": -112.85121154785156, + "KL/mean": -170.10894775390625, + "KL/rejected_KL_mean": -227.36666870117188, + "KL/std": 127.09822082519531, + "epoch": 0.3142437591776799, + "fcm_dpo/beta": 0.003853208851069212, + "fcm_dpo/delta": -0.043163709342479706, + "fcm_dpo/margin": 114.5154800415039, + "fcm_dpo/q_t": 0.4001784920692444, + "grad_norm": 23.445825576782227, + "learning_rate": 4.3475222930516473e-07, + "logits/chosen": -0.25710099935531616, + "logits/rejected": -0.26210659742355347, + "logps/chosen": -154.4561004638672, + "logps/ref_chosen": -41.604888916015625, + "logps/ref_rejected": -77.51741027832031, + "logps/rejected": -304.88409423828125, + "loss": 1.0671, + "margin_dpo/margin_mean": 114.51548767089844, + "margin_dpo/margin_std": 147.68756103515625, + "step": 214 + }, + { + "KL/chosen_KL_mean": -134.02593994140625, + "KL/mean": -194.3079833984375, + "KL/rejected_KL_mean": -254.5900421142578, + "KL/std": 132.08059692382812, + "epoch": 0.315712187958884, + "fcm_dpo/beta": 0.0038004510570317507, + "fcm_dpo/delta": -0.06129283457994461, + "fcm_dpo/margin": 120.56410217285156, + "fcm_dpo/q_t": 0.39448457956314087, + "grad_norm": 26.497583389282227, + "learning_rate": 4.3388522485142885e-07, + "logits/chosen": -0.2794630229473114, + "logits/rejected": -0.27032387256622314, + "logps/chosen": -187.30520629882812, + "logps/ref_chosen": -53.279266357421875, + "logps/ref_rejected": -89.96464538574219, + "logps/rejected": -344.5546875, + "loss": 1.0433, + "margin_dpo/margin_mean": 120.56410217285156, + "margin_dpo/margin_std": 137.23513793945312, + "step": 215 + }, + { + "KL/chosen_KL_mean": -138.703369140625, + "KL/mean": -198.391845703125, + "KL/rejected_KL_mean": -258.0802917480469, + "KL/std": 138.0404510498047, + "epoch": 0.31718061674008813, + "fcm_dpo/beta": 0.003782880725339055, + "fcm_dpo/delta": -0.05413120239973068, + "fcm_dpo/margin": 119.37692260742188, + "fcm_dpo/q_t": 0.39900004863739014, + "grad_norm": 24.74566078186035, + "learning_rate": 4.330133748510036e-07, + "logits/chosen": -0.2884059250354767, + "logits/rejected": -0.27266985177993774, + "logps/chosen": -187.59117126464844, + "logps/ref_chosen": -48.887794494628906, + "logps/ref_rejected": -77.19892883300781, + "logps/rejected": -335.27923583984375, + "loss": 1.0807, + "margin_dpo/margin_mean": 119.37692260742188, + "margin_dpo/margin_std": 169.870849609375, + "step": 216 + }, + { + "KL/chosen_KL_mean": -141.81265258789062, + "KL/mean": -211.98358154296875, + "KL/rejected_KL_mean": -282.154541015625, + "KL/std": 141.36019897460938, + "epoch": 0.3186490455212922, + "fcm_dpo/beta": 0.003682144917547703, + "fcm_dpo/delta": -0.12343692779541016, + "fcm_dpo/margin": 140.3418731689453, + "fcm_dpo/q_t": 0.3833308517932892, + "grad_norm": 20.693517684936523, + "learning_rate": 4.3213670227794757e-07, + "logits/chosen": -0.26268115639686584, + "logits/rejected": -0.2574685513973236, + "logps/chosen": -191.657958984375, + "logps/ref_chosen": -49.845306396484375, + "logps/ref_rejected": -100.07832336425781, + "logps/rejected": -382.23284912109375, + "loss": 1.0116, + "margin_dpo/margin_mean": 140.34185791015625, + "margin_dpo/margin_std": 159.26388549804688, + "step": 217 + }, + { + "KL/chosen_KL_mean": -148.78378295898438, + "KL/mean": -203.23626708984375, + "KL/rejected_KL_mean": -257.688720703125, + "KL/std": 139.531494140625, + "epoch": 0.3201174743024963, + "fcm_dpo/beta": 0.003660230664536357, + "fcm_dpo/delta": 0.0013791173696517944, + "fcm_dpo/margin": 108.90492248535156, + "fcm_dpo/q_t": 0.410659521818161, + "grad_norm": 21.03861427307129, + "learning_rate": 4.3125523023339815e-07, + "logits/chosen": -0.28771138191223145, + "logits/rejected": -0.28279104828834534, + "logps/chosen": -207.3604736328125, + "logps/ref_chosen": -58.576683044433594, + "logps/ref_rejected": -87.84639739990234, + "logps/rejected": -345.5351257324219, + "loss": 1.1106, + "margin_dpo/margin_mean": 108.9049301147461, + "margin_dpo/margin_std": 163.36837768554688, + "step": 218 + }, + { + "KL/chosen_KL_mean": -159.7332763671875, + "KL/mean": -205.7009735107422, + "KL/rejected_KL_mean": -251.6686553955078, + "KL/std": 144.26089477539062, + "epoch": 0.32158590308370044, + "fcm_dpo/beta": 0.0037173782475292683, + "fcm_dpo/delta": 0.05955355241894722, + "fcm_dpo/margin": 91.93537139892578, + "fcm_dpo/q_t": 0.4234854578971863, + "grad_norm": 28.875822067260742, + "learning_rate": 4.303689819449636e-07, + "logits/chosen": -0.3138810992240906, + "logits/rejected": -0.30747318267822266, + "logps/chosen": -220.817138671875, + "logps/ref_chosen": -61.083858489990234, + "logps/ref_rejected": -85.83042907714844, + "logps/rejected": -337.49908447265625, + "loss": 1.1738, + "margin_dpo/margin_mean": 91.93536376953125, + "margin_dpo/margin_std": 175.91717529296875, + "step": 219 + }, + { + "KL/chosen_KL_mean": -179.41836547851562, + "KL/mean": -218.91024780273438, + "KL/rejected_KL_mean": -258.40216064453125, + "KL/std": 125.53445434570312, + "epoch": 0.32305433186490456, + "fcm_dpo/beta": 0.0037627811543643475, + "fcm_dpo/delta": 0.1060803085565567, + "fcm_dpo/margin": 78.98377990722656, + "fcm_dpo/q_t": 0.43049296736717224, + "grad_norm": 28.49346160888672, + "learning_rate": 4.2947798076611047e-07, + "logits/chosen": -0.2874869704246521, + "logits/rejected": -0.2643676996231079, + "logps/chosen": -249.44964599609375, + "logps/ref_chosen": -70.03128051757812, + "logps/ref_rejected": -87.68551635742188, + "logps/rejected": -346.087646484375, + "loss": 1.1674, + "margin_dpo/margin_mean": 78.98377990722656, + "margin_dpo/margin_std": 128.49771118164062, + "step": 220 + }, + { + "KL/chosen_KL_mean": -153.0963134765625, + "KL/mean": -238.85574340820312, + "KL/rejected_KL_mean": -324.6151428222656, + "KL/std": 157.85043334960938, + "epoch": 0.3245227606461087, + "fcm_dpo/beta": 0.003666388336569071, + "fcm_dpo/delta": -0.24369555711746216, + "fcm_dpo/margin": 171.518798828125, + "fcm_dpo/q_t": 0.35655221343040466, + "grad_norm": 25.883392333984375, + "learning_rate": 4.285822501755485e-07, + "logits/chosen": -0.28886061906814575, + "logits/rejected": -0.2952112555503845, + "logps/chosen": -205.25103759765625, + "logps/ref_chosen": -52.15470886230469, + "logps/ref_rejected": -106.46768188476562, + "logps/rejected": -431.08282470703125, + "loss": 0.9343, + "margin_dpo/margin_mean": 171.518798828125, + "margin_dpo/margin_std": 161.14630126953125, + "step": 221 + }, + { + "KL/chosen_KL_mean": -157.40545654296875, + "KL/mean": -221.60968017578125, + "KL/rejected_KL_mean": -285.81390380859375, + "KL/std": 144.6243133544922, + "epoch": 0.32599118942731276, + "fcm_dpo/beta": 0.003590481821447611, + "fcm_dpo/delta": -0.06407497823238373, + "fcm_dpo/margin": 128.40843200683594, + "fcm_dpo/q_t": 0.39516395330429077, + "grad_norm": 20.063804626464844, + "learning_rate": 4.276818137766118e-07, + "logits/chosen": -0.32411831617355347, + "logits/rejected": -0.32718104124069214, + "logps/chosen": -218.37655639648438, + "logps/ref_chosen": -60.971099853515625, + "logps/ref_rejected": -100.00115203857422, + "logps/rejected": -385.8150634765625, + "loss": 1.0554, + "margin_dpo/margin_mean": 128.40843200683594, + "margin_dpo/margin_std": 161.19532775878906, + "step": 222 + }, + { + "KL/chosen_KL_mean": -166.05140686035156, + "KL/mean": -220.04031372070312, + "KL/rejected_KL_mean": -274.02923583984375, + "KL/std": 142.65538024902344, + "epoch": 0.3274596182085169, + "fcm_dpo/beta": 0.0035675265826284885, + "fcm_dpo/delta": 0.015362029895186424, + "fcm_dpo/margin": 107.97785186767578, + "fcm_dpo/q_t": 0.41345182061195374, + "grad_norm": 23.098182678222656, + "learning_rate": 4.2677669529663686e-07, + "logits/chosen": -0.2400050163269043, + "logits/rejected": -0.2347499132156372, + "logps/chosen": -218.69198608398438, + "logps/ref_chosen": -52.64057540893555, + "logps/ref_rejected": -82.82502746582031, + "logps/rejected": -356.854248046875, + "loss": 1.1337, + "margin_dpo/margin_mean": 107.97784423828125, + "margin_dpo/margin_std": 181.26332092285156, + "step": 223 + }, + { + "KL/chosen_KL_mean": -140.97390747070312, + "KL/mean": -202.82191467285156, + "KL/rejected_KL_mean": -264.669921875, + "KL/std": 155.19711303710938, + "epoch": 0.328928046989721, + "fcm_dpo/beta": 0.0035286881029605865, + "fcm_dpo/delta": -0.03949831798672676, + "fcm_dpo/margin": 123.69601440429688, + "fcm_dpo/q_t": 0.40351927280426025, + "grad_norm": 24.251049041748047, + "learning_rate": 4.2586691858633747e-07, + "logits/chosen": -0.3118600845336914, + "logits/rejected": -0.2952437102794647, + "logps/chosen": -189.56932067871094, + "logps/ref_chosen": -48.59541320800781, + "logps/ref_rejected": -77.11648559570312, + "logps/rejected": -341.7864074707031, + "loss": 1.088, + "margin_dpo/margin_mean": 123.69600677490234, + "margin_dpo/margin_std": 177.5772247314453, + "step": 224 + }, + { + "KL/chosen_KL_mean": -159.09860229492188, + "KL/mean": -231.4862060546875, + "KL/rejected_KL_mean": -303.8738098144531, + "KL/std": 148.37298583984375, + "epoch": 0.3303964757709251, + "fcm_dpo/beta": 0.0034855613484978676, + "fcm_dpo/delta": -0.11032609641551971, + "fcm_dpo/margin": 144.7752227783203, + "fcm_dpo/q_t": 0.38641393184661865, + "grad_norm": 21.628904342651367, + "learning_rate": 4.249525076191759e-07, + "logits/chosen": -0.3304445743560791, + "logits/rejected": -0.32175442576408386, + "logps/chosen": -217.09906005859375, + "logps/ref_chosen": -58.000465393066406, + "logps/ref_rejected": -99.90291595458984, + "logps/rejected": -403.7767333984375, + "loss": 1.0326, + "margin_dpo/margin_mean": 144.7752227783203, + "margin_dpo/margin_std": 180.18701171875, + "step": 225 + }, + { + "KL/chosen_KL_mean": -133.54840087890625, + "KL/mean": -190.18936157226562, + "KL/rejected_KL_mean": -246.830322265625, + "KL/std": 143.92837524414062, + "epoch": 0.33186490455212925, + "fcm_dpo/beta": 0.003453510347753763, + "fcm_dpo/delta": 0.008469540625810623, + "fcm_dpo/margin": 113.28192901611328, + "fcm_dpo/q_t": 0.4127262234687805, + "grad_norm": 28.347190856933594, + "learning_rate": 4.2403348649073167e-07, + "logits/chosen": -0.396121621131897, + "logits/rejected": -0.3598354160785675, + "logps/chosen": -192.44720458984375, + "logps/ref_chosen": -58.898799896240234, + "logps/ref_rejected": -78.68775939941406, + "logps/rejected": -325.51806640625, + "loss": 1.1124, + "margin_dpo/margin_mean": 113.28193664550781, + "margin_dpo/margin_std": 167.6037139892578, + "step": 226 + }, + { + "KL/chosen_KL_mean": -153.62620544433594, + "KL/mean": -225.86712646484375, + "KL/rejected_KL_mean": -298.1080627441406, + "KL/std": 163.63131713867188, + "epoch": 0.3333333333333333, + "fcm_dpo/beta": 0.003411718178540468, + "fcm_dpo/delta": -0.09843979775905609, + "fcm_dpo/margin": 144.4818878173828, + "fcm_dpo/q_t": 0.38779282569885254, + "grad_norm": 21.415340423583984, + "learning_rate": 4.2310987941806615e-07, + "logits/chosen": -0.3376998007297516, + "logits/rejected": -0.3247716724872589, + "logps/chosen": -212.69837951660156, + "logps/ref_chosen": -59.072181701660156, + "logps/ref_rejected": -99.41236877441406, + "logps/rejected": -397.52044677734375, + "loss": 1.0302, + "margin_dpo/margin_mean": 144.4818878173828, + "margin_dpo/margin_std": 171.448486328125, + "step": 227 + }, + { + "KL/chosen_KL_mean": -157.6712646484375, + "KL/mean": -208.39749145507812, + "KL/rejected_KL_mean": -259.12371826171875, + "KL/std": 133.16519165039062, + "epoch": 0.33480176211453744, + "fcm_dpo/beta": 0.00343983992934227, + "fcm_dpo/delta": 0.05273807793855667, + "fcm_dpo/margin": 101.45245361328125, + "fcm_dpo/q_t": 0.4198562502861023, + "grad_norm": 22.723651885986328, + "learning_rate": 4.2218171073908463e-07, + "logits/chosen": -0.34845787286758423, + "logits/rejected": -0.3311355710029602, + "logps/chosen": -223.56256103515625, + "logps/ref_chosen": -65.89128875732422, + "logps/ref_rejected": -91.04875183105469, + "logps/rejected": -350.1724853515625, + "loss": 1.1416, + "margin_dpo/margin_mean": 101.45246887207031, + "margin_dpo/margin_std": 162.93731689453125, + "step": 228 + }, + { + "KL/chosen_KL_mean": -161.77908325195312, + "KL/mean": -216.16851806640625, + "KL/rejected_KL_mean": -270.5579528808594, + "KL/std": 153.45242309570312, + "epoch": 0.33627019089574156, + "fcm_dpo/beta": 0.0034589767456054688, + "fcm_dpo/delta": 0.024636760354042053, + "fcm_dpo/margin": 108.77888488769531, + "fcm_dpo/q_t": 0.41331931948661804, + "grad_norm": 30.321849822998047, + "learning_rate": 4.212490049118951e-07, + "logits/chosen": -0.4106701612472534, + "logits/rejected": -0.3795148730278015, + "logps/chosen": -232.48545837402344, + "logps/ref_chosen": -70.70637512207031, + "logps/ref_rejected": -84.52741241455078, + "logps/rejected": -355.08538818359375, + "loss": 1.1173, + "margin_dpo/margin_mean": 108.77888488769531, + "margin_dpo/margin_std": 161.33079528808594, + "step": 229 + }, + { + "KL/chosen_KL_mean": -125.57262420654297, + "KL/mean": -208.11363220214844, + "KL/rejected_KL_mean": -290.6546630859375, + "KL/std": 146.30148315429688, + "epoch": 0.3377386196769457, + "fcm_dpo/beta": 0.003373272018507123, + "fcm_dpo/delta": -0.16629549860954285, + "fcm_dpo/margin": 165.08203125, + "fcm_dpo/q_t": 0.3711177110671997, + "grad_norm": 28.437881469726562, + "learning_rate": 4.203117865141635e-07, + "logits/chosen": -0.31211984157562256, + "logits/rejected": -0.3166271448135376, + "logps/chosen": -164.85462951660156, + "logps/ref_chosen": -39.282005310058594, + "logps/ref_rejected": -85.62191009521484, + "logps/rejected": -376.27655029296875, + "loss": 0.9724, + "margin_dpo/margin_mean": 165.08203125, + "margin_dpo/margin_std": 161.2001495361328, + "step": 230 + }, + { + "KL/chosen_KL_mean": -148.6149444580078, + "KL/mean": -206.58450317382812, + "KL/rejected_KL_mean": -264.5540771484375, + "KL/std": 131.30169677734375, + "epoch": 0.3392070484581498, + "fcm_dpo/beta": 0.0033540253061801195, + "fcm_dpo/delta": 0.011585213243961334, + "fcm_dpo/margin": 115.93913269042969, + "fcm_dpo/q_t": 0.4116860628128052, + "grad_norm": 23.887901306152344, + "learning_rate": 4.1937008024246625e-07, + "logits/chosen": -0.3661789894104004, + "logits/rejected": -0.33695119619369507, + "logps/chosen": -211.89138793945312, + "logps/ref_chosen": -63.27644348144531, + "logps/ref_rejected": -74.1239013671875, + "logps/rejected": -338.677978515625, + "loss": 1.0947, + "margin_dpo/margin_mean": 115.93913269042969, + "margin_dpo/margin_std": 152.4516143798828, + "step": 231 + }, + { + "KL/chosen_KL_mean": -187.2882843017578, + "KL/mean": -234.39813232421875, + "KL/rejected_KL_mean": -281.5079650878906, + "KL/std": 158.7782745361328, + "epoch": 0.3406754772393539, + "fcm_dpo/beta": 0.0033917182590812445, + "fcm_dpo/delta": 0.08317073434591293, + "fcm_dpo/margin": 94.21968841552734, + "fcm_dpo/q_t": 0.4292943477630615, + "grad_norm": 25.093761444091797, + "learning_rate": 4.1842391091163933e-07, + "logits/chosen": -0.35640761256217957, + "logits/rejected": -0.33361750841140747, + "logps/chosen": -258.03704833984375, + "logps/ref_chosen": -70.74876403808594, + "logps/ref_rejected": -83.97706604003906, + "logps/rejected": -365.48504638671875, + "loss": 1.1629, + "margin_dpo/margin_mean": 94.21968078613281, + "margin_dpo/margin_std": 164.27349853515625, + "step": 232 + }, + { + "KL/chosen_KL_mean": -170.53353881835938, + "KL/mean": -244.69998168945312, + "KL/rejected_KL_mean": -318.866455078125, + "KL/std": 167.12754821777344, + "epoch": 0.342143906020558, + "fcm_dpo/beta": 0.003362037241458893, + "fcm_dpo/delta": -0.10381458699703217, + "fcm_dpo/margin": 148.33291625976562, + "fcm_dpo/q_t": 0.3922462463378906, + "grad_norm": 27.809114456176758, + "learning_rate": 4.174733034541245e-07, + "logits/chosen": -0.3662954270839691, + "logits/rejected": -0.37061402201652527, + "logps/chosen": -225.41647338867188, + "logps/ref_chosen": -54.8829345703125, + "logps/ref_rejected": -107.4800796508789, + "logps/rejected": -426.3465270996094, + "loss": 1.0649, + "margin_dpo/margin_mean": 148.33291625976562, + "margin_dpo/margin_std": 215.14837646484375, + "step": 233 + }, + { + "KL/chosen_KL_mean": -167.3464813232422, + "KL/mean": -245.2230987548828, + "KL/rejected_KL_mean": -323.0997009277344, + "KL/std": 150.866455078125, + "epoch": 0.3436123348017621, + "fcm_dpo/beta": 0.003259950317442417, + "fcm_dpo/delta": -0.11555645614862442, + "fcm_dpo/margin": 155.75323486328125, + "fcm_dpo/q_t": 0.3844042122364044, + "grad_norm": 36.372398376464844, + "learning_rate": 4.165182829193126e-07, + "logits/chosen": -0.3135479688644409, + "logits/rejected": -0.34066638350486755, + "logps/chosen": -211.44100952148438, + "logps/ref_chosen": -44.094520568847656, + "logps/ref_rejected": -100.00663757324219, + "logps/rejected": -423.1063232421875, + "loss": 1.0153, + "margin_dpo/margin_mean": 155.75323486328125, + "margin_dpo/margin_std": 169.97021484375, + "step": 234 + }, + { + "KL/chosen_KL_mean": -198.0821990966797, + "KL/mean": -247.19805908203125, + "KL/rejected_KL_mean": -296.31390380859375, + "KL/std": 143.0350341796875, + "epoch": 0.34508076358296624, + "fcm_dpo/beta": 0.0033134431578218937, + "fcm_dpo/delta": 0.07656269520521164, + "fcm_dpo/margin": 98.23170471191406, + "fcm_dpo/q_t": 0.42514321208000183, + "grad_norm": 27.246450424194336, + "learning_rate": 4.1555887447288255e-07, + "logits/chosen": -0.38035351037979126, + "logits/rejected": -0.3624608516693115, + "logps/chosen": -260.32012939453125, + "logps/ref_chosen": -62.237911224365234, + "logps/ref_rejected": -90.39506530761719, + "logps/rejected": -386.708984375, + "loss": 1.1651, + "margin_dpo/margin_mean": 98.2317123413086, + "margin_dpo/margin_std": 172.09481811523438, + "step": 235 + }, + { + "KL/chosen_KL_mean": -141.14572143554688, + "KL/mean": -220.7097930908203, + "KL/rejected_KL_mean": -300.2738952636719, + "KL/std": 150.90029907226562, + "epoch": 0.3465491923641703, + "fcm_dpo/beta": 0.003255967516452074, + "fcm_dpo/delta": -0.12453138083219528, + "fcm_dpo/margin": 159.128173828125, + "fcm_dpo/q_t": 0.3787830173969269, + "grad_norm": 40.99539566040039, + "learning_rate": 4.1459510339613946e-07, + "logits/chosen": -0.3324674963951111, + "logits/rejected": -0.33202531933784485, + "logps/chosen": -190.48707580566406, + "logps/ref_chosen": -49.34136199951172, + "logps/ref_rejected": -103.51162719726562, + "logps/rejected": -403.7855224609375, + "loss": 0.9848, + "margin_dpo/margin_mean": 159.128173828125, + "margin_dpo/margin_std": 143.73922729492188, + "step": 236 + }, + { + "KL/chosen_KL_mean": -186.3697509765625, + "KL/mean": -246.17672729492188, + "KL/rejected_KL_mean": -305.98370361328125, + "KL/std": 150.6981964111328, + "epoch": 0.34801762114537443, + "fcm_dpo/beta": 0.0032444519456475973, + "fcm_dpo/delta": 0.012344859540462494, + "fcm_dpo/margin": 119.61395263671875, + "fcm_dpo/q_t": 0.411772221326828, + "grad_norm": 26.916481018066406, + "learning_rate": 4.136269950853473e-07, + "logits/chosen": -0.39190009236335754, + "logits/rejected": -0.38742589950561523, + "logps/chosen": -240.53787231445312, + "logps/ref_chosen": -54.168121337890625, + "logps/ref_rejected": -94.78036499023438, + "logps/rejected": -400.7640686035156, + "loss": 1.1085, + "margin_dpo/margin_mean": 119.61394500732422, + "margin_dpo/margin_std": 173.78614807128906, + "step": 237 + }, + { + "KL/chosen_KL_mean": -164.6402587890625, + "KL/mean": -224.44186401367188, + "KL/rejected_KL_mean": -284.2435302734375, + "KL/std": 150.41282653808594, + "epoch": 0.34948604992657856, + "fcm_dpo/beta": 0.003233974566683173, + "fcm_dpo/delta": 0.013382863253355026, + "fcm_dpo/margin": 119.60325622558594, + "fcm_dpo/q_t": 0.41353076696395874, + "grad_norm": 22.910091400146484, + "learning_rate": 4.126545750510605e-07, + "logits/chosen": -0.3664902448654175, + "logits/rejected": -0.38237977027893066, + "logps/chosen": -218.61337280273438, + "logps/ref_chosen": -53.973121643066406, + "logps/ref_rejected": -89.41795349121094, + "logps/rejected": -373.66143798828125, + "loss": 1.1035, + "margin_dpo/margin_mean": 119.60325622558594, + "margin_dpo/margin_std": 166.632080078125, + "step": 238 + }, + { + "KL/chosen_KL_mean": -173.09349060058594, + "KL/mean": -240.07090759277344, + "KL/rejected_KL_mean": -307.04833984375, + "KL/std": 139.98458862304688, + "epoch": 0.3509544787077827, + "fcm_dpo/beta": 0.003213751595467329, + "fcm_dpo/delta": -0.03296435624361038, + "fcm_dpo/margin": 133.9548797607422, + "fcm_dpo/q_t": 0.40008848905563354, + "grad_norm": 43.435367584228516, + "learning_rate": 4.116778689174514e-07, + "logits/chosen": -0.357890248298645, + "logits/rejected": -0.34496229887008667, + "logps/chosen": -231.19131469726562, + "logps/ref_chosen": -58.09782409667969, + "logps/ref_rejected": -93.59294128417969, + "logps/rejected": -400.64129638671875, + "loss": 1.0658, + "margin_dpo/margin_mean": 133.9548797607422, + "margin_dpo/margin_std": 157.7518310546875, + "step": 239 + }, + { + "KL/chosen_KL_mean": -189.6949462890625, + "KL/mean": -246.00955200195312, + "KL/rejected_KL_mean": -302.32415771484375, + "KL/std": 153.5874786376953, + "epoch": 0.3524229074889868, + "fcm_dpo/beta": 0.0032359175384044647, + "fcm_dpo/delta": 0.03676654398441315, + "fcm_dpo/margin": 112.62922668457031, + "fcm_dpo/q_t": 0.41720300912857056, + "grad_norm": 38.87122344970703, + "learning_rate": 4.106969024216348e-07, + "logits/chosen": -0.3707585334777832, + "logits/rejected": -0.3482241630554199, + "logps/chosen": -250.30943298339844, + "logps/ref_chosen": -60.6144905090332, + "logps/ref_rejected": -74.1185302734375, + "logps/rejected": -376.44268798828125, + "loss": 1.14, + "margin_dpo/margin_mean": 112.62922668457031, + "margin_dpo/margin_std": 183.985595703125, + "step": 240 + }, + { + "KL/chosen_KL_mean": -162.46920776367188, + "KL/mean": -250.920166015625, + "KL/rejected_KL_mean": -339.3711242675781, + "KL/std": 172.28445434570312, + "epoch": 0.35389133627019087, + "fcm_dpo/beta": 0.0031772879883646965, + "fcm_dpo/delta": -0.1714785099029541, + "fcm_dpo/margin": 176.90191650390625, + "fcm_dpo/q_t": 0.3749390244483948, + "grad_norm": 22.715147018432617, + "learning_rate": 4.097117014129903e-07, + "logits/chosen": -0.41761964559555054, + "logits/rejected": -0.39237093925476074, + "logps/chosen": -228.56027221679688, + "logps/ref_chosen": -66.091064453125, + "logps/ref_rejected": -88.06088256835938, + "logps/rejected": -427.4320068359375, + "loss": 0.992, + "margin_dpo/margin_mean": 176.90191650390625, + "margin_dpo/margin_std": 199.16400146484375, + "step": 241 + }, + { + "KL/chosen_KL_mean": -190.58236694335938, + "KL/mean": -253.82553100585938, + "KL/rejected_KL_mean": -317.0687255859375, + "KL/std": 150.39723205566406, + "epoch": 0.355359765051395, + "fcm_dpo/beta": 0.003157552797347307, + "fcm_dpo/delta": 0.00024249032139778137, + "fcm_dpo/margin": 126.4863510131836, + "fcm_dpo/q_t": 0.4103991985321045, + "grad_norm": 31.792686462402344, + "learning_rate": 4.087222918524807e-07, + "logits/chosen": -0.35047098994255066, + "logits/rejected": -0.32680755853652954, + "logps/chosen": -258.4462890625, + "logps/ref_chosen": -67.86392974853516, + "logps/ref_rejected": -83.36033630371094, + "logps/rejected": -400.4290771484375, + "loss": 1.1001, + "margin_dpo/margin_mean": 126.48635864257812, + "margin_dpo/margin_std": 179.0635986328125, + "step": 242 + }, + { + "KL/chosen_KL_mean": -187.70462036132812, + "KL/mean": -265.26544189453125, + "KL/rejected_KL_mean": -342.8262634277344, + "KL/std": 165.91262817382812, + "epoch": 0.3568281938325991, + "fcm_dpo/beta": 0.0030988508369773626, + "fcm_dpo/delta": -0.08498271554708481, + "fcm_dpo/margin": 155.12164306640625, + "fcm_dpo/q_t": 0.3906670808792114, + "grad_norm": 23.673492431640625, + "learning_rate": 4.07728699811968e-07, + "logits/chosen": -0.37325674295425415, + "logits/rejected": -0.3453625440597534, + "logps/chosen": -250.7888641357422, + "logps/ref_chosen": -63.0842399597168, + "logps/ref_rejected": -76.33563232421875, + "logps/rejected": -419.1618957519531, + "loss": 1.0381, + "margin_dpo/margin_mean": 155.12164306640625, + "margin_dpo/margin_std": 187.38595581054688, + "step": 243 + }, + { + "KL/chosen_KL_mean": -165.96322631835938, + "KL/mean": -245.95059204101562, + "KL/rejected_KL_mean": -325.93792724609375, + "KL/std": 159.3692626953125, + "epoch": 0.35829662261380324, + "fcm_dpo/beta": 0.0030439933761954308, + "fcm_dpo/delta": -0.09161465615034103, + "fcm_dpo/margin": 159.97470092773438, + "fcm_dpo/q_t": 0.38761717081069946, + "grad_norm": 31.374834060668945, + "learning_rate": 4.067309514735267e-07, + "logits/chosen": -0.42891860008239746, + "logits/rejected": -0.42122605443000793, + "logps/chosen": -227.1039276123047, + "logps/ref_chosen": -61.140689849853516, + "logps/ref_rejected": -94.89193725585938, + "logps/rejected": -420.82989501953125, + "loss": 1.0162, + "margin_dpo/margin_mean": 159.97470092773438, + "margin_dpo/margin_std": 164.85641479492188, + "step": 244 + }, + { + "KL/chosen_KL_mean": -186.3873291015625, + "KL/mean": -248.48068237304688, + "KL/rejected_KL_mean": -310.57403564453125, + "KL/std": 153.88400268554688, + "epoch": 0.35976505139500736, + "fcm_dpo/beta": 0.0030218339525163174, + "fcm_dpo/delta": 0.02460547536611557, + "fcm_dpo/margin": 124.18669128417969, + "fcm_dpo/q_t": 0.4137793779373169, + "grad_norm": 26.153411865234375, + "learning_rate": 4.057290731287531e-07, + "logits/chosen": -0.4331769049167633, + "logits/rejected": -0.40836483240127563, + "logps/chosen": -253.64962768554688, + "logps/ref_chosen": -67.26228332519531, + "logps/ref_rejected": -87.64010620117188, + "logps/rejected": -398.214111328125, + "loss": 1.1139, + "margin_dpo/margin_mean": 124.18669128417969, + "margin_dpo/margin_std": 170.16293334960938, + "step": 245 + }, + { + "KL/chosen_KL_mean": -182.14895629882812, + "KL/mean": -245.166015625, + "KL/rejected_KL_mean": -308.18310546875, + "KL/std": 170.09425354003906, + "epoch": 0.36123348017621143, + "fcm_dpo/beta": 0.003048623912036419, + "fcm_dpo/delta": 0.016290059313178062, + "fcm_dpo/margin": 126.03412628173828, + "fcm_dpo/q_t": 0.4134928584098816, + "grad_norm": 24.42864418029785, + "learning_rate": 4.047230911780736e-07, + "logits/chosen": -0.440762996673584, + "logits/rejected": -0.40071290731430054, + "logps/chosen": -248.84593200683594, + "logps/ref_chosen": -66.69696807861328, + "logps/ref_rejected": -84.34634399414062, + "logps/rejected": -392.5294189453125, + "loss": 1.109, + "margin_dpo/margin_mean": 126.03411865234375, + "margin_dpo/margin_std": 182.46896362304688, + "step": 246 + }, + { + "KL/chosen_KL_mean": -209.86624145507812, + "KL/mean": -298.015625, + "KL/rejected_KL_mean": -386.1650390625, + "KL/std": 176.03521728515625, + "epoch": 0.36270190895741555, + "fcm_dpo/beta": 0.002994304057210684, + "fcm_dpo/delta": -0.13526105880737305, + "fcm_dpo/margin": 176.29876708984375, + "fcm_dpo/q_t": 0.3789059519767761, + "grad_norm": 33.18147277832031, + "learning_rate": 4.0371303213004814e-07, + "logits/chosen": -0.3571593761444092, + "logits/rejected": -0.355099618434906, + "logps/chosen": -266.4715881347656, + "logps/ref_chosen": -56.6053466796875, + "logps/ref_rejected": -106.29326629638672, + "logps/rejected": -492.45831298828125, + "loss": 1.0026, + "margin_dpo/margin_mean": 176.29876708984375, + "margin_dpo/margin_std": 190.67379760742188, + "step": 247 + }, + { + "KL/chosen_KL_mean": -180.16818237304688, + "KL/mean": -257.116455078125, + "KL/rejected_KL_mean": -334.06475830078125, + "KL/std": 142.95608520507812, + "epoch": 0.3641703377386197, + "fcm_dpo/beta": 0.0029416182078421116, + "fcm_dpo/delta": -0.055744655430316925, + "fcm_dpo/margin": 153.89654541015625, + "fcm_dpo/q_t": 0.3923521637916565, + "grad_norm": 21.914152145385742, + "learning_rate": 4.0269892260067197e-07, + "logits/chosen": -0.3768647313117981, + "logits/rejected": -0.3953893482685089, + "logps/chosen": -224.21139526367188, + "logps/ref_chosen": -44.043216705322266, + "logps/ref_rejected": -91.85687255859375, + "logps/rejected": -425.921630859375, + "loss": 1.0231, + "margin_dpo/margin_mean": 153.89654541015625, + "margin_dpo/margin_std": 135.817138671875, + "step": 248 + }, + { + "KL/chosen_KL_mean": -235.70599365234375, + "KL/mean": -278.174560546875, + "KL/rejected_KL_mean": -320.6430969238281, + "KL/std": 158.992431640625, + "epoch": 0.3656387665198238, + "fcm_dpo/beta": 0.003009880194440484, + "fcm_dpo/delta": 0.1481824517250061, + "fcm_dpo/margin": 84.93710327148438, + "fcm_dpo/q_t": 0.4420696496963501, + "grad_norm": 31.89614486694336, + "learning_rate": 4.0168078931267426e-07, + "logits/chosen": -0.4115716814994812, + "logits/rejected": -0.38695603609085083, + "logps/chosen": -298.1483459472656, + "logps/ref_chosen": -62.442352294921875, + "logps/ref_rejected": -80.46806335449219, + "logps/rejected": -401.11114501953125, + "loss": 1.2227, + "margin_dpo/margin_mean": 84.93710327148438, + "margin_dpo/margin_std": 187.25634765625, + "step": 249 + }, + { + "KL/chosen_KL_mean": -207.01589965820312, + "KL/mean": -286.22161865234375, + "KL/rejected_KL_mean": -365.4273681640625, + "KL/std": 158.22332763671875, + "epoch": 0.3671071953010279, + "fcm_dpo/beta": 0.0030103102326393127, + "fcm_dpo/delta": -0.08075070381164551, + "fcm_dpo/margin": 158.41148376464844, + "fcm_dpo/q_t": 0.3886602520942688, + "grad_norm": 30.4766845703125, + "learning_rate": 4.006586590948141e-07, + "logits/chosen": -0.39069664478302, + "logits/rejected": -0.3340034484863281, + "logps/chosen": -272.652587890625, + "logps/ref_chosen": -65.63668823242188, + "logps/ref_rejected": -73.87184143066406, + "logps/rejected": -439.2991943359375, + "loss": 1.0195, + "margin_dpo/margin_mean": 158.41148376464844, + "margin_dpo/margin_std": 157.6683349609375, + "step": 250 + }, + { + "KL/chosen_KL_mean": -221.11842346191406, + "KL/mean": -274.54730224609375, + "KL/rejected_KL_mean": -327.97613525390625, + "KL/std": 165.54885864257812, + "epoch": 0.368575624082232, + "fcm_dpo/beta": 0.0030130401719361544, + "fcm_dpo/delta": 0.08070008456707001, + "fcm_dpo/margin": 106.85773468017578, + "fcm_dpo/q_t": 0.42667731642723083, + "grad_norm": 35.921974182128906, + "learning_rate": 3.9963255888117325e-07, + "logits/chosen": -0.3663170635700226, + "logits/rejected": -0.33492955565452576, + "logps/chosen": -278.3011474609375, + "logps/ref_chosen": -57.182716369628906, + "logps/ref_rejected": -77.66343688964844, + "logps/rejected": -405.63958740234375, + "loss": 1.1604, + "margin_dpo/margin_mean": 106.85773468017578, + "margin_dpo/margin_std": 176.058837890625, + "step": 251 + }, + { + "KL/chosen_KL_mean": -219.7275390625, + "KL/mean": -291.9000549316406, + "KL/rejected_KL_mean": -364.07257080078125, + "KL/std": 145.91665649414062, + "epoch": 0.3700440528634361, + "fcm_dpo/beta": 0.0030095637775957584, + "fcm_dpo/delta": -0.036197420209646225, + "fcm_dpo/margin": 144.3450164794922, + "fcm_dpo/q_t": 0.39602023363113403, + "grad_norm": 25.36005210876465, + "learning_rate": 3.9860251571044666e-07, + "logits/chosen": -0.4368041753768921, + "logits/rejected": -0.3982187509536743, + "logps/chosen": -291.4132080078125, + "logps/ref_chosen": -71.68563842773438, + "logps/ref_rejected": -84.75799560546875, + "logps/rejected": -448.83056640625, + "loss": 1.046, + "margin_dpo/margin_mean": 144.34500122070312, + "margin_dpo/margin_std": 146.50564575195312, + "step": 252 + }, + { + "KL/chosen_KL_mean": -187.4129180908203, + "KL/mean": -257.6170349121094, + "KL/rejected_KL_mean": -327.8211364746094, + "KL/std": 155.3035125732422, + "epoch": 0.37151248164464024, + "fcm_dpo/beta": 0.0030172369442880154, + "fcm_dpo/delta": -0.025195002555847168, + "fcm_dpo/margin": 140.40821838378906, + "fcm_dpo/q_t": 0.40122461318969727, + "grad_norm": 23.17310333251953, + "learning_rate": 3.9756855672522986e-07, + "logits/chosen": -0.42870020866394043, + "logits/rejected": -0.42114484310150146, + "logps/chosen": -256.5468444824219, + "logps/ref_chosen": -69.1339340209961, + "logps/ref_rejected": -98.70252990722656, + "logps/rejected": -426.523681640625, + "loss": 1.0717, + "margin_dpo/margin_mean": 140.40821838378906, + "margin_dpo/margin_std": 169.84083557128906, + "step": 253 + }, + { + "KL/chosen_KL_mean": -176.9703369140625, + "KL/mean": -236.77066040039062, + "KL/rejected_KL_mean": -296.5709533691406, + "KL/std": 159.91812133789062, + "epoch": 0.37298091042584436, + "fcm_dpo/beta": 0.003011333290487528, + "fcm_dpo/delta": 0.041351526975631714, + "fcm_dpo/margin": 119.60062408447266, + "fcm_dpo/q_t": 0.42020976543426514, + "grad_norm": 20.98316192626953, + "learning_rate": 3.965307091713037e-07, + "logits/chosen": -0.4022292196750641, + "logits/rejected": -0.3870220184326172, + "logps/chosen": -231.12533569335938, + "logps/ref_chosen": -54.154998779296875, + "logps/ref_rejected": -90.30764770507812, + "logps/rejected": -386.87860107421875, + "loss": 1.1406, + "margin_dpo/margin_mean": 119.60063171386719, + "margin_dpo/margin_std": 200.53207397460938, + "step": 254 + }, + { + "KL/chosen_KL_mean": -176.31068420410156, + "KL/mean": -240.4302215576172, + "KL/rejected_KL_mean": -304.54974365234375, + "KL/std": 140.88638305664062, + "epoch": 0.3744493392070485, + "fcm_dpo/beta": 0.003016393631696701, + "fcm_dpo/delta": 0.013489855453372002, + "fcm_dpo/margin": 128.23907470703125, + "fcm_dpo/q_t": 0.4103944003582001, + "grad_norm": 21.49859046936035, + "learning_rate": 3.954890003969163e-07, + "logits/chosen": -0.3628276288509369, + "logits/rejected": -0.3527315855026245, + "logps/chosen": -233.45236206054688, + "logps/ref_chosen": -57.14167022705078, + "logps/ref_rejected": -90.2085952758789, + "logps/rejected": -394.7583312988281, + "loss": 1.1055, + "margin_dpo/margin_mean": 128.23907470703125, + "margin_dpo/margin_std": 176.99844360351562, + "step": 255 + }, + { + "KL/chosen_KL_mean": -149.53353881835938, + "KL/mean": -217.53964233398438, + "KL/rejected_KL_mean": -285.5457458496094, + "KL/std": 150.42579650878906, + "epoch": 0.37591776798825255, + "fcm_dpo/beta": 0.0030256398022174835, + "fcm_dpo/delta": -0.012017881497740746, + "fcm_dpo/margin": 136.01217651367188, + "fcm_dpo/q_t": 0.4049100875854492, + "grad_norm": 39.732818603515625, + "learning_rate": 3.944434578520628e-07, + "logits/chosen": -0.3158034384250641, + "logits/rejected": -0.3230699896812439, + "logps/chosen": -204.697021484375, + "logps/ref_chosen": -55.163490295410156, + "logps/ref_rejected": -92.56291961669922, + "logps/rejected": -378.108642578125, + "loss": 1.0802, + "margin_dpo/margin_mean": 136.01217651367188, + "margin_dpo/margin_std": 172.76388549804688, + "step": 256 + }, + { + "KL/chosen_KL_mean": -146.291259765625, + "KL/mean": -217.93704223632812, + "KL/rejected_KL_mean": -289.58282470703125, + "KL/std": 154.93496704101562, + "epoch": 0.37738619676945667, + "fcm_dpo/beta": 0.0029899184592068195, + "fcm_dpo/delta": -0.030670955777168274, + "fcm_dpo/margin": 143.2915802001953, + "fcm_dpo/q_t": 0.4019385874271393, + "grad_norm": 23.517911911010742, + "learning_rate": 3.933941090877615e-07, + "logits/chosen": -0.36811453104019165, + "logits/rejected": -0.354714035987854, + "logps/chosen": -195.7149658203125, + "logps/ref_chosen": -49.42369842529297, + "logps/ref_rejected": -79.53791809082031, + "logps/rejected": -369.1207580566406, + "loss": 1.0716, + "margin_dpo/margin_mean": 143.2915802001953, + "margin_dpo/margin_std": 175.0234375, + "step": 257 + }, + { + "KL/chosen_KL_mean": -193.3330841064453, + "KL/mean": -262.8382568359375, + "KL/rejected_KL_mean": -332.3433837890625, + "KL/std": 162.82940673828125, + "epoch": 0.3788546255506608, + "fcm_dpo/beta": 0.0030003516003489494, + "fcm_dpo/delta": -0.01784433051943779, + "fcm_dpo/margin": 139.01031494140625, + "fcm_dpo/q_t": 0.4030870795249939, + "grad_norm": 28.047407150268555, + "learning_rate": 3.923409817553284e-07, + "logits/chosen": -0.3141087591648102, + "logits/rejected": -0.3110647201538086, + "logps/chosen": -252.7172088623047, + "logps/ref_chosen": -59.384124755859375, + "logps/ref_rejected": -95.99010467529297, + "logps/rejected": -428.33349609375, + "loss": 1.0907, + "margin_dpo/margin_mean": 139.01031494140625, + "margin_dpo/margin_std": 193.4696502685547, + "step": 258 + }, + { + "KL/chosen_KL_mean": -188.4984130859375, + "KL/mean": -248.42568969726562, + "KL/rejected_KL_mean": -308.3529968261719, + "KL/std": 159.6024169921875, + "epoch": 0.3803230543318649, + "fcm_dpo/beta": 0.0030027367174625397, + "fcm_dpo/delta": 0.04155043140053749, + "fcm_dpo/margin": 119.85458374023438, + "fcm_dpo/q_t": 0.417955607175827, + "grad_norm": 27.507400512695312, + "learning_rate": 3.9128410360564793e-07, + "logits/chosen": -0.3825646936893463, + "logits/rejected": -0.3810487985610962, + "logps/chosen": -241.32675170898438, + "logps/ref_chosen": -52.828346252441406, + "logps/ref_rejected": -89.191650390625, + "logps/rejected": -397.5446472167969, + "loss": 1.1286, + "margin_dpo/margin_mean": 119.85458374023438, + "margin_dpo/margin_std": 180.78173828125, + "step": 259 + }, + { + "KL/chosen_KL_mean": -193.53997802734375, + "KL/mean": -273.78662109375, + "KL/rejected_KL_mean": -354.0333251953125, + "KL/std": 163.99403381347656, + "epoch": 0.38179148311306904, + "fcm_dpo/beta": 0.00299159437417984, + "fcm_dpo/delta": -0.0841422975063324, + "fcm_dpo/margin": 160.49334716796875, + "fcm_dpo/q_t": 0.390764981508255, + "grad_norm": 26.93035125732422, + "learning_rate": 3.9022350248844246e-07, + "logits/chosen": -0.33758312463760376, + "logits/rejected": -0.35471126437187195, + "logps/chosen": -240.9576416015625, + "logps/ref_chosen": -47.41767501831055, + "logps/ref_rejected": -95.08978271484375, + "logps/rejected": -449.12310791015625, + "loss": 1.0251, + "margin_dpo/margin_mean": 160.49334716796875, + "margin_dpo/margin_std": 176.1913604736328, + "step": 260 + }, + { + "KL/chosen_KL_mean": -203.53651428222656, + "KL/mean": -278.8546142578125, + "KL/rejected_KL_mean": -354.1727294921875, + "KL/std": 180.8629150390625, + "epoch": 0.3832599118942731, + "fcm_dpo/beta": 0.0029416559264063835, + "fcm_dpo/delta": -0.04529657959938049, + "fcm_dpo/margin": 150.63624572753906, + "fcm_dpo/q_t": 0.39985257387161255, + "grad_norm": 23.40955924987793, + "learning_rate": 3.891592063515376e-07, + "logits/chosen": -0.30870985984802246, + "logits/rejected": -0.3094845414161682, + "logps/chosen": -256.5679016113281, + "logps/ref_chosen": -53.03137969970703, + "logps/ref_rejected": -88.51494598388672, + "logps/rejected": -442.68768310546875, + "loss": 1.0729, + "margin_dpo/margin_mean": 150.63623046875, + "margin_dpo/margin_std": 203.27151489257812, + "step": 261 + }, + { + "KL/chosen_KL_mean": -243.448974609375, + "KL/mean": -305.376708984375, + "KL/rejected_KL_mean": -367.30438232421875, + "KL/std": 165.85348510742188, + "epoch": 0.38472834067547723, + "fcm_dpo/beta": 0.002966498024761677, + "fcm_dpo/delta": 0.03342254459857941, + "fcm_dpo/margin": 123.85542297363281, + "fcm_dpo/q_t": 0.41466158628463745, + "grad_norm": 25.519702911376953, + "learning_rate": 3.880912432401264e-07, + "logits/chosen": -0.29800450801849365, + "logits/rejected": -0.2692173719406128, + "logps/chosen": -303.0691223144531, + "logps/ref_chosen": -59.620140075683594, + "logps/ref_rejected": -86.41853332519531, + "logps/rejected": -453.7229309082031, + "loss": 1.108, + "margin_dpo/margin_mean": 123.85542297363281, + "margin_dpo/margin_std": 163.9331512451172, + "step": 262 + }, + { + "KL/chosen_KL_mean": -228.96612548828125, + "KL/mean": -317.9360046386719, + "KL/rejected_KL_mean": -406.9059143066406, + "KL/std": 197.06179809570312, + "epoch": 0.38619676945668135, + "fcm_dpo/beta": 0.0028949188999831676, + "fcm_dpo/delta": -0.12226266413927078, + "fcm_dpo/margin": 177.93978881835938, + "fcm_dpo/q_t": 0.38297536969184875, + "grad_norm": 23.3193302154541, + "learning_rate": 3.870196412960302e-07, + "logits/chosen": -0.3331921398639679, + "logits/rejected": -0.30877092480659485, + "logps/chosen": -288.3870849609375, + "logps/ref_chosen": -59.42094421386719, + "logps/ref_rejected": -96.85720825195312, + "logps/rejected": -503.76312255859375, + "loss": 1.0198, + "margin_dpo/margin_mean": 177.93978881835938, + "margin_dpo/margin_std": 208.54910278320312, + "step": 263 + }, + { + "KL/chosen_KL_mean": -242.01239013671875, + "KL/mean": -317.7347412109375, + "KL/rejected_KL_mean": -393.45709228515625, + "KL/std": 182.8231201171875, + "epoch": 0.3876651982378855, + "fcm_dpo/beta": 0.002853479702025652, + "fcm_dpo/delta": -0.03490894287824631, + "fcm_dpo/margin": 151.4447021484375, + "fcm_dpo/q_t": 0.4024280309677124, + "grad_norm": 27.41741180419922, + "learning_rate": 3.8594442875695665e-07, + "logits/chosen": -0.3761428892612457, + "logits/rejected": -0.36597341299057007, + "logps/chosen": -304.7344665527344, + "logps/ref_chosen": -62.722084045410156, + "logps/ref_rejected": -93.85620880126953, + "logps/rejected": -487.31329345703125, + "loss": 1.0845, + "margin_dpo/margin_mean": 151.4447021484375, + "margin_dpo/margin_std": 206.7536163330078, + "step": 264 + }, + { + "KL/chosen_KL_mean": -260.0290832519531, + "KL/mean": -333.6810302734375, + "KL/rejected_KL_mean": -407.3330383300781, + "KL/std": 206.29835510253906, + "epoch": 0.3891336270190896, + "fcm_dpo/beta": 0.0028575900942087173, + "fcm_dpo/delta": -0.021879900246858597, + "fcm_dpo/margin": 147.30393981933594, + "fcm_dpo/q_t": 0.4068824350833893, + "grad_norm": 25.906906127929688, + "learning_rate": 3.848656339557562e-07, + "logits/chosen": -0.3324648439884186, + "logits/rejected": -0.3139057755470276, + "logps/chosen": -322.00054931640625, + "logps/ref_chosen": -61.971466064453125, + "logps/ref_rejected": -88.02059936523438, + "logps/rejected": -495.3536376953125, + "loss": 1.1181, + "margin_dpo/margin_mean": 147.30393981933594, + "margin_dpo/margin_std": 244.15444946289062, + "step": 265 + }, + { + "KL/chosen_KL_mean": -260.84808349609375, + "KL/mean": -320.35455322265625, + "KL/rejected_KL_mean": -379.8610534667969, + "KL/std": 171.5772247314453, + "epoch": 0.39060205580029367, + "fcm_dpo/beta": 0.0028820079751312733, + "fcm_dpo/delta": 0.059021368622779846, + "fcm_dpo/margin": 119.01298522949219, + "fcm_dpo/q_t": 0.4227384924888611, + "grad_norm": 65.29761505126953, + "learning_rate": 3.8378328531967507e-07, + "logits/chosen": -0.3929414451122284, + "logits/rejected": -0.3485182523727417, + "logps/chosen": -327.94775390625, + "logps/ref_chosen": -67.09967041015625, + "logps/ref_rejected": -67.97122192382812, + "logps/rejected": -447.832275390625, + "loss": 1.1495, + "margin_dpo/margin_mean": 119.01298522949219, + "margin_dpo/margin_std": 200.31187438964844, + "step": 266 + }, + { + "KL/chosen_KL_mean": -230.92379760742188, + "KL/mean": -304.11248779296875, + "KL/rejected_KL_mean": -377.3011474609375, + "KL/std": 182.7541961669922, + "epoch": 0.3920704845814978, + "fcm_dpo/beta": 0.002870975062251091, + "fcm_dpo/delta": -0.021463816985487938, + "fcm_dpo/margin": 146.3773193359375, + "fcm_dpo/q_t": 0.40422123670578003, + "grad_norm": 41.96255874633789, + "learning_rate": 3.8269741136960646e-07, + "logits/chosen": -0.3992302715778351, + "logits/rejected": -0.36245715618133545, + "logps/chosen": -299.8945617675781, + "logps/ref_chosen": -68.97075653076172, + "logps/ref_rejected": -90.16844940185547, + "logps/rejected": -467.4696044921875, + "loss": 1.0898, + "margin_dpo/margin_mean": 146.3773193359375, + "margin_dpo/margin_std": 205.5968780517578, + "step": 267 + }, + { + "KL/chosen_KL_mean": -235.22242736816406, + "KL/mean": -306.08172607421875, + "KL/rejected_KL_mean": -376.9410400390625, + "KL/std": 170.38865661621094, + "epoch": 0.3935389133627019, + "fcm_dpo/beta": 0.002865626011043787, + "fcm_dpo/delta": -0.006617257371544838, + "fcm_dpo/margin": 141.7186279296875, + "fcm_dpo/q_t": 0.4083732068538666, + "grad_norm": 30.43846321105957, + "learning_rate": 3.8160804071933894e-07, + "logits/chosen": -0.37674716114997864, + "logits/rejected": -0.3824624717235565, + "logps/chosen": -291.12274169921875, + "logps/ref_chosen": -55.90031051635742, + "logps/ref_rejected": -101.64763641357422, + "logps/rejected": -478.58868408203125, + "loss": 1.0987, + "margin_dpo/margin_mean": 141.71861267089844, + "margin_dpo/margin_std": 204.48934936523438, + "step": 268 + }, + { + "KL/chosen_KL_mean": -245.32723999023438, + "KL/mean": -330.68280029296875, + "KL/rejected_KL_mean": -416.038330078125, + "KL/std": 177.90664672851562, + "epoch": 0.39500734214390604, + "fcm_dpo/beta": 0.002839939435943961, + "fcm_dpo/delta": -0.08905084431171417, + "fcm_dpo/margin": 170.71112060546875, + "fcm_dpo/q_t": 0.3899012804031372, + "grad_norm": 27.612655639648438, + "learning_rate": 3.8051520207480204e-07, + "logits/chosen": -0.45941269397735596, + "logits/rejected": -0.4373210668563843, + "logps/chosen": -315.3667907714844, + "logps/ref_chosen": -70.03955841064453, + "logps/ref_rejected": -107.34937286376953, + "logps/rejected": -523.3876953125, + "loss": 1.0488, + "margin_dpo/margin_mean": 170.71112060546875, + "margin_dpo/margin_std": 219.97512817382812, + "step": 269 + }, + { + "KL/chosen_KL_mean": -226.80136108398438, + "KL/mean": -291.0020751953125, + "KL/rejected_KL_mean": -355.2027893066406, + "KL/std": 163.08633422851562, + "epoch": 0.3964757709251101, + "fcm_dpo/beta": 0.002845948562026024, + "fcm_dpo/delta": 0.03565208241343498, + "fcm_dpo/margin": 128.40142822265625, + "fcm_dpo/q_t": 0.41619110107421875, + "grad_norm": 45.69468688964844, + "learning_rate": 3.794189242333106e-07, + "logits/chosen": -0.4949715733528137, + "logits/rejected": -0.4854864776134491, + "logps/chosen": -296.3348388671875, + "logps/ref_chosen": -69.53347778320312, + "logps/ref_rejected": -109.92864990234375, + "logps/rejected": -465.1314392089844, + "loss": 1.1308, + "margin_dpo/margin_mean": 128.40142822265625, + "margin_dpo/margin_std": 200.96267700195312, + "step": 270 + }, + { + "KL/chosen_KL_mean": -224.1912078857422, + "KL/mean": -305.23382568359375, + "KL/rejected_KL_mean": -386.27642822265625, + "KL/std": 166.82310485839844, + "epoch": 0.39794419970631423, + "fcm_dpo/beta": 0.0028090826235711575, + "fcm_dpo/delta": -0.058409713208675385, + "fcm_dpo/margin": 162.08523559570312, + "fcm_dpo/q_t": 0.3959714472293854, + "grad_norm": 25.674280166625977, + "learning_rate": 3.7831923608280514e-07, + "logits/chosen": -0.4081183075904846, + "logits/rejected": -0.3879523277282715, + "logps/chosen": -280.9557800292969, + "logps/ref_chosen": -56.76456832885742, + "logps/ref_rejected": -92.51383972167969, + "logps/rejected": -478.790283203125, + "loss": 1.0488, + "margin_dpo/margin_mean": 162.08523559570312, + "margin_dpo/margin_std": 191.40362548828125, + "step": 271 + }, + { + "KL/chosen_KL_mean": -210.31185913085938, + "KL/mean": -310.5980224609375, + "KL/rejected_KL_mean": -410.8841552734375, + "KL/std": 184.2425537109375, + "epoch": 0.39941262848751835, + "fcm_dpo/beta": 0.002755315974354744, + "fcm_dpo/delta": -0.16125299036502838, + "fcm_dpo/margin": 200.572265625, + "fcm_dpo/q_t": 0.37152132391929626, + "grad_norm": 33.71802520751953, + "learning_rate": 3.772161666010912e-07, + "logits/chosen": -0.3228394389152527, + "logits/rejected": -0.3339686989784241, + "logps/chosen": -259.80902099609375, + "logps/ref_chosen": -49.497154235839844, + "logps/ref_rejected": -105.54279327392578, + "logps/rejected": -516.4269409179688, + "loss": 0.973, + "margin_dpo/margin_mean": 200.572265625, + "margin_dpo/margin_std": 189.1830596923828, + "step": 272 + }, + { + "KL/chosen_KL_mean": -236.64874267578125, + "KL/mean": -329.9783630371094, + "KL/rejected_KL_mean": -423.3079833984375, + "KL/std": 181.1544189453125, + "epoch": 0.4008810572687225, + "fcm_dpo/beta": 0.0026712960097938776, + "fcm_dpo/delta": -0.10431107878684998, + "fcm_dpo/margin": 186.65921020507812, + "fcm_dpo/q_t": 0.3848682641983032, + "grad_norm": 41.27581024169922, + "learning_rate": 3.761097448550755e-07, + "logits/chosen": -0.32734841108322144, + "logits/rejected": -0.3092419505119324, + "logps/chosen": -299.6241455078125, + "logps/ref_chosen": -62.97539520263672, + "logps/ref_rejected": -92.49858093261719, + "logps/rejected": -515.8065185546875, + "loss": 1.0169, + "margin_dpo/margin_mean": 186.65921020507812, + "margin_dpo/margin_std": 203.4211883544922, + "step": 273 + }, + { + "KL/chosen_KL_mean": -278.07952880859375, + "KL/mean": -350.180419921875, + "KL/rejected_KL_mean": -422.2813415527344, + "KL/std": 171.34066772460938, + "epoch": 0.4023494860499266, + "fcm_dpo/beta": 0.002680413890630007, + "fcm_dpo/delta": 0.013700582087039948, + "fcm_dpo/margin": 144.20184326171875, + "fcm_dpo/q_t": 0.41100966930389404, + "grad_norm": 41.83562469482422, + "learning_rate": 3.75e-07, + "logits/chosen": -0.30099087953567505, + "logits/rejected": -0.2856178879737854, + "logps/chosen": -333.74725341796875, + "logps/ref_chosen": -55.66770935058594, + "logps/ref_rejected": -77.33308410644531, + "logps/rejected": -499.61444091796875, + "loss": 1.1027, + "margin_dpo/margin_mean": 144.20184326171875, + "margin_dpo/margin_std": 196.0882568359375, + "step": 274 + }, + { + "KL/chosen_KL_mean": -212.87841796875, + "KL/mean": -288.3249206542969, + "KL/rejected_KL_mean": -363.7713928222656, + "KL/std": 169.57728576660156, + "epoch": 0.40381791483113066, + "fcm_dpo/beta": 0.0026775910519063473, + "fcm_dpo/delta": -0.004319606348872185, + "fcm_dpo/margin": 150.89297485351562, + "fcm_dpo/q_t": 0.4061974585056305, + "grad_norm": 29.38013458251953, + "learning_rate": 3.738869612786737e-07, + "logits/chosen": -0.35198503732681274, + "logits/rejected": -0.35458293557167053, + "logps/chosen": -261.47314453125, + "logps/ref_chosen": -48.594703674316406, + "logps/ref_rejected": -93.30369567871094, + "logps/rejected": -457.0751037597656, + "loss": 1.0836, + "margin_dpo/margin_mean": 150.89297485351562, + "margin_dpo/margin_std": 189.14598083496094, + "step": 275 + }, + { + "KL/chosen_KL_mean": -223.02987670898438, + "KL/mean": -300.77679443359375, + "KL/rejected_KL_mean": -378.52374267578125, + "KL/std": 173.62960815429688, + "epoch": 0.4052863436123348, + "fcm_dpo/beta": 0.002657739445567131, + "fcm_dpo/delta": -0.014008134603500366, + "fcm_dpo/margin": 155.49386596679688, + "fcm_dpo/q_t": 0.40572842955589294, + "grad_norm": 26.85495376586914, + "learning_rate": 3.7277065802070204e-07, + "logits/chosen": -0.33715903759002686, + "logits/rejected": -0.30880868434906006, + "logps/chosen": -279.6072998046875, + "logps/ref_chosen": -56.57740783691406, + "logps/ref_rejected": -70.36566925048828, + "logps/rejected": -448.889404296875, + "loss": 1.0856, + "margin_dpo/margin_mean": 155.49386596679688, + "margin_dpo/margin_std": 207.1399383544922, + "step": 276 + }, + { + "KL/chosen_KL_mean": -241.0596923828125, + "KL/mean": -318.1791687011719, + "KL/rejected_KL_mean": -395.29864501953125, + "KL/std": 173.15396118164062, + "epoch": 0.4067547723935389, + "fcm_dpo/beta": 0.0026624128222465515, + "fcm_dpo/delta": -0.011134679429233074, + "fcm_dpo/margin": 154.23895263671875, + "fcm_dpo/q_t": 0.405393123626709, + "grad_norm": 23.91503143310547, + "learning_rate": 3.71651119641714e-07, + "logits/chosen": -0.3526584506034851, + "logits/rejected": -0.3328750431537628, + "logps/chosen": -297.33123779296875, + "logps/ref_chosen": -56.27156066894531, + "logps/ref_rejected": -92.88127136230469, + "logps/rejected": -488.1799011230469, + "loss": 1.0874, + "margin_dpo/margin_mean": 154.2389678955078, + "margin_dpo/margin_std": 204.90927124023438, + "step": 277 + }, + { + "KL/chosen_KL_mean": -204.98043823242188, + "KL/mean": -295.61114501953125, + "KL/rejected_KL_mean": -386.24188232421875, + "KL/std": 184.41384887695312, + "epoch": 0.40822320117474303, + "fcm_dpo/beta": 0.002619755920022726, + "fcm_dpo/delta": -0.07888495177030563, + "fcm_dpo/margin": 181.26145935058594, + "fcm_dpo/q_t": 0.3913354277610779, + "grad_norm": 29.900175094604492, + "learning_rate": 3.705283756425872e-07, + "logits/chosen": -0.339875727891922, + "logits/rejected": -0.3422485291957855, + "logps/chosen": -257.92236328125, + "logps/ref_chosen": -52.94194030761719, + "logps/ref_rejected": -91.25357818603516, + "logps/rejected": -477.4954833984375, + "loss": 1.0328, + "margin_dpo/margin_mean": 181.261474609375, + "margin_dpo/margin_std": 205.30252075195312, + "step": 278 + }, + { + "KL/chosen_KL_mean": -229.3821563720703, + "KL/mean": -320.7900085449219, + "KL/rejected_KL_mean": -412.1978759765625, + "KL/std": 190.40365600585938, + "epoch": 0.40969162995594716, + "fcm_dpo/beta": 0.0025727972388267517, + "fcm_dpo/delta": -0.07477246224880219, + "fcm_dpo/margin": 182.81570434570312, + "fcm_dpo/q_t": 0.39369115233421326, + "grad_norm": 24.77928924560547, + "learning_rate": 3.6940245560867e-07, + "logits/chosen": -0.29181522130966187, + "logits/rejected": -0.2875681519508362, + "logps/chosen": -278.0234680175781, + "logps/ref_chosen": -48.641319274902344, + "logps/ref_rejected": -87.8514404296875, + "logps/rejected": -500.04931640625, + "loss": 1.0538, + "margin_dpo/margin_mean": 182.81570434570312, + "margin_dpo/margin_std": 233.40939331054688, + "step": 279 + }, + { + "KL/chosen_KL_mean": -224.71835327148438, + "KL/mean": -317.265869140625, + "KL/rejected_KL_mean": -409.8133544921875, + "KL/std": 175.26214599609375, + "epoch": 0.4111600587371512, + "fcm_dpo/beta": 0.0025460803881287575, + "fcm_dpo/delta": -0.07485491782426834, + "fcm_dpo/margin": 185.09500122070312, + "fcm_dpo/q_t": 0.389259934425354, + "grad_norm": 30.460899353027344, + "learning_rate": 3.6827338920900253e-07, + "logits/chosen": -0.3362073004245758, + "logits/rejected": -0.33699339628219604, + "logps/chosen": -283.5154724121094, + "logps/ref_chosen": -58.797122955322266, + "logps/ref_rejected": -98.61885070800781, + "logps/rejected": -508.43218994140625, + "loss": 1.0257, + "margin_dpo/margin_mean": 185.09500122070312, + "margin_dpo/margin_std": 191.9048309326172, + "step": 280 + }, + { + "KL/chosen_KL_mean": -208.28512573242188, + "KL/mean": -291.41558837890625, + "KL/rejected_KL_mean": -374.5460510253906, + "KL/std": 173.07376098632812, + "epoch": 0.41262848751835535, + "fcm_dpo/beta": 0.002538030967116356, + "fcm_dpo/delta": -0.023100202903151512, + "fcm_dpo/margin": 166.26089477539062, + "fcm_dpo/q_t": 0.3998814821243286, + "grad_norm": 23.17946434020996, + "learning_rate": 3.6714120619553435e-07, + "logits/chosen": -0.3537529706954956, + "logits/rejected": -0.3235951066017151, + "logps/chosen": -263.7736511230469, + "logps/ref_chosen": -55.488521575927734, + "logps/ref_rejected": -80.88258361816406, + "logps/rejected": -455.4286193847656, + "loss": 1.0649, + "margin_dpo/margin_mean": 166.26089477539062, + "margin_dpo/margin_std": 189.45980834960938, + "step": 281 + }, + { + "KL/chosen_KL_mean": -245.1143341064453, + "KL/mean": -309.87060546875, + "KL/rejected_KL_mean": -374.62689208984375, + "KL/std": 184.24575805664062, + "epoch": 0.41409691629955947, + "fcm_dpo/beta": 0.002562709851190448, + "fcm_dpo/delta": 0.06986706703901291, + "fcm_dpo/margin": 129.51254272460938, + "fcm_dpo/q_t": 0.4261664152145386, + "grad_norm": 21.843597412109375, + "learning_rate": 3.660059364023408e-07, + "logits/chosen": -0.4187248945236206, + "logits/rejected": -0.39314818382263184, + "logps/chosen": -318.1844787597656, + "logps/ref_chosen": -73.07014465332031, + "logps/ref_rejected": -95.35098266601562, + "logps/rejected": -469.97784423828125, + "loss": 1.1482, + "margin_dpo/margin_mean": 129.5125274658203, + "margin_dpo/margin_std": 212.96649169921875, + "step": 282 + }, + { + "KL/chosen_KL_mean": -256.6600646972656, + "KL/mean": -358.04266357421875, + "KL/rejected_KL_mean": -459.42523193359375, + "KL/std": 218.51571655273438, + "epoch": 0.4155653450807636, + "fcm_dpo/beta": 0.00252789119258523, + "fcm_dpo/delta": -0.11851293593645096, + "fcm_dpo/margin": 202.76519775390625, + "fcm_dpo/q_t": 0.3806382119655609, + "grad_norm": 25.943897247314453, + "learning_rate": 3.6486760974483685e-07, + "logits/chosen": -0.3803967833518982, + "logits/rejected": -0.38235464692115784, + "logps/chosen": -318.5585021972656, + "logps/ref_chosen": -61.89844512939453, + "logps/ref_rejected": -96.98655700683594, + "logps/rejected": -556.4117431640625, + "loss": 1.004, + "margin_dpo/margin_mean": 202.7651824951172, + "margin_dpo/margin_std": 206.27056884765625, + "step": 283 + }, + { + "KL/chosen_KL_mean": -249.64425659179688, + "KL/mean": -341.79144287109375, + "KL/rejected_KL_mean": -433.9386291503906, + "KL/std": 203.23036193847656, + "epoch": 0.4170337738619677, + "fcm_dpo/beta": 0.002476719208061695, + "fcm_dpo/delta": -0.05925939232110977, + "fcm_dpo/margin": 184.29434204101562, + "fcm_dpo/q_t": 0.3947269022464752, + "grad_norm": 28.472728729248047, + "learning_rate": 3.6372625621898863e-07, + "logits/chosen": -0.4060632884502411, + "logits/rejected": -0.392697274684906, + "logps/chosen": -308.0798034667969, + "logps/ref_chosen": -58.4355354309082, + "logps/ref_rejected": -93.46926879882812, + "logps/rejected": -527.4078979492188, + "loss": 1.0392, + "margin_dpo/margin_mean": 184.29434204101562, + "margin_dpo/margin_std": 204.0950927734375, + "step": 284 + }, + { + "KL/chosen_KL_mean": -298.7416076660156, + "KL/mean": -381.11907958984375, + "KL/rejected_KL_mean": -463.49652099609375, + "KL/std": 184.44662475585938, + "epoch": 0.4185022026431718, + "fcm_dpo/beta": 0.002482138341292739, + "fcm_dpo/delta": -0.00985686480998993, + "fcm_dpo/margin": 164.75491333007812, + "fcm_dpo/q_t": 0.4034787714481354, + "grad_norm": 28.78072738647461, + "learning_rate": 3.625819059005228e-07, + "logits/chosen": -0.3715532422065735, + "logits/rejected": -0.3519101142883301, + "logps/chosen": -364.97381591796875, + "logps/ref_chosen": -66.23219299316406, + "logps/ref_rejected": -99.1268310546875, + "logps/rejected": -562.6233520507812, + "loss": 1.0822, + "margin_dpo/margin_mean": 164.7549285888672, + "margin_dpo/margin_std": 204.08389282226562, + "step": 285 + }, + { + "KL/chosen_KL_mean": -304.86749267578125, + "KL/mean": -396.3608093261719, + "KL/rejected_KL_mean": -487.8541259765625, + "KL/std": 213.21237182617188, + "epoch": 0.4199706314243759, + "fcm_dpo/beta": 0.002452992368489504, + "fcm_dpo/delta": -0.051132772117853165, + "fcm_dpo/margin": 182.9866485595703, + "fcm_dpo/q_t": 0.3967989683151245, + "grad_norm": 26.472732543945312, + "learning_rate": 3.614345889441346e-07, + "logits/chosen": -0.3906969428062439, + "logits/rejected": -0.3723870813846588, + "logps/chosen": -377.8184814453125, + "logps/ref_chosen": -72.95100402832031, + "logps/ref_rejected": -88.58845520019531, + "logps/rejected": -576.4425659179688, + "loss": 1.0611, + "margin_dpo/margin_mean": 182.98663330078125, + "margin_dpo/margin_std": 229.21395874023438, + "step": 286 + }, + { + "KL/chosen_KL_mean": -285.84619140625, + "KL/mean": -360.84112548828125, + "KL/rejected_KL_mean": -435.83599853515625, + "KL/std": 188.15838623046875, + "epoch": 0.42143906020558003, + "fcm_dpo/beta": 0.002456413581967354, + "fcm_dpo/delta": 0.032731398940086365, + "fcm_dpo/margin": 149.98983764648438, + "fcm_dpo/q_t": 0.4151533842086792, + "grad_norm": 32.449649810791016, + "learning_rate": 3.6028433558269275e-07, + "logits/chosen": -0.3350308835506439, + "logits/rejected": -0.3064236044883728, + "logps/chosen": -347.3873291015625, + "logps/ref_chosen": -61.54115295410156, + "logps/ref_rejected": -77.69607543945312, + "logps/rejected": -513.5321044921875, + "loss": 1.1096, + "margin_dpo/margin_mean": 149.98983764648438, + "margin_dpo/margin_std": 203.369384765625, + "step": 287 + }, + { + "KL/chosen_KL_mean": -282.95465087890625, + "KL/mean": -376.14697265625, + "KL/rejected_KL_mean": -469.3393249511719, + "KL/std": 190.60353088378906, + "epoch": 0.42290748898678415, + "fcm_dpo/beta": 0.0024195481091737747, + "fcm_dpo/delta": -0.05505270138382912, + "fcm_dpo/margin": 186.38467407226562, + "fcm_dpo/q_t": 0.39451804757118225, + "grad_norm": 28.688644409179688, + "learning_rate": 3.5913117612644327e-07, + "logits/chosen": -0.3529576063156128, + "logits/rejected": -0.33676382899284363, + "logps/chosen": -339.6158752441406, + "logps/ref_chosen": -56.661224365234375, + "logps/ref_rejected": -87.33570098876953, + "logps/rejected": -556.675048828125, + "loss": 1.0452, + "margin_dpo/margin_mean": 186.38467407226562, + "margin_dpo/margin_std": 199.76907348632812, + "step": 288 + }, + { + "KL/chosen_KL_mean": -267.9907531738281, + "KL/mean": -372.37811279296875, + "KL/rejected_KL_mean": -476.76544189453125, + "KL/std": 207.67068481445312, + "epoch": 0.4243759177679883, + "fcm_dpo/beta": 0.002400734229013324, + "fcm_dpo/delta": -0.10641852021217346, + "fcm_dpo/margin": 208.77468872070312, + "fcm_dpo/q_t": 0.3854549527168274, + "grad_norm": 35.85638427734375, + "learning_rate": 3.5797514096221024e-07, + "logits/chosen": -0.30069178342819214, + "logits/rejected": -0.30261388421058655, + "logps/chosen": -313.2211608886719, + "logps/ref_chosen": -45.23039245605469, + "logps/ref_rejected": -87.64266967773438, + "logps/rejected": -564.4080810546875, + "loss": 1.0183, + "margin_dpo/margin_mean": 208.77468872070312, + "margin_dpo/margin_std": 230.7718505859375, + "step": 289 + }, + { + "KL/chosen_KL_mean": -281.2525634765625, + "KL/mean": -386.38336181640625, + "KL/rejected_KL_mean": -491.51422119140625, + "KL/std": 217.03399658203125, + "epoch": 0.42584434654919234, + "fcm_dpo/beta": 0.002342382911592722, + "fcm_dpo/delta": -0.09751632809638977, + "fcm_dpo/margin": 210.26165771484375, + "fcm_dpo/q_t": 0.38870781660079956, + "grad_norm": 25.733200073242188, + "learning_rate": 3.568162605525952e-07, + "logits/chosen": -0.3384855091571808, + "logits/rejected": -0.35991525650024414, + "logps/chosen": -336.72406005859375, + "logps/ref_chosen": -55.47149658203125, + "logps/ref_rejected": -116.70857238769531, + "logps/rejected": -608.2227783203125, + "loss": 1.0358, + "margin_dpo/margin_mean": 210.26165771484375, + "margin_dpo/margin_std": 258.7429504394531, + "step": 290 + }, + { + "KL/chosen_KL_mean": -224.06832885742188, + "KL/mean": -321.0989685058594, + "KL/rejected_KL_mean": -418.129638671875, + "KL/std": 182.753662109375, + "epoch": 0.42731277533039647, + "fcm_dpo/beta": 0.00232522701844573, + "fcm_dpo/delta": -0.053772568702697754, + "fcm_dpo/margin": 194.061279296875, + "fcm_dpo/q_t": 0.3946029245853424, + "grad_norm": 24.563919067382812, + "learning_rate": 3.5565456543517485e-07, + "logits/chosen": -0.34820133447647095, + "logits/rejected": -0.3331354260444641, + "logps/chosen": -287.3287048339844, + "logps/ref_chosen": -63.26036834716797, + "logps/ref_rejected": -89.29708862304688, + "logps/rejected": -507.42669677734375, + "loss": 1.0465, + "margin_dpo/margin_mean": 194.061279296875, + "margin_dpo/margin_std": 216.65316772460938, + "step": 291 + }, + { + "KL/chosen_KL_mean": -240.75106811523438, + "KL/mean": -340.0538330078125, + "KL/rejected_KL_mean": -439.35662841796875, + "KL/std": 209.68600463867188, + "epoch": 0.4287812041116006, + "fcm_dpo/beta": 0.0022970177233219147, + "fcm_dpo/delta": -0.058918386697769165, + "fcm_dpo/margin": 198.60556030273438, + "fcm_dpo/q_t": 0.3946416676044464, + "grad_norm": 23.112098693847656, + "learning_rate": 3.5449008622169583e-07, + "logits/chosen": -0.35769540071487427, + "logits/rejected": -0.3433658480644226, + "logps/chosen": -294.66961669921875, + "logps/ref_chosen": -53.91852951049805, + "logps/ref_rejected": -89.96138000488281, + "logps/rejected": -529.3179931640625, + "loss": 1.0545, + "margin_dpo/margin_mean": 198.60556030273438, + "margin_dpo/margin_std": 243.43380737304688, + "step": 292 + }, + { + "KL/chosen_KL_mean": -235.00213623046875, + "KL/mean": -311.6048583984375, + "KL/rejected_KL_mean": -388.20758056640625, + "KL/std": 205.09054565429688, + "epoch": 0.4302496328928047, + "fcm_dpo/beta": 0.002302415668964386, + "fcm_dpo/delta": 0.04887353628873825, + "fcm_dpo/margin": 153.2054443359375, + "fcm_dpo/q_t": 0.41990119218826294, + "grad_norm": 43.493621826171875, + "learning_rate": 3.5332285359726846e-07, + "logits/chosen": -0.3963527977466583, + "logits/rejected": -0.385434627532959, + "logps/chosen": -295.378173828125, + "logps/ref_chosen": -60.376033782958984, + "logps/ref_rejected": -77.85244750976562, + "logps/rejected": -466.05999755859375, + "loss": 1.1312, + "margin_dpo/margin_mean": 153.2054443359375, + "margin_dpo/margin_std": 233.70460510253906, + "step": 293 + }, + { + "KL/chosen_KL_mean": -212.15213012695312, + "KL/mean": -297.83599853515625, + "KL/rejected_KL_mean": -383.51983642578125, + "KL/std": 185.86453247070312, + "epoch": 0.43171806167400884, + "fcm_dpo/beta": 0.002301940694451332, + "fcm_dpo/delta": 0.005669664591550827, + "fcm_dpo/margin": 171.36770629882812, + "fcm_dpo/q_t": 0.4098511040210724, + "grad_norm": 27.905162811279297, + "learning_rate": 3.5215289831955786e-07, + "logits/chosen": -0.3524784743785858, + "logits/rejected": -0.3563184142112732, + "logps/chosen": -260.23968505859375, + "logps/ref_chosen": -48.0875358581543, + "logps/ref_rejected": -81.89698791503906, + "logps/rejected": -465.4168395996094, + "loss": 1.0942, + "margin_dpo/margin_mean": 171.36770629882812, + "margin_dpo/margin_std": 227.7198486328125, + "step": 294 + }, + { + "KL/chosen_KL_mean": -258.012451171875, + "KL/mean": -350.6398620605469, + "KL/rejected_KL_mean": -443.26727294921875, + "KL/std": 206.24761962890625, + "epoch": 0.4331864904552129, + "fcm_dpo/beta": 0.002300859661772847, + "fcm_dpo/delta": -0.02741077169775963, + "fcm_dpo/margin": 185.25485229492188, + "fcm_dpo/q_t": 0.4024040997028351, + "grad_norm": 32.099098205566406, + "learning_rate": 3.509802512179737e-07, + "logits/chosen": -0.3441588878631592, + "logits/rejected": -0.3444691002368927, + "logps/chosen": -307.9371032714844, + "logps/ref_chosen": -49.92467498779297, + "logps/ref_rejected": -87.45632934570312, + "logps/rejected": -530.7236328125, + "loss": 1.0767, + "margin_dpo/margin_mean": 185.25485229492188, + "margin_dpo/margin_std": 240.50933837890625, + "step": 295 + }, + { + "KL/chosen_KL_mean": -321.74249267578125, + "KL/mean": -389.23687744140625, + "KL/rejected_KL_mean": -456.7313232421875, + "KL/std": 204.92347717285156, + "epoch": 0.434654919236417, + "fcm_dpo/beta": 0.0022906125523149967, + "fcm_dpo/delta": -0.00922891590744257, + "fcm_dpo/margin": 134.98883056640625, + "fcm_dpo/q_t": 0.42863988876342773, + "grad_norm": 31.48113250732422, + "learning_rate": 3.498049431928577e-07, + "logits/chosen": -0.40720900893211365, + "logits/rejected": -0.3905686140060425, + "logps/chosen": -387.2337341308594, + "logps/ref_chosen": -65.49124145507812, + "logps/ref_rejected": -93.08908081054688, + "logps/rejected": -549.8204345703125, + "loss": 1.1871, + "margin_dpo/margin_mean": 134.98883056640625, + "margin_dpo/margin_std": 261.055908203125, + "step": 296 + }, + { + "KL/chosen_KL_mean": -287.096435546875, + "KL/mean": -368.1800537109375, + "KL/rejected_KL_mean": -449.26361083984375, + "KL/std": 196.81576538085938, + "epoch": 0.43612334801762115, + "fcm_dpo/beta": 0.002297044266015291, + "fcm_dpo/delta": 0.028556976467370987, + "fcm_dpo/margin": 162.16717529296875, + "fcm_dpo/q_t": 0.41283226013183594, + "grad_norm": 23.6795711517334, + "learning_rate": 3.486270052146694e-07, + "logits/chosen": -0.3867399990558624, + "logits/rejected": -0.387523889541626, + "logps/chosen": -343.57342529296875, + "logps/ref_chosen": -56.476951599121094, + "logps/ref_rejected": -95.1385498046875, + "logps/rejected": -544.4021606445312, + "loss": 1.0962, + "margin_dpo/margin_mean": 162.16717529296875, + "margin_dpo/margin_std": 195.96707153320312, + "step": 297 + }, + { + "KL/chosen_KL_mean": -323.70086669921875, + "KL/mean": -429.325439453125, + "KL/rejected_KL_mean": -534.9500122070312, + "KL/std": 264.07275390625, + "epoch": 0.43759177679882527, + "fcm_dpo/beta": 0.0022826807107776403, + "fcm_dpo/delta": -0.08633655309677124, + "fcm_dpo/margin": 211.24917602539062, + "fcm_dpo/q_t": 0.39509522914886475, + "grad_norm": 26.146221160888672, + "learning_rate": 3.474464683231698e-07, + "logits/chosen": -0.39172685146331787, + "logits/rejected": -0.41062480211257935, + "logps/chosen": -391.0260314941406, + "logps/ref_chosen": -67.32516479492188, + "logps/ref_rejected": -116.66217041015625, + "logps/rejected": -651.6121826171875, + "loss": 1.0678, + "margin_dpo/margin_mean": 211.24917602539062, + "margin_dpo/margin_std": 307.80206298828125, + "step": 298 + }, + { + "KL/chosen_KL_mean": -274.95782470703125, + "KL/mean": -365.26654052734375, + "KL/rejected_KL_mean": -455.57525634765625, + "KL/std": 200.08763122558594, + "epoch": 0.4390602055800294, + "fcm_dpo/beta": 0.002272904384881258, + "fcm_dpo/delta": -0.011529970914125443, + "fcm_dpo/margin": 180.617431640625, + "fcm_dpo/q_t": 0.40732306241989136, + "grad_norm": 33.49094772338867, + "learning_rate": 3.462633636266041e-07, + "logits/chosen": -0.36451274156570435, + "logits/rejected": -0.3671821653842926, + "logps/chosen": -323.919921875, + "logps/ref_chosen": -48.96209716796875, + "logps/ref_rejected": -84.32823944091797, + "logps/rejected": -539.9035034179688, + "loss": 1.095, + "margin_dpo/margin_mean": 180.617431640625, + "margin_dpo/margin_std": 251.55711364746094, + "step": 299 + }, + { + "KL/chosen_KL_mean": -354.8046875, + "KL/mean": -461.15472412109375, + "KL/rejected_KL_mean": -567.5047607421875, + "KL/std": 243.07180786132812, + "epoch": 0.44052863436123346, + "fcm_dpo/beta": 0.002238738350570202, + "fcm_dpo/delta": -0.07992631196975708, + "fcm_dpo/margin": 212.70004272460938, + "fcm_dpo/q_t": 0.3924391269683838, + "grad_norm": 41.03089904785156, + "learning_rate": 3.4507772230088147e-07, + "logits/chosen": -0.35996705293655396, + "logits/rejected": -0.36685582995414734, + "logps/chosen": -413.8783874511719, + "logps/ref_chosen": -59.07371139526367, + "logps/ref_rejected": -95.9664535522461, + "logps/rejected": -663.47119140625, + "loss": 1.0665, + "margin_dpo/margin_mean": 212.70004272460938, + "margin_dpo/margin_std": 292.86248779296875, + "step": 300 + }, + { + "KL/chosen_KL_mean": -305.3809814453125, + "KL/mean": -412.4756774902344, + "KL/rejected_KL_mean": -519.5704345703125, + "KL/std": 226.57168579101562, + "epoch": 0.4419970631424376, + "fcm_dpo/beta": 0.0021908977068960667, + "fcm_dpo/delta": -0.07324320077896118, + "fcm_dpo/margin": 214.18943786621094, + "fcm_dpo/q_t": 0.3950774669647217, + "grad_norm": 35.91939163208008, + "learning_rate": 3.4388957558875316e-07, + "logits/chosen": -0.3929128050804138, + "logits/rejected": -0.3964204788208008, + "logps/chosen": -362.63031005859375, + "logps/ref_chosen": -57.249366760253906, + "logps/ref_rejected": -92.35354614257812, + "logps/rejected": -611.9239501953125, + "loss": 1.051, + "margin_dpo/margin_mean": 214.189453125, + "margin_dpo/margin_std": 272.2989807128906, + "step": 301 + }, + { + "KL/chosen_KL_mean": -259.3084411621094, + "KL/mean": -347.705810546875, + "KL/rejected_KL_mean": -436.1031494140625, + "KL/std": 198.4259033203125, + "epoch": 0.4434654919236417, + "fcm_dpo/beta": 0.0021872916258871555, + "fcm_dpo/delta": 0.013604838401079178, + "fcm_dpo/margin": 176.79473876953125, + "fcm_dpo/q_t": 0.4114551544189453, + "grad_norm": 35.29695129394531, + "learning_rate": 3.426989547989902e-07, + "logits/chosen": -0.37686437368392944, + "logits/rejected": -0.3835713863372803, + "logps/chosen": -310.5064392089844, + "logps/ref_chosen": -51.197994232177734, + "logps/ref_rejected": -97.22636413574219, + "logps/rejected": -533.3295288085938, + "loss": 1.0984, + "margin_dpo/margin_mean": 176.79470825195312, + "margin_dpo/margin_std": 234.30789184570312, + "step": 302 + }, + { + "KL/chosen_KL_mean": -262.9270935058594, + "KL/mean": -342.5615234375, + "KL/rejected_KL_mean": -422.1959228515625, + "KL/std": 198.67970275878906, + "epoch": 0.44493392070484583, + "fcm_dpo/beta": 0.002209719270467758, + "fcm_dpo/delta": 0.04984103888273239, + "fcm_dpo/margin": 159.26882934570312, + "fcm_dpo/q_t": 0.4192725419998169, + "grad_norm": 39.67668914794922, + "learning_rate": 3.4150589130555773e-07, + "logits/chosen": -0.39630812406539917, + "logits/rejected": -0.38712817430496216, + "logps/chosen": -329.64105224609375, + "logps/ref_chosen": -66.71394348144531, + "logps/ref_rejected": -86.94542694091797, + "logps/rejected": -509.141357421875, + "loss": 1.1322, + "margin_dpo/margin_mean": 159.26882934570312, + "margin_dpo/margin_std": 240.13339233398438, + "step": 303 + }, + { + "KL/chosen_KL_mean": -224.00103759765625, + "KL/mean": -315.3577880859375, + "KL/rejected_KL_mean": -406.7145080566406, + "KL/std": 181.42640686035156, + "epoch": 0.44640234948604995, + "fcm_dpo/beta": 0.0022241733968257904, + "fcm_dpo/delta": -0.006868166849017143, + "fcm_dpo/margin": 182.71343994140625, + "fcm_dpo/q_t": 0.4025576710700989, + "grad_norm": 28.95069694519043, + "learning_rate": 3.403104165467883e-07, + "logits/chosen": -0.4077110290527344, + "logits/rejected": -0.40159422159194946, + "logps/chosen": -295.95172119140625, + "logps/ref_chosen": -71.95069885253906, + "logps/ref_rejected": -90.47203063964844, + "logps/rejected": -497.1865234375, + "loss": 1.0501, + "margin_dpo/margin_mean": 182.71343994140625, + "margin_dpo/margin_std": 153.1214599609375, + "step": 304 + }, + { + "KL/chosen_KL_mean": -241.20379638671875, + "KL/mean": -322.8453369140625, + "KL/rejected_KL_mean": -404.48687744140625, + "KL/std": 208.91802978515625, + "epoch": 0.447870778267254, + "fcm_dpo/beta": 0.002216045744717121, + "fcm_dpo/delta": 0.03913535922765732, + "fcm_dpo/margin": 163.2830810546875, + "fcm_dpo/q_t": 0.41752344369888306, + "grad_norm": 25.189720153808594, + "learning_rate": 3.391125620245535e-07, + "logits/chosen": -0.3988510072231293, + "logits/rejected": -0.382416695356369, + "logps/chosen": -307.9990234375, + "logps/ref_chosen": -66.79523468017578, + "logps/ref_rejected": -92.75459289550781, + "logps/rejected": -497.241455078125, + "loss": 1.1197, + "margin_dpo/margin_mean": 163.2830810546875, + "margin_dpo/margin_std": 229.060546875, + "step": 305 + }, + { + "KL/chosen_KL_mean": -243.65699768066406, + "KL/mean": -323.1556396484375, + "KL/rejected_KL_mean": -402.654296875, + "KL/std": 187.81524658203125, + "epoch": 0.44933920704845814, + "fcm_dpo/beta": 0.0022513873409479856, + "fcm_dpo/delta": 0.04347452521324158, + "fcm_dpo/margin": 158.99728393554688, + "fcm_dpo/q_t": 0.41661351919174194, + "grad_norm": 25.411108016967773, + "learning_rate": 3.3791235930343417e-07, + "logits/chosen": -0.3887644410133362, + "logits/rejected": -0.3672389090061188, + "logps/chosen": -313.34088134765625, + "logps/ref_chosen": -69.68389892578125, + "logps/ref_rejected": -85.15919494628906, + "logps/rejected": -487.8134765625, + "loss": 1.1059, + "margin_dpo/margin_mean": 158.99728393554688, + "margin_dpo/margin_std": 197.02398681640625, + "step": 306 + }, + { + "KL/chosen_KL_mean": -229.40023803710938, + "KL/mean": -310.8966979980469, + "KL/rejected_KL_mean": -392.39312744140625, + "KL/std": 175.57015991210938, + "epoch": 0.45080763582966227, + "fcm_dpo/beta": 0.002260227221995592, + "fcm_dpo/delta": 0.03280823305249214, + "fcm_dpo/margin": 162.99290466308594, + "fcm_dpo/q_t": 0.41427597403526306, + "grad_norm": 30.055885314941406, + "learning_rate": 3.367098400098881e-07, + "logits/chosen": -0.3810919523239136, + "logits/rejected": -0.36472952365875244, + "logps/chosen": -299.565673828125, + "logps/ref_chosen": -70.16542053222656, + "logps/ref_rejected": -86.97230529785156, + "logps/rejected": -479.3654479980469, + "loss": 1.1067, + "margin_dpo/margin_mean": 162.99288940429688, + "margin_dpo/margin_std": 214.08172607421875, + "step": 307 + }, + { + "KL/chosen_KL_mean": -229.43406677246094, + "KL/mean": -322.5722351074219, + "KL/rejected_KL_mean": -415.71038818359375, + "KL/std": 199.0686492919922, + "epoch": 0.4522760646108664, + "fcm_dpo/beta": 0.002260176232084632, + "fcm_dpo/delta": -0.021941393613815308, + "fcm_dpo/margin": 186.27633666992188, + "fcm_dpo/q_t": 0.40062737464904785, + "grad_norm": 34.93415451049805, + "learning_rate": 3.355050358314172e-07, + "logits/chosen": -0.3374328017234802, + "logits/rejected": -0.32412296533584595, + "logps/chosen": -284.6790771484375, + "logps/ref_chosen": -55.2449951171875, + "logps/ref_rejected": -79.37226104736328, + "logps/rejected": -495.0826416015625, + "loss": 1.0539, + "margin_dpo/margin_mean": 186.27633666992188, + "margin_dpo/margin_std": 191.69363403320312, + "step": 308 + }, + { + "KL/chosen_KL_mean": -233.2113037109375, + "KL/mean": -323.8895568847656, + "KL/rejected_KL_mean": -414.56781005859375, + "KL/std": 213.318603515625, + "epoch": 0.45374449339207046, + "fcm_dpo/beta": 0.0022653641644865274, + "fcm_dpo/delta": -0.011782001703977585, + "fcm_dpo/margin": 181.3565216064453, + "fcm_dpo/q_t": 0.4036235809326172, + "grad_norm": 54.994361877441406, + "learning_rate": 3.3429797851573183e-07, + "logits/chosen": -0.33466869592666626, + "logits/rejected": -0.32765206694602966, + "logps/chosen": -282.17041015625, + "logps/ref_chosen": -48.959083557128906, + "logps/ref_rejected": -82.34072875976562, + "logps/rejected": -496.9085693359375, + "loss": 1.0826, + "margin_dpo/margin_mean": 181.3565216064453, + "margin_dpo/margin_std": 227.5936279296875, + "step": 309 + }, + { + "KL/chosen_KL_mean": -264.2091369628906, + "KL/mean": -353.9774169921875, + "KL/rejected_KL_mean": -443.7456970214844, + "KL/std": 186.558349609375, + "epoch": 0.4552129221732746, + "fcm_dpo/beta": 0.002258453518152237, + "fcm_dpo/delta": -0.0059468671679496765, + "fcm_dpo/margin": 179.53656005859375, + "fcm_dpo/q_t": 0.40412867069244385, + "grad_norm": 21.421035766601562, + "learning_rate": 3.3308869986991487e-07, + "logits/chosen": -0.35127896070480347, + "logits/rejected": -0.33690258860588074, + "logps/chosen": -326.950927734375, + "logps/ref_chosen": -62.74177932739258, + "logps/ref_rejected": -79.9302978515625, + "logps/rejected": -523.676025390625, + "loss": 1.0633, + "margin_dpo/margin_mean": 179.5365447998047, + "margin_dpo/margin_std": 181.55760192871094, + "step": 310 + }, + { + "KL/chosen_KL_mean": -284.3205871582031, + "KL/mean": -382.16046142578125, + "KL/rejected_KL_mean": -480.00030517578125, + "KL/std": 239.59608459472656, + "epoch": 0.4566813509544787, + "fcm_dpo/beta": 0.0022359404247254133, + "fcm_dpo/delta": -0.039258040487766266, + "fcm_dpo/margin": 195.67971801757812, + "fcm_dpo/q_t": 0.40077459812164307, + "grad_norm": 27.19573402404785, + "learning_rate": 3.3187723175958346e-07, + "logits/chosen": -0.36365634202957153, + "logits/rejected": -0.34147655963897705, + "logps/chosen": -337.34857177734375, + "logps/ref_chosen": -53.02798080444336, + "logps/ref_rejected": -77.43820190429688, + "logps/rejected": -557.4384765625, + "loss": 1.0689, + "margin_dpo/margin_mean": 195.67970275878906, + "margin_dpo/margin_std": 252.54080200195312, + "step": 311 + }, + { + "KL/chosen_KL_mean": -272.4962158203125, + "KL/mean": -364.0135498046875, + "KL/rejected_KL_mean": -455.53094482421875, + "KL/std": 213.603759765625, + "epoch": 0.4581497797356828, + "fcm_dpo/beta": 0.002237812615931034, + "fcm_dpo/delta": -0.01030636951327324, + "fcm_dpo/margin": 183.03475952148438, + "fcm_dpo/q_t": 0.4078907370567322, + "grad_norm": 24.042999267578125, + "learning_rate": 3.306636061080487e-07, + "logits/chosen": -0.2863520681858063, + "logits/rejected": -0.27598023414611816, + "logps/chosen": -321.888427734375, + "logps/ref_chosen": -49.39221954345703, + "logps/ref_rejected": -75.79280853271484, + "logps/rejected": -531.32373046875, + "loss": 1.0899, + "margin_dpo/margin_mean": 183.03475952148438, + "margin_dpo/margin_std": 251.37765502929688, + "step": 312 + }, + { + "KL/chosen_KL_mean": -254.2932891845703, + "KL/mean": -349.1951599121094, + "KL/rejected_KL_mean": -444.0970458984375, + "KL/std": 223.68814086914062, + "epoch": 0.45961820851688695, + "fcm_dpo/beta": 0.0022342309821397066, + "fcm_dpo/delta": -0.025912020355463028, + "fcm_dpo/margin": 189.80377197265625, + "fcm_dpo/q_t": 0.4029422402381897, + "grad_norm": 25.419347763061523, + "learning_rate": 3.2944785489547537e-07, + "logits/chosen": -0.36437875032424927, + "logits/rejected": -0.36159804463386536, + "logps/chosen": -304.446044921875, + "logps/ref_chosen": -50.152740478515625, + "logps/ref_rejected": -86.40620422363281, + "logps/rejected": -530.5032958984375, + "loss": 1.0826, + "margin_dpo/margin_mean": 189.80377197265625, + "margin_dpo/margin_std": 249.74856567382812, + "step": 313 + }, + { + "KL/chosen_KL_mean": -285.2285461425781, + "KL/mean": -373.33819580078125, + "KL/rejected_KL_mean": -461.4478759765625, + "KL/std": 217.80224609375, + "epoch": 0.461086637298091, + "fcm_dpo/beta": 0.0022100405767560005, + "fcm_dpo/delta": 0.010647352784872055, + "fcm_dpo/margin": 176.2192840576172, + "fcm_dpo/q_t": 0.4131169021129608, + "grad_norm": 22.368419647216797, + "learning_rate": 3.2823001015803857e-07, + "logits/chosen": -0.40202397108078003, + "logits/rejected": -0.4044821262359619, + "logps/chosen": -342.46612548828125, + "logps/ref_chosen": -57.237579345703125, + "logps/ref_rejected": -97.5965347290039, + "logps/rejected": -559.04443359375, + "loss": 1.1211, + "margin_dpo/margin_mean": 176.21929931640625, + "margin_dpo/margin_std": 275.643798828125, + "step": 314 + }, + { + "KL/chosen_KL_mean": -263.5173645019531, + "KL/mean": -342.3055114746094, + "KL/rejected_KL_mean": -421.0936279296875, + "KL/std": 194.72991943359375, + "epoch": 0.46255506607929514, + "fcm_dpo/beta": 0.0022343965247273445, + "fcm_dpo/delta": 0.049690838903188705, + "fcm_dpo/margin": 157.5762939453125, + "fcm_dpo/q_t": 0.4190711975097656, + "grad_norm": 22.533966064453125, + "learning_rate": 3.270101039870797e-07, + "logits/chosen": -0.3270511329174042, + "logits/rejected": -0.3310539126396179, + "logps/chosen": -312.58697509765625, + "logps/ref_chosen": -49.06958770751953, + "logps/ref_rejected": -85.68087768554688, + "logps/rejected": -506.7745361328125, + "loss": 1.1212, + "margin_dpo/margin_mean": 157.57626342773438, + "margin_dpo/margin_std": 219.2792205810547, + "step": 315 + }, + { + "KL/chosen_KL_mean": -256.9066162109375, + "KL/mean": -373.9107360839844, + "KL/rejected_KL_mean": -490.9148864746094, + "KL/std": 229.14527893066406, + "epoch": 0.46402349486049926, + "fcm_dpo/beta": 0.0021924672182649374, + "fcm_dpo/delta": -0.12023768573999405, + "fcm_dpo/margin": 234.0082244873047, + "fcm_dpo/q_t": 0.3829796314239502, + "grad_norm": 27.900861740112305, + "learning_rate": 3.2578816852826086e-07, + "logits/chosen": -0.3719561696052551, + "logits/rejected": -0.3757820725440979, + "logps/chosen": -311.1673583984375, + "logps/ref_chosen": -54.26074981689453, + "logps/ref_rejected": -101.2814712524414, + "logps/rejected": -592.1963500976562, + "loss": 1.0059, + "margin_dpo/margin_mean": 234.00823974609375, + "margin_dpo/margin_std": 248.06161499023438, + "step": 316 + }, + { + "KL/chosen_KL_mean": -261.26275634765625, + "KL/mean": -384.9669189453125, + "KL/rejected_KL_mean": -508.6710205078125, + "KL/std": 205.64381408691406, + "epoch": 0.4654919236417034, + "fcm_dpo/beta": 0.0021530133672058582, + "fcm_dpo/delta": -0.13990481197834015, + "fcm_dpo/margin": 247.40829467773438, + "fcm_dpo/q_t": 0.3760995864868164, + "grad_norm": 25.268577575683594, + "learning_rate": 3.2456423598071783e-07, + "logits/chosen": -0.3866614103317261, + "logits/rejected": -0.3781118392944336, + "logps/chosen": -317.3569641113281, + "logps/ref_chosen": -56.094207763671875, + "logps/ref_rejected": -100.69905090332031, + "logps/rejected": -609.3701171875, + "loss": 0.9834, + "margin_dpo/margin_mean": 247.40829467773438, + "margin_dpo/margin_std": 231.6420135498047, + "step": 317 + }, + { + "KL/chosen_KL_mean": -285.3980407714844, + "KL/mean": -377.98626708984375, + "KL/rejected_KL_mean": -470.574462890625, + "KL/std": 215.07859802246094, + "epoch": 0.4669603524229075, + "fcm_dpo/beta": 0.002135781804099679, + "fcm_dpo/delta": 0.00455857440829277, + "fcm_dpo/margin": 185.17642211914062, + "fcm_dpo/q_t": 0.4088175892829895, + "grad_norm": 25.41587257385254, + "learning_rate": 3.233383385962115e-07, + "logits/chosen": -0.43662551045417786, + "logits/rejected": -0.40799379348754883, + "logps/chosen": -350.0437316894531, + "logps/ref_chosen": -64.64569854736328, + "logps/ref_rejected": -82.76425170898438, + "logps/rejected": -553.3387451171875, + "loss": 1.0885, + "margin_dpo/margin_mean": 185.17642211914062, + "margin_dpo/margin_std": 236.15972900390625, + "step": 318 + }, + { + "KL/chosen_KL_mean": -250.97140502929688, + "KL/mean": -361.4615478515625, + "KL/rejected_KL_mean": -471.95166015625, + "KL/std": 230.63836669921875, + "epoch": 0.4684287812041116, + "fcm_dpo/beta": 0.002109553199261427, + "fcm_dpo/delta": -0.06939505785703659, + "fcm_dpo/margin": 220.9802703857422, + "fcm_dpo/q_t": 0.3919963836669922, + "grad_norm": 22.969181060791016, + "learning_rate": 3.2211050867827805e-07, + "logits/chosen": -0.36899369955062866, + "logits/rejected": -0.38391441106796265, + "logps/chosen": -300.35516357421875, + "logps/ref_chosen": -49.383758544921875, + "logps/ref_rejected": -113.90650939941406, + "logps/rejected": -585.858154296875, + "loss": 1.0362, + "margin_dpo/margin_mean": 220.98025512695312, + "margin_dpo/margin_std": 247.80764770507812, + "step": 319 + }, + { + "KL/chosen_KL_mean": -261.7906494140625, + "KL/mean": -381.9345703125, + "KL/rejected_KL_mean": -502.07843017578125, + "KL/std": 240.1503448486328, + "epoch": 0.4698972099853157, + "fcm_dpo/beta": 0.002064064610749483, + "fcm_dpo/delta": -0.10146654397249222, + "fcm_dpo/margin": 240.28778076171875, + "fcm_dpo/q_t": 0.3857959806919098, + "grad_norm": 25.455394744873047, + "learning_rate": 3.208807785813777e-07, + "logits/chosen": -0.3855065703392029, + "logits/rejected": -0.39106667041778564, + "logps/chosen": -321.2955322265625, + "logps/ref_chosen": -59.50489044189453, + "logps/ref_rejected": -97.66717529296875, + "logps/rejected": -599.74560546875, + "loss": 1.0151, + "margin_dpo/margin_mean": 240.2877960205078, + "margin_dpo/margin_std": 257.2183837890625, + "step": 320 + }, + { + "KL/chosen_KL_mean": -324.5029296875, + "KL/mean": -426.83648681640625, + "KL/rejected_KL_mean": -529.1701049804688, + "KL/std": 247.80294799804688, + "epoch": 0.4713656387665198, + "fcm_dpo/beta": 0.002041730796918273, + "fcm_dpo/delta": -0.019452113658189774, + "fcm_dpo/margin": 204.66717529296875, + "fcm_dpo/q_t": 0.4043254256248474, + "grad_norm": 22.51926612854004, + "learning_rate": 3.1964918071004217e-07, + "logits/chosen": -0.36786073446273804, + "logits/rejected": -0.35931724309921265, + "logps/chosen": -386.05157470703125, + "logps/ref_chosen": -61.548683166503906, + "logps/ref_rejected": -91.64103698730469, + "logps/rejected": -620.8111572265625, + "loss": 1.084, + "margin_dpo/margin_mean": 204.66717529296875, + "margin_dpo/margin_std": 266.84027099609375, + "step": 321 + }, + { + "KL/chosen_KL_mean": -277.02001953125, + "KL/mean": -390.20526123046875, + "KL/rejected_KL_mean": -503.3905029296875, + "KL/std": 221.27413940429688, + "epoch": 0.47283406754772395, + "fcm_dpo/beta": 0.002025635913014412, + "fcm_dpo/delta": -0.0618242546916008, + "fcm_dpo/margin": 226.3704833984375, + "fcm_dpo/q_t": 0.39296412467956543, + "grad_norm": 22.475814819335938, + "learning_rate": 3.184157475180207e-07, + "logits/chosen": -0.3741741180419922, + "logits/rejected": -0.37576234340667725, + "logps/chosen": -334.31005859375, + "logps/ref_chosen": -57.29003143310547, + "logps/ref_rejected": -95.74992370605469, + "logps/rejected": -599.1404418945312, + "loss": 1.0349, + "margin_dpo/margin_mean": 226.3704833984375, + "margin_dpo/margin_std": 237.702392578125, + "step": 322 + }, + { + "KL/chosen_KL_mean": -298.865234375, + "KL/mean": -393.89263916015625, + "KL/rejected_KL_mean": -488.9200439453125, + "KL/std": 218.2051239013672, + "epoch": 0.47430249632892807, + "fcm_dpo/beta": 0.002028942573815584, + "fcm_dpo/delta": 0.014963037334382534, + "fcm_dpo/margin": 190.0548095703125, + "fcm_dpo/q_t": 0.4098392724990845, + "grad_norm": 34.564815521240234, + "learning_rate": 3.171805115074251e-07, + "logits/chosen": -0.3972129225730896, + "logits/rejected": -0.39619508385658264, + "logps/chosen": -350.09918212890625, + "logps/ref_chosen": -51.23395919799805, + "logps/ref_rejected": -75.06192016601562, + "logps/rejected": -563.98193359375, + "loss": 1.0968, + "margin_dpo/margin_mean": 190.0548095703125, + "margin_dpo/margin_std": 241.6279296875, + "step": 323 + }, + { + "KL/chosen_KL_mean": -339.42047119140625, + "KL/mean": -431.14385986328125, + "KL/rejected_KL_mean": -522.8672485351562, + "KL/std": 229.73959350585938, + "epoch": 0.47577092511013214, + "fcm_dpo/beta": 0.002055136486887932, + "fcm_dpo/delta": 0.022490426898002625, + "fcm_dpo/margin": 183.44676208496094, + "fcm_dpo/q_t": 0.41455233097076416, + "grad_norm": 38.610740661621094, + "learning_rate": 3.1594350522787295e-07, + "logits/chosen": -0.37280696630477905, + "logits/rejected": -0.35853368043899536, + "logps/chosen": -404.5556640625, + "logps/ref_chosen": -65.13516998291016, + "logps/ref_rejected": -86.47750854492188, + "logps/rejected": -609.3447265625, + "loss": 1.1193, + "margin_dpo/margin_mean": 183.44676208496094, + "margin_dpo/margin_std": 259.76324462890625, + "step": 324 + }, + { + "KL/chosen_KL_mean": -265.3223876953125, + "KL/mean": -344.13836669921875, + "KL/rejected_KL_mean": -422.95440673828125, + "KL/std": 204.55453491210938, + "epoch": 0.47723935389133626, + "fcm_dpo/beta": 0.0020671868696808815, + "fcm_dpo/delta": 0.07653862237930298, + "fcm_dpo/margin": 157.63201904296875, + "fcm_dpo/q_t": 0.42351895570755005, + "grad_norm": 25.032848358154297, + "learning_rate": 3.147047612756302e-07, + "logits/chosen": -0.4788200259208679, + "logits/rejected": -0.4589323401451111, + "logps/chosen": -321.5379638671875, + "logps/ref_chosen": -56.215599060058594, + "logps/ref_rejected": -70.08592987060547, + "logps/rejected": -493.0403137207031, + "loss": 1.1329, + "margin_dpo/margin_mean": 157.63201904296875, + "margin_dpo/margin_std": 206.6846466064453, + "step": 325 + }, + { + "KL/chosen_KL_mean": -288.2630615234375, + "KL/mean": -365.63433837890625, + "KL/rejected_KL_mean": -443.005615234375, + "KL/std": 189.14825439453125, + "epoch": 0.4787077826725404, + "fcm_dpo/beta": 0.0020953970961272717, + "fcm_dpo/delta": 0.07834838330745697, + "fcm_dpo/margin": 154.7425537109375, + "fcm_dpo/q_t": 0.42310160398483276, + "grad_norm": 34.663387298583984, + "learning_rate": 3.134643122927519e-07, + "logits/chosen": -0.4711052179336548, + "logits/rejected": -0.4501519799232483, + "logps/chosen": -360.988037109375, + "logps/ref_chosen": -72.72496032714844, + "logps/ref_rejected": -79.8467788696289, + "logps/rejected": -522.8524169921875, + "loss": 1.1242, + "margin_dpo/margin_mean": 154.7425537109375, + "margin_dpo/margin_std": 181.62420654296875, + "step": 326 + }, + { + "KL/chosen_KL_mean": -249.2471923828125, + "KL/mean": -359.001953125, + "KL/rejected_KL_mean": -468.7567138671875, + "KL/std": 200.14559936523438, + "epoch": 0.4801762114537445, + "fcm_dpo/beta": 0.00208103284239769, + "fcm_dpo/delta": -0.05987313389778137, + "fcm_dpo/margin": 219.50950622558594, + "fcm_dpo/q_t": 0.3920641541481018, + "grad_norm": 37.02153396606445, + "learning_rate": 3.1222219096622264e-07, + "logits/chosen": -0.46292924880981445, + "logits/rejected": -0.45082515478134155, + "logps/chosen": -318.3816223144531, + "logps/ref_chosen": -69.13441467285156, + "logps/ref_rejected": -111.93377685546875, + "logps/rejected": -580.6904907226562, + "loss": 1.0304, + "margin_dpo/margin_mean": 219.50949096679688, + "margin_dpo/margin_std": 218.749755859375, + "step": 327 + }, + { + "KL/chosen_KL_mean": -263.05267333984375, + "KL/mean": -361.53070068359375, + "KL/rejected_KL_mean": -460.0086669921875, + "KL/std": 220.0689697265625, + "epoch": 0.48164464023494863, + "fcm_dpo/beta": 0.0020799068734049797, + "fcm_dpo/delta": -0.010076452046632767, + "fcm_dpo/margin": 196.9560089111328, + "fcm_dpo/q_t": 0.4056174159049988, + "grad_norm": 29.511402130126953, + "learning_rate": 3.1097843002709427e-07, + "logits/chosen": -0.4060869514942169, + "logits/rejected": -0.4065949320793152, + "logps/chosen": -322.7398681640625, + "logps/ref_chosen": -59.68719482421875, + "logps/ref_rejected": -90.85499572753906, + "logps/rejected": -550.8636474609375, + "loss": 1.0771, + "margin_dpo/margin_mean": 196.95599365234375, + "margin_dpo/margin_std": 243.63185119628906, + "step": 328 + }, + { + "KL/chosen_KL_mean": -295.07080078125, + "KL/mean": -402.98480224609375, + "KL/rejected_KL_mean": -510.89874267578125, + "KL/std": 238.93309020996094, + "epoch": 0.4831130690161527, + "fcm_dpo/beta": 0.002052995143458247, + "fcm_dpo/delta": -0.04573259502649307, + "fcm_dpo/margin": 215.82797241210938, + "fcm_dpo/q_t": 0.3972422778606415, + "grad_norm": 23.92837142944336, + "learning_rate": 3.0973306224962437e-07, + "logits/chosen": -0.4066659212112427, + "logits/rejected": -0.3950307369232178, + "logps/chosen": -360.3170166015625, + "logps/ref_chosen": -65.2461929321289, + "logps/ref_rejected": -100.69770812988281, + "logps/rejected": -611.596435546875, + "loss": 1.0616, + "margin_dpo/margin_mean": 215.82797241210938, + "margin_dpo/margin_std": 258.53314208984375, + "step": 329 + }, + { + "KL/chosen_KL_mean": -256.5444641113281, + "KL/mean": -369.53118896484375, + "KL/rejected_KL_mean": -482.51788330078125, + "KL/std": 233.46661376953125, + "epoch": 0.4845814977973568, + "fcm_dpo/beta": 0.002038386417552829, + "fcm_dpo/delta": -0.06354449689388275, + "fcm_dpo/margin": 225.9734344482422, + "fcm_dpo/q_t": 0.3932916224002838, + "grad_norm": 26.08322525024414, + "learning_rate": 3.084861204504122e-07, + "logits/chosen": -0.40733757615089417, + "logits/rejected": -0.40871596336364746, + "logps/chosen": -303.5428161621094, + "logps/ref_chosen": -46.998348236083984, + "logps/ref_rejected": -86.87684631347656, + "logps/rejected": -569.394775390625, + "loss": 1.04, + "margin_dpo/margin_mean": 225.9734344482422, + "margin_dpo/margin_std": 254.14117431640625, + "step": 330 + }, + { + "KL/chosen_KL_mean": -283.42938232421875, + "KL/mean": -396.7335205078125, + "KL/rejected_KL_mean": -510.03765869140625, + "KL/std": 195.69345092773438, + "epoch": 0.48604992657856094, + "fcm_dpo/beta": 0.0020131845958530903, + "fcm_dpo/delta": -0.058914512395858765, + "fcm_dpo/margin": 226.6082763671875, + "fcm_dpo/q_t": 0.3916124403476715, + "grad_norm": 24.841083526611328, + "learning_rate": 3.072376374875335e-07, + "logits/chosen": -0.4177253246307373, + "logits/rejected": -0.41352635622024536, + "logps/chosen": -333.95367431640625, + "logps/ref_chosen": -50.52424621582031, + "logps/ref_rejected": -89.01544189453125, + "logps/rejected": -599.0531005859375, + "loss": 1.015, + "margin_dpo/margin_mean": 226.6082763671875, + "margin_dpo/margin_std": 190.06936645507812, + "step": 331 + }, + { + "KL/chosen_KL_mean": -292.7176513671875, + "KL/mean": -374.81634521484375, + "KL/rejected_KL_mean": -456.9150390625, + "KL/std": 211.74179077148438, + "epoch": 0.48751835535976507, + "fcm_dpo/beta": 0.002031027339398861, + "fcm_dpo/delta": 0.06868893653154373, + "fcm_dpo/margin": 164.1973876953125, + "fcm_dpo/q_t": 0.423465371131897, + "grad_norm": 21.33700180053711, + "learning_rate": 3.059876462596758e-07, + "logits/chosen": -0.4500772953033447, + "logits/rejected": -0.4330589771270752, + "logps/chosen": -341.8979187011719, + "logps/ref_chosen": -49.18028259277344, + "logps/ref_rejected": -76.48515319824219, + "logps/rejected": -533.4002075195312, + "loss": 1.1317, + "margin_dpo/margin_mean": 164.1973876953125, + "margin_dpo/margin_std": 231.62454223632812, + "step": 332 + }, + { + "KL/chosen_KL_mean": -320.2816162109375, + "KL/mean": -428.072998046875, + "KL/rejected_KL_mean": -535.8644409179688, + "KL/std": 247.35726928710938, + "epoch": 0.4889867841409692, + "fcm_dpo/beta": 0.0020122663117945194, + "fcm_dpo/delta": -0.03633493557572365, + "fcm_dpo/margin": 215.58282470703125, + "fcm_dpo/q_t": 0.40161222219467163, + "grad_norm": 20.72551918029785, + "learning_rate": 3.0473617970527015e-07, + "logits/chosen": -0.4343733787536621, + "logits/rejected": -0.427177369594574, + "logps/chosen": -384.037353515625, + "logps/ref_chosen": -63.75574493408203, + "logps/ref_rejected": -95.04411315917969, + "logps/rejected": -630.9085693359375, + "loss": 1.0836, + "margin_dpo/margin_mean": 215.58282470703125, + "margin_dpo/margin_std": 294.230712890625, + "step": 333 + }, + { + "KL/chosen_KL_mean": -313.5834045410156, + "KL/mean": -410.6392822265625, + "KL/rejected_KL_mean": -507.6951904296875, + "KL/std": 270.35736083984375, + "epoch": 0.49045521292217326, + "fcm_dpo/beta": 0.002020814223214984, + "fcm_dpo/delta": 0.008031206205487251, + "fcm_dpo/margin": 194.11180114746094, + "fcm_dpo/q_t": 0.411624014377594, + "grad_norm": 25.060213088989258, + "learning_rate": 3.034832708016243e-07, + "logits/chosen": -0.4468313455581665, + "logits/rejected": -0.44486457109451294, + "logps/chosen": -380.56317138671875, + "logps/ref_chosen": -66.97975158691406, + "logps/ref_rejected": -95.31692504882812, + "logps/rejected": -603.0120849609375, + "loss": 1.121, + "margin_dpo/margin_mean": 194.11180114746094, + "margin_dpo/margin_std": 303.725341796875, + "step": 334 + }, + { + "KL/chosen_KL_mean": -346.2706604003906, + "KL/mean": -420.2770080566406, + "KL/rejected_KL_mean": -494.2833557128906, + "KL/std": 254.03070068359375, + "epoch": 0.4919236417033774, + "fcm_dpo/beta": 0.0020229285582900047, + "fcm_dpo/delta": -0.0058713615871965885, + "fcm_dpo/margin": 148.0127410888672, + "fcm_dpo/q_t": 0.4324929714202881, + "grad_norm": 31.06818962097168, + "learning_rate": 3.022289525640531e-07, + "logits/chosen": -0.4586733281612396, + "logits/rejected": -0.4365878701210022, + "logps/chosen": -408.8131408691406, + "logps/ref_chosen": -62.54248046875, + "logps/ref_rejected": -87.61770629882812, + "logps/rejected": -581.9010620117188, + "loss": 1.1873, + "margin_dpo/margin_mean": 148.0127410888672, + "margin_dpo/margin_std": 279.176025390625, + "step": 335 + }, + { + "KL/chosen_KL_mean": -344.3829345703125, + "KL/mean": -469.1910095214844, + "KL/rejected_KL_mean": -593.9991455078125, + "KL/std": 296.28973388671875, + "epoch": 0.4933920704845815, + "fcm_dpo/beta": 0.001999348634853959, + "fcm_dpo/delta": -0.10420601069927216, + "fcm_dpo/margin": 249.61618041992188, + "fcm_dpo/q_t": 0.38974490761756897, + "grad_norm": 26.365034103393555, + "learning_rate": 3.009732580450086e-07, + "logits/chosen": -0.43404412269592285, + "logits/rejected": -0.43407052755355835, + "logps/chosen": -398.9140625, + "logps/ref_chosen": -54.53115463256836, + "logps/ref_rejected": -104.40424346923828, + "logps/rejected": -698.4033813476562, + "loss": 1.0618, + "margin_dpo/margin_mean": 249.61618041992188, + "margin_dpo/margin_std": 351.21514892578125, + "step": 336 + }, + { + "KL/chosen_KL_mean": -312.9530029296875, + "KL/mean": -438.01409912109375, + "KL/rejected_KL_mean": -563.0751342773438, + "KL/std": 238.04376220703125, + "epoch": 0.4948604992657856, + "fcm_dpo/beta": 0.0019480783957988024, + "fcm_dpo/delta": -0.0921003520488739, + "fcm_dpo/margin": 250.12213134765625, + "fcm_dpo/q_t": 0.38848379254341125, + "grad_norm": 26.773256301879883, + "learning_rate": 2.9971622033320914e-07, + "logits/chosen": -0.48545369505882263, + "logits/rejected": -0.47427335381507874, + "logps/chosen": -378.08172607421875, + "logps/ref_chosen": -65.12869262695312, + "logps/ref_rejected": -101.72701263427734, + "logps/rejected": -664.8021240234375, + "loss": 1.026, + "margin_dpo/margin_mean": 250.12213134765625, + "margin_dpo/margin_std": 282.658935546875, + "step": 337 + }, + { + "KL/chosen_KL_mean": -279.4560546875, + "KL/mean": -396.4298400878906, + "KL/rejected_KL_mean": -513.4036254882812, + "KL/std": 230.5653533935547, + "epoch": 0.49632892804698975, + "fcm_dpo/beta": 0.001927088014781475, + "fcm_dpo/delta": -0.05328977108001709, + "fcm_dpo/margin": 233.9475555419922, + "fcm_dpo/q_t": 0.39445608854293823, + "grad_norm": 26.295778274536133, + "learning_rate": 2.984578725527675e-07, + "logits/chosen": -0.44427040219306946, + "logits/rejected": -0.438721239566803, + "logps/chosen": -337.8787536621094, + "logps/ref_chosen": -58.422706604003906, + "logps/ref_rejected": -89.06854248046875, + "logps/rejected": -602.47216796875, + "loss": 1.0356, + "margin_dpo/margin_mean": 233.94757080078125, + "margin_dpo/margin_std": 240.76602172851562, + "step": 338 + }, + { + "KL/chosen_KL_mean": -305.31781005859375, + "KL/mean": -421.2132568359375, + "KL/rejected_KL_mean": -537.1087646484375, + "KL/std": 242.9071044921875, + "epoch": 0.4977973568281938, + "fcm_dpo/beta": 0.0019262076821178198, + "fcm_dpo/delta": -0.04981581121683121, + "fcm_dpo/margin": 231.79090881347656, + "fcm_dpo/q_t": 0.3960764408111572, + "grad_norm": 24.693683624267578, + "learning_rate": 2.9719824786231796e-07, + "logits/chosen": -0.4791075587272644, + "logits/rejected": -0.46518805623054504, + "logps/chosen": -365.3131408691406, + "logps/ref_chosen": -59.99531555175781, + "logps/ref_rejected": -103.9109115600586, + "logps/rejected": -641.0196533203125, + "loss": 1.0471, + "margin_dpo/margin_mean": 231.79090881347656, + "margin_dpo/margin_std": 240.60723876953125, + "step": 339 + }, + { + "KL/chosen_KL_mean": -322.9487609863281, + "KL/mean": -422.3816223144531, + "KL/rejected_KL_mean": -521.814453125, + "KL/std": 234.76840209960938, + "epoch": 0.49926578560939794, + "fcm_dpo/beta": 0.0019024586072191596, + "fcm_dpo/delta": 0.0222429558634758, + "fcm_dpo/margin": 198.86572265625, + "fcm_dpo/q_t": 0.41325610876083374, + "grad_norm": 33.532562255859375, + "learning_rate": 2.959373794541426e-07, + "logits/chosen": -0.4173485040664673, + "logits/rejected": -0.3965187966823578, + "logps/chosen": -375.77899169921875, + "logps/ref_chosen": -52.83022689819336, + "logps/ref_rejected": -73.10723114013672, + "logps/rejected": -594.9216918945312, + "loss": 1.1133, + "margin_dpo/margin_mean": 198.86572265625, + "margin_dpo/margin_std": 284.8116455078125, + "step": 340 + }, + { + "KL/chosen_KL_mean": -319.81683349609375, + "KL/mean": -433.06732177734375, + "KL/rejected_KL_mean": -546.3178100585938, + "KL/std": 254.18038940429688, + "epoch": 0.5007342143906021, + "fcm_dpo/beta": 0.0018979123560711741, + "fcm_dpo/delta": -0.03151214122772217, + "fcm_dpo/margin": 226.50100708007812, + "fcm_dpo/q_t": 0.40071308612823486, + "grad_norm": 26.986059188842773, + "learning_rate": 2.946753005532965e-07, + "logits/chosen": -0.43833357095718384, + "logits/rejected": -0.4382708966732025, + "logps/chosen": -367.7166442871094, + "logps/ref_chosen": -47.899803161621094, + "logps/ref_rejected": -101.80987548828125, + "logps/rejected": -648.127685546875, + "loss": 1.0612, + "margin_dpo/margin_mean": 226.50100708007812, + "margin_dpo/margin_std": 261.280517578125, + "step": 341 + }, + { + "KL/chosen_KL_mean": -317.66387939453125, + "KL/mean": -418.5531005859375, + "KL/rejected_KL_mean": -519.4423828125, + "KL/std": 237.76806640625, + "epoch": 0.5022026431718062, + "fcm_dpo/beta": 0.0019124182872474194, + "fcm_dpo/delta": 0.014296330511569977, + "fcm_dpo/margin": 201.77845764160156, + "fcm_dpo/q_t": 0.41131168603897095, + "grad_norm": 24.853057861328125, + "learning_rate": 2.934120444167326e-07, + "logits/chosen": -0.4720449149608612, + "logits/rejected": -0.4480515718460083, + "logps/chosen": -389.6605224609375, + "logps/ref_chosen": -71.99664306640625, + "logps/ref_rejected": -92.58959197998047, + "logps/rejected": -612.031982421875, + "loss": 1.1035, + "margin_dpo/margin_mean": 201.77847290039062, + "margin_dpo/margin_std": 277.33526611328125, + "step": 342 + }, + { + "KL/chosen_KL_mean": -305.1226806640625, + "KL/mean": -425.2977294921875, + "KL/rejected_KL_mean": -545.4727783203125, + "KL/std": 241.86111450195312, + "epoch": 0.5036710719530103, + "fcm_dpo/beta": 0.0018888043705374002, + "fcm_dpo/delta": -0.05659223720431328, + "fcm_dpo/margin": 240.35008239746094, + "fcm_dpo/q_t": 0.392503947019577, + "grad_norm": 22.807832717895508, + "learning_rate": 2.9214764433242476e-07, + "logits/chosen": -0.4819292724132538, + "logits/rejected": -0.4859675168991089, + "logps/chosen": -359.5283203125, + "logps/ref_chosen": -54.405616760253906, + "logps/ref_rejected": -111.04142761230469, + "logps/rejected": -656.51416015625, + "loss": 1.0242, + "margin_dpo/margin_mean": 240.35009765625, + "margin_dpo/margin_std": 217.2589874267578, + "step": 343 + }, + { + "KL/chosen_KL_mean": -299.1427001953125, + "KL/mean": -402.548583984375, + "KL/rejected_KL_mean": -505.9544677734375, + "KL/std": 257.1475524902344, + "epoch": 0.5051395007342144, + "fcm_dpo/beta": 0.0019029853865504265, + "fcm_dpo/delta": 0.0050534456968307495, + "fcm_dpo/margin": 206.81173706054688, + "fcm_dpo/q_t": 0.41077619791030884, + "grad_norm": 28.06170654296875, + "learning_rate": 2.9088213361849126e-07, + "logits/chosen": -0.45593854784965515, + "logits/rejected": -0.45924174785614014, + "logps/chosen": -353.10736083984375, + "logps/ref_chosen": -53.96466827392578, + "logps/ref_rejected": -90.62336730957031, + "logps/rejected": -596.5778198242188, + "loss": 1.0943, + "margin_dpo/margin_mean": 206.81173706054688, + "margin_dpo/margin_std": 261.9127197265625, + "step": 344 + }, + { + "KL/chosen_KL_mean": -354.2217712402344, + "KL/mean": -471.0653076171875, + "KL/rejected_KL_mean": -587.9088745117188, + "KL/std": 249.9000244140625, + "epoch": 0.5066079295154186, + "fcm_dpo/beta": 0.0018797038355842233, + "fcm_dpo/delta": -0.04108835384249687, + "fcm_dpo/margin": 233.6870880126953, + "fcm_dpo/q_t": 0.3980991244316101, + "grad_norm": 19.72064208984375, + "learning_rate": 2.896155456223163e-07, + "logits/chosen": -0.46400630474090576, + "logits/rejected": -0.4607963263988495, + "logps/chosen": -415.907470703125, + "logps/ref_chosen": -61.685699462890625, + "logps/ref_rejected": -99.49041748046875, + "logps/rejected": -687.3992919921875, + "loss": 1.0559, + "margin_dpo/margin_mean": 233.68710327148438, + "margin_dpo/margin_std": 272.0915222167969, + "step": 345 + }, + { + "KL/chosen_KL_mean": -343.4974365234375, + "KL/mean": -453.58154296875, + "KL/rejected_KL_mean": -563.6656494140625, + "KL/std": 240.47105407714844, + "epoch": 0.5080763582966226, + "fcm_dpo/beta": 0.0018681611400097609, + "fcm_dpo/delta": -0.011793499812483788, + "fcm_dpo/margin": 220.168212890625, + "fcm_dpo/q_t": 0.4032723307609558, + "grad_norm": 25.529489517211914, + "learning_rate": 2.883479137196714e-07, + "logits/chosen": -0.43244969844818115, + "logits/rejected": -0.42079615592956543, + "logps/chosen": -398.75372314453125, + "logps/ref_chosen": -55.256263732910156, + "logps/ref_rejected": -77.41532135009766, + "logps/rejected": -641.0809326171875, + "loss": 1.0708, + "margin_dpo/margin_mean": 220.168212890625, + "margin_dpo/margin_std": 256.2587890625, + "step": 346 + }, + { + "KL/chosen_KL_mean": -339.9598388671875, + "KL/mean": -450.0728454589844, + "KL/rejected_KL_mean": -560.1858520507812, + "KL/std": 254.47296142578125, + "epoch": 0.5095447870778267, + "fcm_dpo/beta": 0.00186370057053864, + "fcm_dpo/delta": -0.010882144793868065, + "fcm_dpo/margin": 220.22601318359375, + "fcm_dpo/q_t": 0.4054613709449768, + "grad_norm": 21.177968978881836, + "learning_rate": 2.8707927131383614e-07, + "logits/chosen": -0.4151489734649658, + "logits/rejected": -0.407335102558136, + "logps/chosen": -397.52606201171875, + "logps/ref_chosen": -57.56623840332031, + "logps/ref_rejected": -92.35509490966797, + "logps/rejected": -652.5409545898438, + "loss": 1.0836, + "margin_dpo/margin_mean": 220.22601318359375, + "margin_dpo/margin_std": 285.551513671875, + "step": 347 + }, + { + "KL/chosen_KL_mean": -299.27691650390625, + "KL/mean": -389.9990234375, + "KL/rejected_KL_mean": -480.7210998535156, + "KL/std": 215.0816650390625, + "epoch": 0.5110132158590308, + "fcm_dpo/beta": 0.0018694268073886633, + "fcm_dpo/delta": 0.06262210756540298, + "fcm_dpo/margin": 181.44418334960938, + "fcm_dpo/q_t": 0.42170295119285583, + "grad_norm": 25.129812240600586, + "learning_rate": 2.858096518347179e-07, + "logits/chosen": -0.49058231711387634, + "logits/rejected": -0.49180328845977783, + "logps/chosen": -355.5946044921875, + "logps/ref_chosen": -56.31770324707031, + "logps/ref_rejected": -89.13836669921875, + "logps/rejected": -569.8594970703125, + "loss": 1.131, + "margin_dpo/margin_mean": 181.44418334960938, + "margin_dpo/margin_std": 251.6490020751953, + "step": 348 + }, + { + "KL/chosen_KL_mean": -280.0154724121094, + "KL/mean": -386.7165832519531, + "KL/rejected_KL_mean": -493.41766357421875, + "KL/std": 241.07833862304688, + "epoch": 0.5124816446402349, + "fcm_dpo/beta": 0.0018920442089438438, + "fcm_dpo/delta": -0.0041931793093681335, + "fcm_dpo/margin": 213.40219116210938, + "fcm_dpo/q_t": 0.40846750140190125, + "grad_norm": 20.649566650390625, + "learning_rate": 2.845390887379706e-07, + "logits/chosen": -0.43403786420822144, + "logits/rejected": -0.43399712443351746, + "logps/chosen": -338.0409851074219, + "logps/ref_chosen": -58.025516510009766, + "logps/ref_rejected": -97.50515747070312, + "logps/rejected": -590.9228515625, + "loss": 1.0975, + "margin_dpo/margin_mean": 213.40220642089844, + "margin_dpo/margin_std": 298.5577392578125, + "step": 349 + }, + { + "KL/chosen_KL_mean": -297.84869384765625, + "KL/mean": -400.894775390625, + "KL/rejected_KL_mean": -503.94085693359375, + "KL/std": 231.921875, + "epoch": 0.5139500734214391, + "fcm_dpo/beta": 0.0018797710072249174, + "fcm_dpo/delta": 0.012750823050737381, + "fcm_dpo/margin": 206.09213256835938, + "fcm_dpo/q_t": 0.4099721312522888, + "grad_norm": 27.677011489868164, + "learning_rate": 2.8326761550411346e-07, + "logits/chosen": -0.4817023277282715, + "logits/rejected": -0.48322421312332153, + "logps/chosen": -362.17919921875, + "logps/ref_chosen": -64.33049011230469, + "logps/ref_rejected": -89.87164306640625, + "logps/rejected": -593.8125, + "loss": 1.1045, + "margin_dpo/margin_mean": 206.09213256835938, + "margin_dpo/margin_std": 283.2620544433594, + "step": 350 + }, + { + "KL/chosen_KL_mean": -282.1582336425781, + "KL/mean": -409.1272277832031, + "KL/rejected_KL_mean": -536.09619140625, + "KL/std": 275.96209716796875, + "epoch": 0.5154185022026432, + "fcm_dpo/beta": 0.0018625000957399607, + "fcm_dpo/delta": -0.07697418332099915, + "fcm_dpo/margin": 253.93798828125, + "fcm_dpo/q_t": 0.39376571774482727, + "grad_norm": 27.421785354614258, + "learning_rate": 2.819952656376487e-07, + "logits/chosen": -0.4568382501602173, + "logits/rejected": -0.4523654282093048, + "logps/chosen": -342.8303527832031, + "logps/ref_chosen": -60.6721305847168, + "logps/ref_rejected": -101.5654296875, + "logps/rejected": -637.66162109375, + "loss": 1.0495, + "margin_dpo/margin_mean": 253.93798828125, + "margin_dpo/margin_std": 320.8644104003906, + "step": 351 + }, + { + "KL/chosen_KL_mean": -334.1682434082031, + "KL/mean": -413.40325927734375, + "KL/rejected_KL_mean": -492.63824462890625, + "KL/std": 255.41502380371094, + "epoch": 0.5168869309838473, + "fcm_dpo/beta": 0.0018845018930733204, + "fcm_dpo/delta": 0.1046164482831955, + "fcm_dpo/margin": 158.46998596191406, + "fcm_dpo/q_t": 0.43189874291419983, + "grad_norm": 38.70182800292969, + "learning_rate": 2.8072207266617854e-07, + "logits/chosen": -0.4881801903247833, + "logits/rejected": -0.45606744289398193, + "logps/chosen": -405.1116943359375, + "logps/ref_chosen": -70.9434585571289, + "logps/ref_rejected": -76.6419677734375, + "logps/rejected": -569.2802124023438, + "loss": 1.1774, + "margin_dpo/margin_mean": 158.46998596191406, + "margin_dpo/margin_std": 281.17779541015625, + "step": 352 + }, + { + "KL/chosen_KL_mean": -315.9549865722656, + "KL/mean": -417.49176025390625, + "KL/rejected_KL_mean": -519.028564453125, + "KL/std": 250.93426513671875, + "epoch": 0.5183553597650514, + "fcm_dpo/beta": 0.0018944459734484553, + "fcm_dpo/delta": 0.015432950109243393, + "fcm_dpo/margin": 203.0735626220703, + "fcm_dpo/q_t": 0.41237473487854004, + "grad_norm": 27.271644592285156, + "learning_rate": 2.794480701395219e-07, + "logits/chosen": -0.5225635170936584, + "logits/rejected": -0.5105962157249451, + "logps/chosen": -374.350341796875, + "logps/ref_chosen": -58.39533996582031, + "logps/ref_rejected": -80.33553314208984, + "logps/rejected": -599.3640747070312, + "loss": 1.1081, + "margin_dpo/margin_mean": 203.0735626220703, + "margin_dpo/margin_std": 283.2236328125, + "step": 353 + }, + { + "KL/chosen_KL_mean": -264.493408203125, + "KL/mean": -378.5892333984375, + "KL/rejected_KL_mean": -492.68505859375, + "KL/std": 236.89236450195312, + "epoch": 0.5198237885462555, + "fcm_dpo/beta": 0.0018932155799120665, + "fcm_dpo/delta": -0.0335673987865448, + "fcm_dpo/margin": 228.191650390625, + "fcm_dpo/q_t": 0.3988182246685028, + "grad_norm": 26.834888458251953, + "learning_rate": 2.781732916288303e-07, + "logits/chosen": -0.48592621088027954, + "logits/rejected": -0.4727493226528168, + "logps/chosen": -324.29638671875, + "logps/ref_chosen": -59.80299377441406, + "logps/ref_rejected": -88.75750732421875, + "logps/rejected": -581.4425659179688, + "loss": 1.0474, + "margin_dpo/margin_mean": 228.191650390625, + "margin_dpo/margin_std": 236.67225646972656, + "step": 354 + }, + { + "KL/chosen_KL_mean": -266.8475341796875, + "KL/mean": -379.05157470703125, + "KL/rejected_KL_mean": -491.2556457519531, + "KL/std": 240.4083251953125, + "epoch": 0.5212922173274597, + "fcm_dpo/beta": 0.0018815842922776937, + "fcm_dpo/delta": -0.023417077958583832, + "fcm_dpo/margin": 224.40811157226562, + "fcm_dpo/q_t": 0.400523841381073, + "grad_norm": 34.09590530395508, + "learning_rate": 2.7689777072570284e-07, + "logits/chosen": -0.5412899255752563, + "logits/rejected": -0.5298917293548584, + "logps/chosen": -320.97601318359375, + "logps/ref_chosen": -54.12849807739258, + "logps/ref_rejected": -82.40606689453125, + "logps/rejected": -573.6617431640625, + "loss": 1.056, + "margin_dpo/margin_mean": 224.4081268310547, + "margin_dpo/margin_std": 237.8057403564453, + "step": 355 + }, + { + "KL/chosen_KL_mean": -328.6931457519531, + "KL/mean": -391.6343078613281, + "KL/rejected_KL_mean": -454.5754699707031, + "KL/std": 245.00680541992188, + "epoch": 0.5227606461086637, + "fcm_dpo/beta": 0.001889348030090332, + "fcm_dpo/delta": 0.027527010068297386, + "fcm_dpo/margin": 125.88235473632812, + "fcm_dpo/q_t": 0.4463768005371094, + "grad_norm": 29.567127227783203, + "learning_rate": 2.7562154104130176e-07, + "logits/chosen": -0.5134952068328857, + "logits/rejected": -0.49404820799827576, + "logps/chosen": -393.366943359375, + "logps/ref_chosen": -64.6738052368164, + "logps/ref_rejected": -75.89926147460938, + "logps/rejected": -530.4747314453125, + "loss": 1.241, + "margin_dpo/margin_mean": 125.88235473632812, + "margin_dpo/margin_std": 300.37933349609375, + "step": 356 + }, + { + "KL/chosen_KL_mean": -305.4680480957031, + "KL/mean": -405.6684875488281, + "KL/rejected_KL_mean": -505.8689270019531, + "KL/std": 243.4521484375, + "epoch": 0.5242290748898678, + "fcm_dpo/beta": 0.0018927913624793291, + "fcm_dpo/delta": 0.021241577342152596, + "fcm_dpo/margin": 200.40087890625, + "fcm_dpo/q_t": 0.4112818241119385, + "grad_norm": 25.547868728637695, + "learning_rate": 2.7434463620546594e-07, + "logits/chosen": -0.5083039999008179, + "logits/rejected": -0.49678516387939453, + "logps/chosen": -358.19384765625, + "logps/ref_chosen": -52.725799560546875, + "logps/ref_rejected": -86.84115600585938, + "logps/rejected": -592.7100830078125, + "loss": 1.0964, + "margin_dpo/margin_mean": 200.40087890625, + "margin_dpo/margin_std": 248.13658142089844, + "step": 357 + }, + { + "KL/chosen_KL_mean": -281.19622802734375, + "KL/mean": -375.6390380859375, + "KL/rejected_KL_mean": -470.081787109375, + "KL/std": 238.79005432128906, + "epoch": 0.5256975036710719, + "fcm_dpo/beta": 0.0019136819755658507, + "fcm_dpo/delta": 0.039984140545129776, + "fcm_dpo/margin": 188.8855743408203, + "fcm_dpo/q_t": 0.41719043254852295, + "grad_norm": 26.77370262145996, + "learning_rate": 2.730670898658255e-07, + "logits/chosen": -0.49271106719970703, + "logits/rejected": -0.4746229648590088, + "logps/chosen": -344.40167236328125, + "logps/ref_chosen": -63.20543670654297, + "logps/ref_rejected": -88.373291015625, + "logps/rejected": -558.455078125, + "loss": 1.1142, + "margin_dpo/margin_mean": 188.8855743408203, + "margin_dpo/margin_std": 258.85284423828125, + "step": 358 + }, + { + "KL/chosen_KL_mean": -321.8160400390625, + "KL/mean": -434.1688537597656, + "KL/rejected_KL_mean": -546.5216674804688, + "KL/std": 240.7170867919922, + "epoch": 0.527165932452276, + "fcm_dpo/beta": 0.0019082968356087804, + "fcm_dpo/delta": -0.030117180198431015, + "fcm_dpo/margin": 224.70556640625, + "fcm_dpo/q_t": 0.40221983194351196, + "grad_norm": 35.96456527709961, + "learning_rate": 2.717889356869146e-07, + "logits/chosen": -0.4789687991142273, + "logits/rejected": -0.4714996814727783, + "logps/chosen": -378.186279296875, + "logps/ref_chosen": -56.370216369628906, + "logps/ref_rejected": -82.17375183105469, + "logps/rejected": -628.6954345703125, + "loss": 1.0748, + "margin_dpo/margin_mean": 224.70556640625, + "margin_dpo/margin_std": 287.6982421875, + "step": 359 + }, + { + "KL/chosen_KL_mean": -325.312255859375, + "KL/mean": -405.36517333984375, + "KL/rejected_KL_mean": -485.41815185546875, + "KL/std": 198.17393493652344, + "epoch": 0.5286343612334802, + "fcm_dpo/beta": 0.0019348189234733582, + "fcm_dpo/delta": 0.09300471842288971, + "fcm_dpo/margin": 160.10589599609375, + "fcm_dpo/q_t": 0.42690205574035645, + "grad_norm": 55.63818359375, + "learning_rate": 2.7051020734928443e-07, + "logits/chosen": -0.4359634816646576, + "logits/rejected": -0.422908216714859, + "logps/chosen": -376.77264404296875, + "logps/ref_chosen": -51.460384368896484, + "logps/ref_rejected": -69.83892059326172, + "logps/rejected": -555.257080078125, + "loss": 1.1367, + "margin_dpo/margin_mean": 160.10589599609375, + "margin_dpo/margin_std": 199.83489990234375, + "step": 360 + }, + { + "KL/chosen_KL_mean": -338.5645751953125, + "KL/mean": -420.45660400390625, + "KL/rejected_KL_mean": -502.3486328125, + "KL/std": 235.536376953125, + "epoch": 0.5301027900146843, + "fcm_dpo/beta": 0.0019699514377862215, + "fcm_dpo/delta": 0.07963744550943375, + "fcm_dpo/margin": 163.78407287597656, + "fcm_dpo/q_t": 0.4253769516944885, + "grad_norm": 43.57426071166992, + "learning_rate": 2.6923093854861593e-07, + "logits/chosen": -0.44554078578948975, + "logits/rejected": -0.4408929944038391, + "logps/chosen": -392.43408203125, + "logps/ref_chosen": -53.86951446533203, + "logps/ref_rejected": -90.7692642211914, + "logps/rejected": -593.117919921875, + "loss": 1.1513, + "margin_dpo/margin_mean": 163.7840576171875, + "margin_dpo/margin_std": 258.46649169921875, + "step": 361 + }, + { + "KL/chosen_KL_mean": -293.4056396484375, + "KL/mean": -428.86041259765625, + "KL/rejected_KL_mean": -564.315185546875, + "KL/std": 247.4333038330078, + "epoch": 0.5315712187958884, + "fcm_dpo/beta": 0.0019274294609203935, + "fcm_dpo/delta": -0.1298113465309143, + "fcm_dpo/margin": 270.9095153808594, + "fcm_dpo/q_t": 0.38001787662506104, + "grad_norm": 25.4000301361084, + "learning_rate": 2.679511629948319e-07, + "logits/chosen": -0.4862041473388672, + "logits/rejected": -0.4968222975730896, + "logps/chosen": -352.044677734375, + "logps/ref_chosen": -58.639060974121094, + "logps/ref_rejected": -105.58195495605469, + "logps/rejected": -669.8970947265625, + "loss": 0.991, + "margin_dpo/margin_mean": 270.9095458984375, + "margin_dpo/margin_std": 266.97491455078125, + "step": 362 + }, + { + "KL/chosen_KL_mean": -263.7435607910156, + "KL/mean": -401.75506591796875, + "KL/rejected_KL_mean": -539.7665405273438, + "KL/std": 247.50381469726562, + "epoch": 0.5330396475770925, + "fcm_dpo/beta": 0.0018918986897915602, + "fcm_dpo/delta": -0.12874022126197815, + "fcm_dpo/margin": 276.02301025390625, + "fcm_dpo/q_t": 0.37962085008621216, + "grad_norm": 24.668289184570312, + "learning_rate": 2.6667091441120816e-07, + "logits/chosen": -0.4323340654373169, + "logits/rejected": -0.4228121340274811, + "logps/chosen": -308.30194091796875, + "logps/ref_chosen": -44.558380126953125, + "logps/ref_rejected": -74.69496154785156, + "logps/rejected": -614.4615478515625, + "loss": 0.9916, + "margin_dpo/margin_mean": 276.02301025390625, + "margin_dpo/margin_std": 272.3272705078125, + "step": 363 + }, + { + "KL/chosen_KL_mean": -291.88446044921875, + "KL/mean": -395.2333984375, + "KL/rejected_KL_mean": -498.5823059082031, + "KL/std": 239.70700073242188, + "epoch": 0.5345080763582967, + "fcm_dpo/beta": 0.0018847124883905053, + "fcm_dpo/delta": 0.010454859584569931, + "fcm_dpo/margin": 206.69784545898438, + "fcm_dpo/q_t": 0.40976476669311523, + "grad_norm": 24.834049224853516, + "learning_rate": 2.6539022653348575e-07, + "logits/chosen": -0.4836190342903137, + "logits/rejected": -0.4936879873275757, + "logps/chosen": -340.779052734375, + "logps/ref_chosen": -48.894622802734375, + "logps/ref_rejected": -91.395751953125, + "logps/rejected": -589.97802734375, + "loss": 1.0996, + "margin_dpo/margin_mean": 206.69784545898438, + "margin_dpo/margin_std": 276.881103515625, + "step": 364 + }, + { + "KL/chosen_KL_mean": -279.62744140625, + "KL/mean": -388.6301574707031, + "KL/rejected_KL_mean": -497.63287353515625, + "KL/std": 249.4620361328125, + "epoch": 0.5359765051395007, + "fcm_dpo/beta": 0.0018760417588055134, + "fcm_dpo/delta": -0.009370389394462109, + "fcm_dpo/margin": 218.00540161132812, + "fcm_dpo/q_t": 0.40619686245918274, + "grad_norm": 22.168062210083008, + "learning_rate": 2.641091331089811e-07, + "logits/chosen": -0.4246390461921692, + "logits/rejected": -0.43436652421951294, + "logps/chosen": -331.12017822265625, + "logps/ref_chosen": -51.49274444580078, + "logps/ref_rejected": -92.70166778564453, + "logps/rejected": -590.3345336914062, + "loss": 1.0731, + "margin_dpo/margin_mean": 218.00540161132812, + "margin_dpo/margin_std": 258.5311279296875, + "step": 365 + }, + { + "KL/chosen_KL_mean": -257.15478515625, + "KL/mean": -366.0369873046875, + "KL/rejected_KL_mean": -474.91925048828125, + "KL/std": 234.462890625, + "epoch": 0.5374449339207048, + "fcm_dpo/beta": 0.0018647974357008934, + "fcm_dpo/delta": -0.006713632494211197, + "fcm_dpo/margin": 217.76443481445312, + "fcm_dpo/q_t": 0.4064916968345642, + "grad_norm": 22.63542366027832, + "learning_rate": 2.6282766789569736e-07, + "logits/chosen": -0.4495304822921753, + "logits/rejected": -0.46502619981765747, + "logps/chosen": -301.8753662109375, + "logps/ref_chosen": -44.7205696105957, + "logps/ref_rejected": -83.31040954589844, + "logps/rejected": -558.2296142578125, + "loss": 1.086, + "margin_dpo/margin_mean": 217.76443481445312, + "margin_dpo/margin_std": 280.0198669433594, + "step": 366 + }, + { + "KL/chosen_KL_mean": -264.7220458984375, + "KL/mean": -356.255859375, + "KL/rejected_KL_mean": -447.7897033691406, + "KL/std": 209.75563049316406, + "epoch": 0.5389133627019089, + "fcm_dpo/beta": 0.0018905512988567352, + "fcm_dpo/delta": 0.05568384379148483, + "fcm_dpo/margin": 183.06765747070312, + "fcm_dpo/q_t": 0.4182465672492981, + "grad_norm": 18.776704788208008, + "learning_rate": 2.615458646614349e-07, + "logits/chosen": -0.4651241898536682, + "logits/rejected": -0.44852566719055176, + "logps/chosen": -323.12744140625, + "logps/ref_chosen": -58.405418395996094, + "logps/ref_rejected": -76.75132751464844, + "logps/rejected": -524.541015625, + "loss": 1.1141, + "margin_dpo/margin_mean": 183.06765747070312, + "margin_dpo/margin_std": 226.84693908691406, + "step": 367 + }, + { + "KL/chosen_KL_mean": -251.9151153564453, + "KL/mean": -395.54022216796875, + "KL/rejected_KL_mean": -539.165283203125, + "KL/std": 242.84780883789062, + "epoch": 0.540381791483113, + "fcm_dpo/beta": 0.0018544028280302882, + "fcm_dpo/delta": -0.14009898900985718, + "fcm_dpo/margin": 287.25018310546875, + "fcm_dpo/q_t": 0.373285174369812, + "grad_norm": 33.4195671081543, + "learning_rate": 2.6026375718290083e-07, + "logits/chosen": -0.4662426710128784, + "logits/rejected": -0.47398853302001953, + "logps/chosen": -296.36761474609375, + "logps/ref_chosen": -44.452518463134766, + "logps/ref_rejected": -98.55526733398438, + "logps/rejected": -637.7205810546875, + "loss": 0.9614, + "margin_dpo/margin_mean": 287.25018310546875, + "margin_dpo/margin_std": 218.70684814453125, + "step": 368 + }, + { + "KL/chosen_KL_mean": -325.591796875, + "KL/mean": -402.11102294921875, + "KL/rejected_KL_mean": -478.63018798828125, + "KL/std": 241.74417114257812, + "epoch": 0.5418502202643172, + "fcm_dpo/beta": 0.0018784540006890893, + "fcm_dpo/delta": 0.11527148634195328, + "fcm_dpo/margin": 153.03839111328125, + "fcm_dpo/q_t": 0.43329665064811707, + "grad_norm": 27.64653968811035, + "learning_rate": 2.589813792448196e-07, + "logits/chosen": -0.44844913482666016, + "logits/rejected": -0.4270949065685272, + "logps/chosen": -396.97332763671875, + "logps/ref_chosen": -71.38150024414062, + "logps/ref_rejected": -91.29582214355469, + "logps/rejected": -569.926025390625, + "loss": 1.1808, + "margin_dpo/margin_mean": 153.0383758544922, + "margin_dpo/margin_std": 270.01470947265625, + "step": 369 + }, + { + "KL/chosen_KL_mean": -349.7052307128906, + "KL/mean": -421.733642578125, + "KL/rejected_KL_mean": -493.76202392578125, + "KL/std": 252.10357666015625, + "epoch": 0.5433186490455213, + "fcm_dpo/beta": 0.0019235580693930387, + "fcm_dpo/delta": 0.12600602209568024, + "fcm_dpo/margin": 144.05679321289062, + "fcm_dpo/q_t": 0.43653106689453125, + "grad_norm": 27.10540199279785, + "learning_rate": 2.5769876463904263e-07, + "logits/chosen": -0.4947051405906677, + "logits/rejected": -0.487566202878952, + "logps/chosen": -421.312744140625, + "logps/ref_chosen": -71.60749816894531, + "logps/ref_rejected": -97.25978088378906, + "logps/rejected": -591.0218505859375, + "loss": 1.1983, + "margin_dpo/margin_mean": 144.05679321289062, + "margin_dpo/margin_std": 280.3094482421875, + "step": 370 + }, + { + "KL/chosen_KL_mean": -337.39990234375, + "KL/mean": -443.57403564453125, + "KL/rejected_KL_mean": -549.7481689453125, + "KL/std": 258.0762939453125, + "epoch": 0.5447870778267254, + "fcm_dpo/beta": 0.0019333376549184322, + "fcm_dpo/delta": -0.011104363948106766, + "fcm_dpo/margin": 212.34832763671875, + "fcm_dpo/q_t": 0.40682950615882874, + "grad_norm": 26.90560531616211, + "learning_rate": 2.5641594716365744e-07, + "logits/chosen": -0.5080785751342773, + "logits/rejected": -0.4954741299152374, + "logps/chosen": -406.81439208984375, + "logps/ref_chosen": -69.41448974609375, + "logps/ref_rejected": -99.17217254638672, + "logps/rejected": -648.9203491210938, + "loss": 1.095, + "margin_dpo/margin_mean": 212.34832763671875, + "margin_dpo/margin_std": 297.38665771484375, + "step": 371 + }, + { + "KL/chosen_KL_mean": -323.286865234375, + "KL/mean": -451.05670166015625, + "KL/rejected_KL_mean": -578.8265991210938, + "KL/std": 294.25408935546875, + "epoch": 0.5462555066079295, + "fcm_dpo/beta": 0.0018996518338099122, + "fcm_dpo/delta": -0.08990687876939774, + "fcm_dpo/margin": 255.53970336914062, + "fcm_dpo/q_t": 0.3917329013347626, + "grad_norm": 22.939546585083008, + "learning_rate": 2.551329606220976e-07, + "logits/chosen": -0.4645116329193115, + "logits/rejected": -0.444297730922699, + "logps/chosen": -385.1048583984375, + "logps/ref_chosen": -61.8179931640625, + "logps/ref_rejected": -78.53948974609375, + "logps/rejected": -657.3660888671875, + "loss": 1.0443, + "margin_dpo/margin_mean": 255.53970336914062, + "margin_dpo/margin_std": 328.15814208984375, + "step": 372 + }, + { + "KL/chosen_KL_mean": -354.2593994140625, + "KL/mean": -475.4353332519531, + "KL/rejected_KL_mean": -596.6112060546875, + "KL/std": 283.9608459472656, + "epoch": 0.5477239353891337, + "fcm_dpo/beta": 0.001885814475826919, + "fcm_dpo/delta": -0.060002297163009644, + "fcm_dpo/margin": 242.35189819335938, + "fcm_dpo/q_t": 0.393940806388855, + "grad_norm": 27.45345115661621, + "learning_rate": 2.538498388222517e-07, + "logits/chosen": -0.46065136790275574, + "logits/rejected": -0.438961923122406, + "logps/chosen": -418.4765319824219, + "logps/ref_chosen": -64.21713256835938, + "logps/ref_rejected": -85.95960998535156, + "logps/rejected": -682.5708618164062, + "loss": 1.0473, + "margin_dpo/margin_mean": 242.35189819335938, + "margin_dpo/margin_std": 272.34967041015625, + "step": 373 + }, + { + "KL/chosen_KL_mean": -322.3292236328125, + "KL/mean": -431.51788330078125, + "KL/rejected_KL_mean": -540.70654296875, + "KL/std": 311.81536865234375, + "epoch": 0.5491923641703378, + "fcm_dpo/beta": 0.0018586989026516676, + "fcm_dpo/delta": -0.0067335814237594604, + "fcm_dpo/margin": 218.37728881835938, + "fcm_dpo/q_t": 0.4113299250602722, + "grad_norm": 24.44922637939453, + "learning_rate": 2.525666155755725e-07, + "logits/chosen": -0.524357795715332, + "logits/rejected": -0.5057187676429749, + "logps/chosen": -392.97943115234375, + "logps/ref_chosen": -70.65018463134766, + "logps/ref_rejected": -93.64016723632812, + "logps/rejected": -634.3466796875, + "loss": 1.1182, + "margin_dpo/margin_mean": 218.37728881835938, + "margin_dpo/margin_std": 353.18353271484375, + "step": 374 + }, + { + "KL/chosen_KL_mean": -333.91644287109375, + "KL/mean": -439.2584228515625, + "KL/rejected_KL_mean": -544.6004638671875, + "KL/std": 251.31211853027344, + "epoch": 0.5506607929515418, + "fcm_dpo/beta": 0.0018582877237349749, + "fcm_dpo/delta": 0.008078165352344513, + "fcm_dpo/margin": 210.68402099609375, + "fcm_dpo/q_t": 0.4099903106689453, + "grad_norm": 27.943613052368164, + "learning_rate": 2.512833246961859e-07, + "logits/chosen": -0.4510612487792969, + "logits/rejected": -0.44956958293914795, + "logps/chosen": -393.9966735839844, + "logps/ref_chosen": -60.080223083496094, + "logps/ref_rejected": -88.93830871582031, + "logps/rejected": -633.5387573242188, + "loss": 1.1099, + "margin_dpo/margin_mean": 210.68402099609375, + "margin_dpo/margin_std": 301.5238952636719, + "step": 375 + }, + { + "KL/chosen_KL_mean": -335.46563720703125, + "KL/mean": -467.35919189453125, + "KL/rejected_KL_mean": -599.252685546875, + "KL/std": 275.66827392578125, + "epoch": 0.5521292217327459, + "fcm_dpo/beta": 0.001843743957579136, + "fcm_dpo/delta": -0.09088477492332458, + "fcm_dpo/margin": 263.78704833984375, + "fcm_dpo/q_t": 0.3887389302253723, + "grad_norm": 23.84757423400879, + "learning_rate": 2.5e-07, + "logits/chosen": -0.4562457203865051, + "logits/rejected": -0.446555495262146, + "logps/chosen": -398.1259460449219, + "logps/ref_chosen": -62.660308837890625, + "logps/ref_rejected": -105.52660369873047, + "logps/rejected": -704.779296875, + "loss": 1.0383, + "margin_dpo/margin_mean": 263.7870788574219, + "margin_dpo/margin_std": 319.8635559082031, + "step": 376 + }, + { + "KL/chosen_KL_mean": -334.3457946777344, + "KL/mean": -462.3714599609375, + "KL/rejected_KL_mean": -590.397216796875, + "KL/std": 288.103515625, + "epoch": 0.55359765051395, + "fcm_dpo/beta": 0.0018218334298580885, + "fcm_dpo/delta": -0.0696791335940361, + "fcm_dpo/margin": 256.0513916015625, + "fcm_dpo/q_t": 0.3930322229862213, + "grad_norm": 21.212696075439453, + "learning_rate": 2.487166753038141e-07, + "logits/chosen": -0.40101295709609985, + "logits/rejected": -0.3998126685619354, + "logps/chosen": -388.82452392578125, + "logps/ref_chosen": -54.478736877441406, + "logps/ref_rejected": -98.70335388183594, + "logps/rejected": -689.1005249023438, + "loss": 1.0428, + "margin_dpo/margin_mean": 256.0513916015625, + "margin_dpo/margin_std": 300.54119873046875, + "step": 377 + }, + { + "KL/chosen_KL_mean": -315.5291748046875, + "KL/mean": -449.3112487792969, + "KL/rejected_KL_mean": -583.0933227539062, + "KL/std": 265.04840087890625, + "epoch": 0.5550660792951542, + "fcm_dpo/beta": 0.001788057736121118, + "fcm_dpo/delta": -0.08244302868843079, + "fcm_dpo/margin": 267.56414794921875, + "fcm_dpo/q_t": 0.38815170526504517, + "grad_norm": 26.153120040893555, + "learning_rate": 2.4743338442442754e-07, + "logits/chosen": -0.42576664686203003, + "logits/rejected": -0.4415106773376465, + "logps/chosen": -360.5497131347656, + "logps/ref_chosen": -45.02053451538086, + "logps/ref_rejected": -88.0469741821289, + "logps/rejected": -671.1402587890625, + "loss": 1.025, + "margin_dpo/margin_mean": 267.56414794921875, + "margin_dpo/margin_std": 286.3600769042969, + "step": 378 + }, + { + "KL/chosen_KL_mean": -355.9398193359375, + "KL/mean": -485.5458984375, + "KL/rejected_KL_mean": -615.1519775390625, + "KL/std": 267.96209716796875, + "epoch": 0.5565345080763583, + "fcm_dpo/beta": 0.0017578438855707645, + "fcm_dpo/delta": -0.05908029526472092, + "fcm_dpo/margin": 259.2121887207031, + "fcm_dpo/q_t": 0.3962175250053406, + "grad_norm": 28.71318244934082, + "learning_rate": 2.461501611777483e-07, + "logits/chosen": -0.42304420471191406, + "logits/rejected": -0.44598186016082764, + "logps/chosen": -409.1219177246094, + "logps/ref_chosen": -53.182098388671875, + "logps/ref_rejected": -114.3001708984375, + "logps/rejected": -729.4521484375, + "loss": 1.0578, + "margin_dpo/margin_mean": 259.21221923828125, + "margin_dpo/margin_std": 324.53790283203125, + "step": 379 + }, + { + "KL/chosen_KL_mean": -336.93890380859375, + "KL/mean": -479.41204833984375, + "KL/rejected_KL_mean": -621.8851318359375, + "KL/std": 297.79949951171875, + "epoch": 0.5580029368575624, + "fcm_dpo/beta": 0.0017373515293002129, + "fcm_dpo/delta": -0.09990786015987396, + "fcm_dpo/margin": 284.94622802734375, + "fcm_dpo/q_t": 0.38500848412513733, + "grad_norm": 25.73267364501953, + "learning_rate": 2.4486703937790243e-07, + "logits/chosen": -0.43626442551612854, + "logits/rejected": -0.4630964398384094, + "logps/chosen": -388.29193115234375, + "logps/ref_chosen": -51.3530387878418, + "logps/ref_rejected": -104.19169616699219, + "logps/rejected": -726.0767822265625, + "loss": 1.0264, + "margin_dpo/margin_mean": 284.94622802734375, + "margin_dpo/margin_std": 328.1457824707031, + "step": 380 + }, + { + "KL/chosen_KL_mean": -349.9728088378906, + "KL/mean": -449.6802978515625, + "KL/rejected_KL_mean": -549.3878173828125, + "KL/std": 246.59634399414062, + "epoch": 0.5594713656387665, + "fcm_dpo/beta": 0.0017377103213220835, + "fcm_dpo/delta": 0.055430181324481964, + "fcm_dpo/margin": 199.41497802734375, + "fcm_dpo/q_t": 0.42109525203704834, + "grad_norm": 24.38262939453125, + "learning_rate": 2.435840528363426e-07, + "logits/chosen": -0.4588872790336609, + "logits/rejected": -0.4429172873497009, + "logps/chosen": -407.77587890625, + "logps/ref_chosen": -57.80306625366211, + "logps/ref_rejected": -79.21940612792969, + "logps/rejected": -628.607177734375, + "loss": 1.1478, + "margin_dpo/margin_mean": 199.41497802734375, + "margin_dpo/margin_std": 332.27398681640625, + "step": 381 + }, + { + "KL/chosen_KL_mean": -328.7398681640625, + "KL/mean": -452.5684509277344, + "KL/rejected_KL_mean": -576.39697265625, + "KL/std": 232.17242431640625, + "epoch": 0.5609397944199707, + "fcm_dpo/beta": 0.001735961064696312, + "fcm_dpo/delta": -0.031305499374866486, + "fcm_dpo/margin": 247.65719604492188, + "fcm_dpo/q_t": 0.3991192877292633, + "grad_norm": 26.342195510864258, + "learning_rate": 2.4230123536095745e-07, + "logits/chosen": -0.48217618465423584, + "logits/rejected": -0.48925304412841797, + "logps/chosen": -394.7601623535156, + "logps/ref_chosen": -66.02030181884766, + "logps/ref_rejected": -110.71016693115234, + "logps/rejected": -687.107177734375, + "loss": 1.0484, + "margin_dpo/margin_mean": 247.65719604492188, + "margin_dpo/margin_std": 255.80958557128906, + "step": 382 + }, + { + "KL/chosen_KL_mean": -337.642822265625, + "KL/mean": -457.468505859375, + "KL/rejected_KL_mean": -577.294189453125, + "KL/std": 262.3541564941406, + "epoch": 0.5624082232011748, + "fcm_dpo/beta": 0.0017293533310294151, + "fcm_dpo/delta": -0.015089768916368484, + "fcm_dpo/margin": 239.6513671875, + "fcm_dpo/q_t": 0.40462052822113037, + "grad_norm": 30.611806869506836, + "learning_rate": 2.4101862075518037e-07, + "logits/chosen": -0.4417022466659546, + "logits/rejected": -0.4511658549308777, + "logps/chosen": -388.0343017578125, + "logps/ref_chosen": -50.39148712158203, + "logps/ref_rejected": -93.71589660644531, + "logps/rejected": -671.0100708007812, + "loss": 1.0938, + "margin_dpo/margin_mean": 239.65135192871094, + "margin_dpo/margin_std": 338.258544921875, + "step": 383 + }, + { + "KL/chosen_KL_mean": -352.222900390625, + "KL/mean": -446.8468322753906, + "KL/rejected_KL_mean": -541.4708251953125, + "KL/std": 242.38162231445312, + "epoch": 0.5638766519823789, + "fcm_dpo/beta": 0.0017538972897455096, + "fcm_dpo/delta": 0.06988409906625748, + "fcm_dpo/margin": 189.2479248046875, + "fcm_dpo/q_t": 0.4214822053909302, + "grad_norm": 24.98710060119629, + "learning_rate": 2.397362428170992e-07, + "logits/chosen": -0.4990885853767395, + "logits/rejected": -0.4952540993690491, + "logps/chosen": -404.26898193359375, + "logps/ref_chosen": -52.046104431152344, + "logps/ref_rejected": -85.76089477539062, + "logps/rejected": -627.231689453125, + "loss": 1.1222, + "margin_dpo/margin_mean": 189.2479248046875, + "margin_dpo/margin_std": 231.77182006835938, + "step": 384 + }, + { + "KL/chosen_KL_mean": -317.3160400390625, + "KL/mean": -436.0095520019531, + "KL/rejected_KL_mean": -554.7030639648438, + "KL/std": 214.3333740234375, + "epoch": 0.5653450807635829, + "fcm_dpo/beta": 0.0017491495236754417, + "fcm_dpo/delta": -0.015889476984739304, + "fcm_dpo/margin": 237.3870086669922, + "fcm_dpo/q_t": 0.40170639753341675, + "grad_norm": 29.25759506225586, + "learning_rate": 2.3845413533856514e-07, + "logits/chosen": -0.5247458219528198, + "logits/rejected": -0.5023648738861084, + "logps/chosen": -382.8681945800781, + "logps/ref_chosen": -65.55215454101562, + "logps/ref_rejected": -77.82792663574219, + "logps/rejected": -632.531005859375, + "loss": 1.055, + "margin_dpo/margin_mean": 237.38702392578125, + "margin_dpo/margin_std": 238.87646484375, + "step": 385 + }, + { + "KL/chosen_KL_mean": -334.15228271484375, + "KL/mean": -458.6162109375, + "KL/rejected_KL_mean": -583.0802001953125, + "KL/std": 262.49871826171875, + "epoch": 0.566813509544787, + "fcm_dpo/beta": 0.0017403860110789537, + "fcm_dpo/delta": -0.03472103923559189, + "fcm_dpo/margin": 248.92791748046875, + "fcm_dpo/q_t": 0.3999601900577545, + "grad_norm": 26.947490692138672, + "learning_rate": 2.3717233210430254e-07, + "logits/chosen": -0.5123308300971985, + "logits/rejected": -0.5101590156555176, + "logps/chosen": -392.3741455078125, + "logps/ref_chosen": -58.22185516357422, + "logps/ref_rejected": -92.32742309570312, + "logps/rejected": -675.4075927734375, + "loss": 1.0633, + "margin_dpo/margin_mean": 248.9279022216797, + "margin_dpo/margin_std": 302.4674987792969, + "step": 386 + }, + { + "KL/chosen_KL_mean": -361.41119384765625, + "KL/mean": -466.2529602050781, + "KL/rejected_KL_mean": -571.0947265625, + "KL/std": 245.76097106933594, + "epoch": 0.5682819383259912, + "fcm_dpo/beta": 0.0017379240598529577, + "fcm_dpo/delta": 0.036699328571558, + "fcm_dpo/margin": 209.68359375, + "fcm_dpo/q_t": 0.4142611622810364, + "grad_norm": 30.391345977783203, + "learning_rate": 2.3589086689101889e-07, + "logits/chosen": -0.5567930340766907, + "logits/rejected": -0.5412279367446899, + "logps/chosen": -427.83062744140625, + "logps/ref_chosen": -66.41944885253906, + "logps/ref_rejected": -92.16915893554688, + "logps/rejected": -663.263916015625, + "loss": 1.1027, + "margin_dpo/margin_mean": 209.68357849121094, + "margin_dpo/margin_std": 252.79678344726562, + "step": 387 + }, + { + "KL/chosen_KL_mean": -343.9563903808594, + "KL/mean": -484.5562744140625, + "KL/rejected_KL_mean": -625.1561889648438, + "KL/std": 287.8593444824219, + "epoch": 0.5697503671071953, + "fcm_dpo/beta": 0.0017218522261828184, + "fcm_dpo/delta": -0.08878612518310547, + "fcm_dpo/margin": 281.1998291015625, + "fcm_dpo/q_t": 0.390036940574646, + "grad_norm": 26.922496795654297, + "learning_rate": 2.3460977346651428e-07, + "logits/chosen": -0.46857941150665283, + "logits/rejected": -0.48115378618240356, + "logps/chosen": -394.0858459472656, + "logps/ref_chosen": -50.129459381103516, + "logps/ref_rejected": -104.43305969238281, + "logps/rejected": -729.5892333984375, + "loss": 1.03, + "margin_dpo/margin_mean": 281.1997985839844, + "margin_dpo/margin_std": 325.59906005859375, + "step": 388 + }, + { + "KL/chosen_KL_mean": -386.0830383300781, + "KL/mean": -507.82427978515625, + "KL/rejected_KL_mean": -629.5654296875, + "KL/std": 286.28631591796875, + "epoch": 0.5712187958883994, + "fcm_dpo/beta": 0.0017109981272369623, + "fcm_dpo/delta": -0.01739252358675003, + "fcm_dpo/margin": 243.48245239257812, + "fcm_dpo/q_t": 0.4042537808418274, + "grad_norm": 24.15456771850586, + "learning_rate": 2.3332908558879177e-07, + "logits/chosen": -0.5262615084648132, + "logits/rejected": -0.5186604261398315, + "logps/chosen": -443.9896240234375, + "logps/ref_chosen": -57.906593322753906, + "logps/ref_rejected": -77.91454315185547, + "logps/rejected": -707.47998046875, + "loss": 1.0799, + "margin_dpo/margin_mean": 243.48245239257812, + "margin_dpo/margin_std": 314.66058349609375, + "step": 389 + }, + { + "KL/chosen_KL_mean": -384.50494384765625, + "KL/mean": -505.17059326171875, + "KL/rejected_KL_mean": -625.836181640625, + "KL/std": 288.66546630859375, + "epoch": 0.5726872246696035, + "fcm_dpo/beta": 0.0017028467264026403, + "fcm_dpo/delta": -0.011735277250409126, + "fcm_dpo/margin": 241.33126831054688, + "fcm_dpo/q_t": 0.4092911183834076, + "grad_norm": 26.528804779052734, + "learning_rate": 2.320488370051681e-07, + "logits/chosen": -0.46930596232414246, + "logits/rejected": -0.46219387650489807, + "logps/chosen": -433.7308349609375, + "logps/ref_chosen": -49.22591781616211, + "logps/ref_rejected": -85.5281982421875, + "logps/rejected": -711.3643798828125, + "loss": 1.1069, + "margin_dpo/margin_mean": 241.33126831054688, + "margin_dpo/margin_std": 371.08599853515625, + "step": 390 + }, + { + "KL/chosen_KL_mean": -386.6776123046875, + "KL/mean": -456.50341796875, + "KL/rejected_KL_mean": -526.3292236328125, + "KL/std": 271.3560791015625, + "epoch": 0.5741556534508077, + "fcm_dpo/beta": 0.0017502898117527366, + "fcm_dpo/delta": 0.15936514735221863, + "fcm_dpo/margin": 139.651611328125, + "fcm_dpo/q_t": 0.4439963400363922, + "grad_norm": 45.76322555541992, + "learning_rate": 2.3076906145138405e-07, + "logits/chosen": -0.5254815220832825, + "logits/rejected": -0.5183066725730896, + "logps/chosen": -451.00726318359375, + "logps/ref_chosen": -64.32965087890625, + "logps/ref_rejected": -86.73820495605469, + "logps/rejected": -613.0674438476562, + "loss": 1.2173, + "margin_dpo/margin_mean": 139.65162658691406, + "margin_dpo/margin_std": 283.3598937988281, + "step": 391 + }, + { + "KL/chosen_KL_mean": -326.25396728515625, + "KL/mean": -469.5254211425781, + "KL/rejected_KL_mean": -612.796875, + "KL/std": 288.09954833984375, + "epoch": 0.5756240822320118, + "fcm_dpo/beta": 0.00174234458245337, + "fcm_dpo/delta": -0.10435783863067627, + "fcm_dpo/margin": 286.5428771972656, + "fcm_dpo/q_t": 0.38457435369491577, + "grad_norm": 23.80723762512207, + "learning_rate": 2.294897926507156e-07, + "logits/chosen": -0.4814883768558502, + "logits/rejected": -0.4757598340511322, + "logps/chosen": -379.7579345703125, + "logps/ref_chosen": -53.50397872924805, + "logps/ref_rejected": -102.34584045410156, + "logps/rejected": -715.1427001953125, + "loss": 1.0049, + "margin_dpo/margin_mean": 286.5428771972656, + "margin_dpo/margin_std": 283.0867004394531, + "step": 392 + }, + { + "KL/chosen_KL_mean": -324.5339660644531, + "KL/mean": -436.1412353515625, + "KL/rejected_KL_mean": -547.74853515625, + "KL/std": 283.82720947265625, + "epoch": 0.5770925110132159, + "fcm_dpo/beta": 0.0017277842853218317, + "fcm_dpo/delta": 0.014872867614030838, + "fcm_dpo/margin": 223.2145233154297, + "fcm_dpo/q_t": 0.41501516103744507, + "grad_norm": 21.790613174438477, + "learning_rate": 2.2821106431308543e-07, + "logits/chosen": -0.46388766169548035, + "logits/rejected": -0.46215295791625977, + "logps/chosen": -371.00787353515625, + "logps/ref_chosen": -46.473915100097656, + "logps/ref_rejected": -71.96885681152344, + "logps/rejected": -619.7174072265625, + "loss": 1.1214, + "margin_dpo/margin_mean": 223.21453857421875, + "margin_dpo/margin_std": 357.96539306640625, + "step": 393 + }, + { + "KL/chosen_KL_mean": -370.5946044921875, + "KL/mean": -487.31060791015625, + "KL/rejected_KL_mean": -604.026611328125, + "KL/std": 303.3579406738281, + "epoch": 0.57856093979442, + "fcm_dpo/beta": 0.001729074981994927, + "fcm_dpo/delta": -0.0038064131513237953, + "fcm_dpo/margin": 233.43197631835938, + "fcm_dpo/q_t": 0.4082695245742798, + "grad_norm": 26.26580810546875, + "learning_rate": 2.2693291013417452e-07, + "logits/chosen": -0.4947393238544464, + "logits/rejected": -0.4953378438949585, + "logps/chosen": -423.50616455078125, + "logps/ref_chosen": -52.91154861450195, + "logps/ref_rejected": -90.8226318359375, + "logps/rejected": -694.8492431640625, + "loss": 1.0907, + "margin_dpo/margin_mean": 233.43197631835938, + "margin_dpo/margin_std": 314.2247314453125, + "step": 394 + }, + { + "KL/chosen_KL_mean": -367.9709777832031, + "KL/mean": -494.29119873046875, + "KL/rejected_KL_mean": -620.6114501953125, + "KL/std": 292.8189697265625, + "epoch": 0.580029368575624, + "fcm_dpo/beta": 0.001716281520202756, + "fcm_dpo/delta": -0.03543686866760254, + "fcm_dpo/margin": 252.64047241210938, + "fcm_dpo/q_t": 0.4020352363586426, + "grad_norm": 25.020362854003906, + "learning_rate": 2.2565536379453404e-07, + "logits/chosen": -0.5321957468986511, + "logits/rejected": -0.5300949811935425, + "logps/chosen": -430.51708984375, + "logps/ref_chosen": -62.546112060546875, + "logps/ref_rejected": -83.78262329101562, + "logps/rejected": -704.39404296875, + "loss": 1.0783, + "margin_dpo/margin_mean": 252.64048767089844, + "margin_dpo/margin_std": 341.7522888183594, + "step": 395 + }, + { + "KL/chosen_KL_mean": -370.54937744140625, + "KL/mean": -482.26983642578125, + "KL/rejected_KL_mean": -593.9903564453125, + "KL/std": 286.0380554199219, + "epoch": 0.5814977973568282, + "fcm_dpo/beta": 0.001719313906505704, + "fcm_dpo/delta": 0.016418248414993286, + "fcm_dpo/margin": 223.4409942626953, + "fcm_dpo/q_t": 0.4104015827178955, + "grad_norm": 26.507614135742188, + "learning_rate": 2.2437845895869825e-07, + "logits/chosen": -0.5050040483474731, + "logits/rejected": -0.4854010343551636, + "logps/chosen": -439.5453186035156, + "logps/ref_chosen": -68.99594116210938, + "logps/ref_rejected": -88.64665985107422, + "logps/rejected": -682.6370239257812, + "loss": 1.0883, + "margin_dpo/margin_mean": 223.44097900390625, + "margin_dpo/margin_std": 268.4827880859375, + "step": 396 + }, + { + "KL/chosen_KL_mean": -350.86260986328125, + "KL/mean": -498.1780700683594, + "KL/rejected_KL_mean": -645.4935302734375, + "KL/std": 290.6457214355469, + "epoch": 0.5829662261380323, + "fcm_dpo/beta": 0.0016906873788684607, + "fcm_dpo/delta": -0.10405933111906052, + "fcm_dpo/margin": 294.6309509277344, + "fcm_dpo/q_t": 0.38490188121795654, + "grad_norm": 32.973846435546875, + "learning_rate": 2.2310222927429716e-07, + "logits/chosen": -0.49156516790390015, + "logits/rejected": -0.4970093369483948, + "logps/chosen": -412.1397705078125, + "logps/ref_chosen": -61.27716827392578, + "logps/ref_rejected": -103.11612701416016, + "logps/rejected": -748.6097412109375, + "loss": 1.0113, + "margin_dpo/margin_mean": 294.6309509277344, + "margin_dpo/margin_std": 307.0301513671875, + "step": 397 + }, + { + "KL/chosen_KL_mean": -376.7286376953125, + "KL/mean": -511.4410400390625, + "KL/rejected_KL_mean": -646.1535034179688, + "KL/std": 304.7323303222656, + "epoch": 0.5844346549192364, + "fcm_dpo/beta": 0.001674711937084794, + "fcm_dpo/delta": -0.053648628294467926, + "fcm_dpo/margin": 269.42486572265625, + "fcm_dpo/q_t": 0.3986341953277588, + "grad_norm": 23.229272842407227, + "learning_rate": 2.2182670837116972e-07, + "logits/chosen": -0.5308432579040527, + "logits/rejected": -0.5298266410827637, + "logps/chosen": -444.88018798828125, + "logps/ref_chosen": -68.15155029296875, + "logps/ref_rejected": -108.52360534667969, + "logps/rejected": -754.6771240234375, + "loss": 1.0636, + "margin_dpo/margin_mean": 269.42486572265625, + "margin_dpo/margin_std": 354.4512634277344, + "step": 398 + }, + { + "KL/chosen_KL_mean": -326.7265625, + "KL/mean": -445.32049560546875, + "KL/rejected_KL_mean": -563.9144287109375, + "KL/std": 262.29473876953125, + "epoch": 0.5859030837004405, + "fcm_dpo/beta": 0.0016672208439558744, + "fcm_dpo/delta": 0.004600860178470612, + "fcm_dpo/margin": 237.18785095214844, + "fcm_dpo/q_t": 0.40930691361427307, + "grad_norm": 31.19171142578125, + "learning_rate": 2.2055192986047804e-07, + "logits/chosen": -0.4892912209033966, + "logits/rejected": -0.45055025815963745, + "logps/chosen": -387.6163635253906, + "logps/ref_chosen": -60.889801025390625, + "logps/ref_rejected": -77.965576171875, + "logps/rejected": -641.8800048828125, + "loss": 1.1002, + "margin_dpo/margin_mean": 237.1878662109375, + "margin_dpo/margin_std": 328.7167663574219, + "step": 399 + }, + { + "KL/chosen_KL_mean": -316.741943359375, + "KL/mean": -485.7972412109375, + "KL/rejected_KL_mean": -654.8525390625, + "KL/std": 280.26068115234375, + "epoch": 0.5873715124816447, + "fcm_dpo/beta": 0.001628828700631857, + "fcm_dpo/delta": -0.1600421667098999, + "fcm_dpo/margin": 338.1106872558594, + "fcm_dpo/q_t": 0.3711463212966919, + "grad_norm": 22.955949783325195, + "learning_rate": 2.192779273338215e-07, + "logits/chosen": -0.5029030442237854, + "logits/rejected": -0.4994921386241913, + "logps/chosen": -380.385498046875, + "logps/ref_chosen": -63.64359664916992, + "logps/ref_rejected": -105.252685546875, + "logps/rejected": -760.105224609375, + "loss": 0.9701, + "margin_dpo/margin_mean": 338.11065673828125, + "margin_dpo/margin_std": 314.16839599609375, + "step": 400 + }, + { + "KL/chosen_KL_mean": -364.88543701171875, + "KL/mean": -461.265869140625, + "KL/rejected_KL_mean": -557.6463012695312, + "KL/std": 291.4111022949219, + "epoch": 0.5888399412628488, + "fcm_dpo/beta": 0.001636154600419104, + "fcm_dpo/delta": 0.0874527096748352, + "fcm_dpo/margin": 192.76087951660156, + "fcm_dpo/q_t": 0.43033739924430847, + "grad_norm": 27.67872428894043, + "learning_rate": 2.1800473436235136e-07, + "logits/chosen": -0.499002069234848, + "logits/rejected": -0.49258700013160706, + "logps/chosen": -422.0484619140625, + "logps/ref_chosen": -57.16303253173828, + "logps/ref_rejected": -83.79249572753906, + "logps/rejected": -641.4387817382812, + "loss": 1.1922, + "margin_dpo/margin_mean": 192.7608642578125, + "margin_dpo/margin_std": 390.67706298828125, + "step": 401 + }, + { + "KL/chosen_KL_mean": -276.1816711425781, + "KL/mean": -451.49737548828125, + "KL/rejected_KL_mean": -626.81298828125, + "KL/std": 308.62689208984375, + "epoch": 0.5903083700440529, + "fcm_dpo/beta": 0.0016060995403677225, + "fcm_dpo/delta": -0.17291411757469177, + "fcm_dpo/margin": 350.63134765625, + "fcm_dpo/q_t": 0.3695389926433563, + "grad_norm": 34.74311065673828, + "learning_rate": 2.1673238449588665e-07, + "logits/chosen": -0.4849190413951874, + "logits/rejected": -0.4742359220981598, + "logps/chosen": -326.92205810546875, + "logps/ref_chosen": -50.74037170410156, + "logps/ref_rejected": -81.0460433959961, + "logps/rejected": -707.8590698242188, + "loss": 0.9581, + "margin_dpo/margin_mean": 350.63134765625, + "margin_dpo/margin_std": 317.71551513671875, + "step": 402 + }, + { + "KL/chosen_KL_mean": -312.72406005859375, + "KL/mean": -446.8978271484375, + "KL/rejected_KL_mean": -581.0715942382812, + "KL/std": 288.34051513671875, + "epoch": 0.591776798825257, + "fcm_dpo/beta": 0.001585017773322761, + "fcm_dpo/delta": -0.026479586958885193, + "fcm_dpo/margin": 268.3475341796875, + "fcm_dpo/q_t": 0.4014202356338501, + "grad_norm": 23.439414978027344, + "learning_rate": 2.154609112620295e-07, + "logits/chosen": -0.49934089183807373, + "logits/rejected": -0.50015789270401, + "logps/chosen": -359.87139892578125, + "logps/ref_chosen": -47.14731216430664, + "logps/ref_rejected": -77.2666015625, + "logps/rejected": -658.3381958007812, + "loss": 1.0603, + "margin_dpo/margin_mean": 268.3475341796875, + "margin_dpo/margin_std": 303.0990295410156, + "step": 403 + }, + { + "KL/chosen_KL_mean": -346.9129333496094, + "KL/mean": -478.5081787109375, + "KL/rejected_KL_mean": -610.1033935546875, + "KL/std": 282.465087890625, + "epoch": 0.593245227606461, + "fcm_dpo/beta": 0.00157838873565197, + "fcm_dpo/delta": -0.016118429601192474, + "fcm_dpo/margin": 263.1905517578125, + "fcm_dpo/q_t": 0.40540170669555664, + "grad_norm": 29.329235076904297, + "learning_rate": 2.1419034816528218e-07, + "logits/chosen": -0.4767064154148102, + "logits/rejected": -0.46850764751434326, + "logps/chosen": -394.7882080078125, + "logps/ref_chosen": -47.875274658203125, + "logps/ref_rejected": -77.15499877929688, + "logps/rejected": -687.2584228515625, + "loss": 1.0909, + "margin_dpo/margin_mean": 263.1905517578125, + "margin_dpo/margin_std": 365.6813659667969, + "step": 404 + }, + { + "KL/chosen_KL_mean": -388.49285888671875, + "KL/mean": -496.2237243652344, + "KL/rejected_KL_mean": -603.95458984375, + "KL/std": 306.67413330078125, + "epoch": 0.5947136563876652, + "fcm_dpo/beta": 0.0015723207034170628, + "fcm_dpo/delta": -0.039775051176548004, + "fcm_dpo/margin": 215.46173095703125, + "fcm_dpo/q_t": 0.423758327960968, + "grad_norm": 30.571292877197266, + "learning_rate": 2.129207286861638e-07, + "logits/chosen": -0.45147573947906494, + "logits/rejected": -0.441570520401001, + "logps/chosen": -453.65576171875, + "logps/ref_chosen": -65.16290283203125, + "logps/ref_rejected": -87.18678283691406, + "logps/rejected": -691.141357421875, + "loss": 1.1641, + "margin_dpo/margin_mean": 215.4617462158203, + "margin_dpo/margin_std": 380.0777587890625, + "step": 405 + }, + { + "KL/chosen_KL_mean": -344.60443115234375, + "KL/mean": -486.24908447265625, + "KL/rejected_KL_mean": -627.893798828125, + "KL/std": 301.5284423828125, + "epoch": 0.5961820851688693, + "fcm_dpo/beta": 0.0015619369223713875, + "fcm_dpo/delta": -0.044593267142772675, + "fcm_dpo/margin": 283.2893981933594, + "fcm_dpo/q_t": 0.39848363399505615, + "grad_norm": 23.295684814453125, + "learning_rate": 2.1165208628032861e-07, + "logits/chosen": -0.5039137005805969, + "logits/rejected": -0.5129928588867188, + "logps/chosen": -394.34521484375, + "logps/ref_chosen": -49.740814208984375, + "logps/ref_rejected": -92.07862854003906, + "logps/rejected": -719.972412109375, + "loss": 1.0554, + "margin_dpo/margin_mean": 283.28936767578125, + "margin_dpo/margin_std": 333.28466796875, + "step": 406 + }, + { + "KL/chosen_KL_mean": -370.0421142578125, + "KL/mean": -455.7298889160156, + "KL/rejected_KL_mean": -541.4176025390625, + "KL/std": 237.25067138671875, + "epoch": 0.5976505139500734, + "fcm_dpo/beta": 0.0015546645736321807, + "fcm_dpo/delta": 0.016098780557513237, + "fcm_dpo/margin": 171.37547302246094, + "fcm_dpo/q_t": 0.436930388212204, + "grad_norm": 48.98335647583008, + "learning_rate": 2.1038445437768375e-07, + "logits/chosen": -0.4963209331035614, + "logits/rejected": -0.47049441933631897, + "logps/chosen": -426.3728332519531, + "logps/ref_chosen": -56.33069610595703, + "logps/ref_rejected": -77.51209259033203, + "logps/rejected": -618.9296875, + "loss": 1.1985, + "margin_dpo/margin_mean": 171.37548828125, + "margin_dpo/margin_std": 317.56884765625, + "step": 407 + }, + { + "KL/chosen_KL_mean": -374.8062744140625, + "KL/mean": -474.4832458496094, + "KL/rejected_KL_mean": -574.1602783203125, + "KL/std": 233.34344482421875, + "epoch": 0.5991189427312775, + "fcm_dpo/beta": 0.0015771770849823952, + "fcm_dpo/delta": 0.08839617669582367, + "fcm_dpo/margin": 199.35397338867188, + "fcm_dpo/q_t": 0.4258885979652405, + "grad_norm": 24.81488037109375, + "learning_rate": 2.0911786638150872e-07, + "logits/chosen": -0.484347403049469, + "logits/rejected": -0.4580131769180298, + "logps/chosen": -444.5955810546875, + "logps/ref_chosen": -69.789306640625, + "logps/ref_rejected": -90.09693908691406, + "logps/rejected": -664.2572021484375, + "loss": 1.1376, + "margin_dpo/margin_mean": 199.35397338867188, + "margin_dpo/margin_std": 261.7845764160156, + "step": 408 + }, + { + "KL/chosen_KL_mean": -363.4441833496094, + "KL/mean": -462.02862548828125, + "KL/rejected_KL_mean": -560.6130981445312, + "KL/std": 254.19630432128906, + "epoch": 0.6005873715124816, + "fcm_dpo/beta": 0.0016081533394753933, + "fcm_dpo/delta": 0.0854191780090332, + "fcm_dpo/margin": 197.1689453125, + "fcm_dpo/q_t": 0.42619985342025757, + "grad_norm": 33.778438568115234, + "learning_rate": 2.0785235566757517e-07, + "logits/chosen": -0.4777407944202423, + "logits/rejected": -0.46434783935546875, + "logps/chosen": -430.7615966796875, + "logps/ref_chosen": -67.31744384765625, + "logps/ref_rejected": -84.904296875, + "logps/rejected": -645.5173950195312, + "loss": 1.1435, + "margin_dpo/margin_mean": 197.1689453125, + "margin_dpo/margin_std": 282.058349609375, + "step": 409 + }, + { + "KL/chosen_KL_mean": -339.41485595703125, + "KL/mean": -453.9027404785156, + "KL/rejected_KL_mean": -568.390625, + "KL/std": 249.15707397460938, + "epoch": 0.6020558002936858, + "fcm_dpo/beta": 0.0016180926468223333, + "fcm_dpo/delta": 0.030641639605164528, + "fcm_dpo/margin": 228.97573852539062, + "fcm_dpo/q_t": 0.41249266266822815, + "grad_norm": 26.49384880065918, + "learning_rate": 2.065879555832674e-07, + "logits/chosen": -0.5207273960113525, + "logits/rejected": -0.5231969952583313, + "logps/chosen": -390.8802185058594, + "logps/ref_chosen": -51.465354919433594, + "logps/ref_rejected": -83.198974609375, + "logps/rejected": -651.589599609375, + "loss": 1.0996, + "margin_dpo/margin_mean": 228.97573852539062, + "margin_dpo/margin_std": 282.3933410644531, + "step": 410 + }, + { + "KL/chosen_KL_mean": -361.8911437988281, + "KL/mean": -477.2146911621094, + "KL/rejected_KL_mean": -592.5382080078125, + "KL/std": 280.512939453125, + "epoch": 0.6035242290748899, + "fcm_dpo/beta": 0.0016009939135983586, + "fcm_dpo/delta": -0.06567565351724625, + "fcm_dpo/margin": 230.64710998535156, + "fcm_dpo/q_t": 0.41523507237434387, + "grad_norm": 34.77162170410156, + "learning_rate": 2.0532469944670343e-07, + "logits/chosen": -0.4923670291900635, + "logits/rejected": -0.5041638612747192, + "logps/chosen": -414.19842529296875, + "logps/ref_chosen": -52.30727005004883, + "logps/ref_rejected": -80.69495391845703, + "logps/rejected": -673.2332153320312, + "loss": 1.117, + "margin_dpo/margin_mean": 230.6470947265625, + "margin_dpo/margin_std": 321.73370361328125, + "step": 411 + }, + { + "KL/chosen_KL_mean": -363.21783447265625, + "KL/mean": -484.7549133300781, + "KL/rejected_KL_mean": -606.2919311523438, + "KL/std": 272.1863098144531, + "epoch": 0.604992657856094, + "fcm_dpo/beta": 0.0016049096593633294, + "fcm_dpo/delta": 0.010263003408908844, + "fcm_dpo/margin": 243.07411193847656, + "fcm_dpo/q_t": 0.40966540575027466, + "grad_norm": 34.43694305419922, + "learning_rate": 2.0406262054585738e-07, + "logits/chosen": -0.5590307712554932, + "logits/rejected": -0.5907352566719055, + "logps/chosen": -416.3619689941406, + "logps/ref_chosen": -53.144126892089844, + "logps/ref_rejected": -100.0608139038086, + "logps/rejected": -706.352783203125, + "loss": 1.0948, + "margin_dpo/margin_mean": 243.0740966796875, + "margin_dpo/margin_std": 316.7934875488281, + "step": 412 + }, + { + "KL/chosen_KL_mean": -387.7173767089844, + "KL/mean": -507.04754638671875, + "KL/rejected_KL_mean": -626.3777465820312, + "KL/std": 278.0577392578125, + "epoch": 0.6064610866372981, + "fcm_dpo/beta": 0.0016117544146254659, + "fcm_dpo/delta": 0.01580866426229477, + "fcm_dpo/margin": 238.6603240966797, + "fcm_dpo/q_t": 0.40934064984321594, + "grad_norm": 25.042572021484375, + "learning_rate": 2.0280175213768205e-07, + "logits/chosen": -0.5094854235649109, + "logits/rejected": -0.5156064033508301, + "logps/chosen": -449.2993469238281, + "logps/ref_chosen": -61.58196258544922, + "logps/ref_rejected": -99.47340393066406, + "logps/rejected": -725.8511352539062, + "loss": 1.0935, + "margin_dpo/margin_mean": 238.66033935546875, + "margin_dpo/margin_std": 299.20660400390625, + "step": 413 + }, + { + "KL/chosen_KL_mean": -353.5774841308594, + "KL/mean": -484.88934326171875, + "KL/rejected_KL_mean": -616.2012329101562, + "KL/std": 261.0632019042969, + "epoch": 0.6079295154185022, + "fcm_dpo/beta": 0.0016139191575348377, + "fcm_dpo/delta": -0.025633584707975388, + "fcm_dpo/margin": 262.623779296875, + "fcm_dpo/q_t": 0.4009360074996948, + "grad_norm": 28.529882431030273, + "learning_rate": 2.0154212744723247e-07, + "logits/chosen": -0.43805867433547974, + "logits/rejected": -0.43269163370132446, + "logps/chosen": -400.208984375, + "logps/ref_chosen": -46.63148498535156, + "logps/ref_rejected": -87.64653015136719, + "logps/rejected": -703.8477783203125, + "loss": 1.0665, + "margin_dpo/margin_mean": 262.6237487792969, + "margin_dpo/margin_std": 301.7000732421875, + "step": 414 + }, + { + "KL/chosen_KL_mean": -398.528564453125, + "KL/mean": -497.85247802734375, + "KL/rejected_KL_mean": -597.1763916015625, + "KL/std": 269.6009826660156, + "epoch": 0.6093979441997063, + "fcm_dpo/beta": 0.0016152863390743732, + "fcm_dpo/delta": 0.08179127424955368, + "fcm_dpo/margin": 198.6478271484375, + "fcm_dpo/q_t": 0.42490124702453613, + "grad_norm": 25.62877655029297, + "learning_rate": 2.002837796667909e-07, + "logits/chosen": -0.5635542869567871, + "logits/rejected": -0.5637483596801758, + "logps/chosen": -477.1468505859375, + "logps/ref_chosen": -78.6182861328125, + "logps/ref_rejected": -100.47752380371094, + "logps/rejected": -697.6539306640625, + "loss": 1.1494, + "margin_dpo/margin_mean": 198.6478271484375, + "margin_dpo/margin_std": 303.8634033203125, + "step": 415 + }, + { + "KL/chosen_KL_mean": -366.1192321777344, + "KL/mean": -525.1725463867188, + "KL/rejected_KL_mean": -684.225830078125, + "KL/std": 304.578369140625, + "epoch": 0.6108663729809104, + "fcm_dpo/beta": 0.0016041703056544065, + "fcm_dpo/delta": -0.11606433987617493, + "fcm_dpo/margin": 318.10662841796875, + "fcm_dpo/q_t": 0.38077855110168457, + "grad_norm": 45.315086364746094, + "learning_rate": 1.990267419549914e-07, + "logits/chosen": -0.523003101348877, + "logits/rejected": -0.5284410715103149, + "logps/chosen": -424.39837646484375, + "logps/ref_chosen": -58.27912521362305, + "logps/ref_rejected": -90.56871795654297, + "logps/rejected": -774.7945556640625, + "loss": 0.9918, + "margin_dpo/margin_mean": 318.10662841796875, + "margin_dpo/margin_std": 293.16387939453125, + "step": 416 + }, + { + "KL/chosen_KL_mean": -363.7352600097656, + "KL/mean": -493.62274169921875, + "KL/rejected_KL_mean": -623.51025390625, + "KL/std": 269.7994384765625, + "epoch": 0.6123348017621145, + "fcm_dpo/beta": 0.0015893441159278154, + "fcm_dpo/delta": -0.013450254686176777, + "fcm_dpo/margin": 259.77496337890625, + "fcm_dpo/q_t": 0.4028276801109314, + "grad_norm": 28.700593948364258, + "learning_rate": 1.9777104743594686e-07, + "logits/chosen": -0.5118107795715332, + "logits/rejected": -0.49247753620147705, + "logps/chosen": -413.9339599609375, + "logps/ref_chosen": -50.1987190246582, + "logps/ref_rejected": -68.15184020996094, + "logps/rejected": -691.6620483398438, + "loss": 1.0588, + "margin_dpo/margin_mean": 259.77496337890625, + "margin_dpo/margin_std": 269.56451416015625, + "step": 417 + }, + { + "KL/chosen_KL_mean": -390.80078125, + "KL/mean": -528.1920166015625, + "KL/rejected_KL_mean": -665.583251953125, + "KL/std": 311.567626953125, + "epoch": 0.6138032305433186, + "fcm_dpo/beta": 0.0015889217611402273, + "fcm_dpo/delta": -0.039183445274829865, + "fcm_dpo/margin": 274.78253173828125, + "fcm_dpo/q_t": 0.4020264744758606, + "grad_norm": 25.165157318115234, + "learning_rate": 1.965167291983757e-07, + "logits/chosen": -0.6080072522163391, + "logits/rejected": -0.5904369950294495, + "logps/chosen": -472.77923583984375, + "logps/ref_chosen": -81.97846984863281, + "logps/ref_rejected": -104.69148254394531, + "logps/rejected": -770.2747802734375, + "loss": 1.0794, + "margin_dpo/margin_mean": 274.78253173828125, + "margin_dpo/margin_std": 366.9202575683594, + "step": 418 + }, + { + "KL/chosen_KL_mean": -365.83148193359375, + "KL/mean": -516.37353515625, + "KL/rejected_KL_mean": -666.91552734375, + "KL/std": 287.1883544921875, + "epoch": 0.6152716593245228, + "fcm_dpo/beta": 0.0015577776357531548, + "fcm_dpo/delta": -0.07237845659255981, + "fcm_dpo/margin": 301.0840759277344, + "fcm_dpo/q_t": 0.39097434282302856, + "grad_norm": 31.140954971313477, + "learning_rate": 1.9526382029472988e-07, + "logits/chosen": -0.5190507173538208, + "logits/rejected": -0.5203031897544861, + "logps/chosen": -418.7801208496094, + "logps/ref_chosen": -52.948646545410156, + "logps/ref_rejected": -91.58309936523438, + "logps/rejected": -758.4986572265625, + "loss": 1.0359, + "margin_dpo/margin_mean": 301.0841064453125, + "margin_dpo/margin_std": 336.95245361328125, + "step": 419 + }, + { + "KL/chosen_KL_mean": -464.80908203125, + "KL/mean": -552.330810546875, + "KL/rejected_KL_mean": -639.8525390625, + "KL/std": 300.29180908203125, + "epoch": 0.6167400881057269, + "fcm_dpo/beta": 0.0015820781700313091, + "fcm_dpo/delta": 0.12642702460289001, + "fcm_dpo/margin": 175.04342651367188, + "fcm_dpo/q_t": 0.4385032057762146, + "grad_norm": 58.83283996582031, + "learning_rate": 1.9401235374032425e-07, + "logits/chosen": -0.5799360275268555, + "logits/rejected": -0.5508887767791748, + "logps/chosen": -542.5789794921875, + "logps/ref_chosen": -77.7699203491211, + "logps/ref_rejected": -69.31985473632812, + "logps/rejected": -709.17236328125, + "loss": 1.2225, + "margin_dpo/margin_mean": 175.04344177246094, + "margin_dpo/margin_std": 401.68768310546875, + "step": 420 + }, + { + "KL/chosen_KL_mean": -378.92333984375, + "KL/mean": -482.3503112792969, + "KL/rejected_KL_mean": -585.7772216796875, + "KL/std": 293.73455810546875, + "epoch": 0.618208516886931, + "fcm_dpo/beta": 0.0016132977325469255, + "fcm_dpo/delta": 0.06797365099191666, + "fcm_dpo/margin": 206.85389709472656, + "fcm_dpo/q_t": 0.4212290644645691, + "grad_norm": 25.49981117248535, + "learning_rate": 1.9276236251246653e-07, + "logits/chosen": -0.5703746676445007, + "logits/rejected": -0.5595937371253967, + "logps/chosen": -432.689208984375, + "logps/ref_chosen": -53.765865325927734, + "logps/ref_rejected": -89.28144836425781, + "logps/rejected": -675.0587158203125, + "loss": 1.1441, + "margin_dpo/margin_mean": 206.85391235351562, + "margin_dpo/margin_std": 313.36297607421875, + "step": 421 + }, + { + "KL/chosen_KL_mean": -427.87286376953125, + "KL/mean": -548.6137084960938, + "KL/rejected_KL_mean": -669.3544921875, + "KL/std": 294.9280090332031, + "epoch": 0.6196769456681351, + "fcm_dpo/beta": 0.001614258624613285, + "fcm_dpo/delta": 0.01060008816421032, + "fcm_dpo/margin": 241.48162841796875, + "fcm_dpo/q_t": 0.4094482660293579, + "grad_norm": 32.47233963012695, + "learning_rate": 1.9151387954958792e-07, + "logits/chosen": -0.5928431749343872, + "logits/rejected": -0.5967549681663513, + "logps/chosen": -496.50665283203125, + "logps/ref_chosen": -68.6337661743164, + "logps/ref_rejected": -87.86351013183594, + "logps/rejected": -757.218017578125, + "loss": 1.1069, + "margin_dpo/margin_mean": 241.4816436767578, + "margin_dpo/margin_std": 345.95001220703125, + "step": 422 + }, + { + "KL/chosen_KL_mean": -398.66766357421875, + "KL/mean": -534.7645263671875, + "KL/rejected_KL_mean": -670.8614501953125, + "KL/std": 283.50732421875, + "epoch": 0.6211453744493393, + "fcm_dpo/beta": 0.001606134930625558, + "fcm_dpo/delta": -0.038889989256858826, + "fcm_dpo/margin": 272.19378662109375, + "fcm_dpo/q_t": 0.39851221442222595, + "grad_norm": 29.974559783935547, + "learning_rate": 1.902669377503756e-07, + "logits/chosen": -0.5618699789047241, + "logits/rejected": -0.5707763433456421, + "logps/chosen": -453.657958984375, + "logps/ref_chosen": -54.99030303955078, + "logps/ref_rejected": -86.30654907226562, + "logps/rejected": -757.16796875, + "loss": 1.0539, + "margin_dpo/margin_mean": 272.19378662109375, + "margin_dpo/margin_std": 310.67779541015625, + "step": 423 + }, + { + "KL/chosen_KL_mean": -362.41912841796875, + "KL/mean": -485.88983154296875, + "KL/rejected_KL_mean": -609.360595703125, + "KL/std": 279.98773193359375, + "epoch": 0.6226138032305433, + "fcm_dpo/beta": 0.0015977869043126702, + "fcm_dpo/delta": 0.005315911024808884, + "fcm_dpo/margin": 246.94143676757812, + "fcm_dpo/q_t": 0.41002586483955383, + "grad_norm": 31.341785430908203, + "learning_rate": 1.890215699729057e-07, + "logits/chosen": -0.5942381620407104, + "logits/rejected": -0.574604332447052, + "logps/chosen": -418.4310607910156, + "logps/ref_chosen": -56.01192092895508, + "logps/ref_rejected": -66.47896575927734, + "logps/rejected": -675.839599609375, + "loss": 1.0959, + "margin_dpo/margin_mean": 246.94146728515625, + "margin_dpo/margin_std": 331.605712890625, + "step": 424 + }, + { + "KL/chosen_KL_mean": -399.5330810546875, + "KL/mean": -494.6478271484375, + "KL/rejected_KL_mean": -589.7625732421875, + "KL/std": 262.13092041015625, + "epoch": 0.6240822320117474, + "fcm_dpo/beta": 0.001631318125873804, + "fcm_dpo/delta": 0.09219174087047577, + "fcm_dpo/margin": 190.22943115234375, + "fcm_dpo/q_t": 0.4265892803668976, + "grad_norm": 32.349361419677734, + "learning_rate": 1.8777780903377732e-07, + "logits/chosen": -0.5598398447036743, + "logits/rejected": -0.5601568818092346, + "logps/chosen": -446.402099609375, + "logps/ref_chosen": -46.86899948120117, + "logps/ref_rejected": -95.92545318603516, + "logps/rejected": -685.68798828125, + "loss": 1.1678, + "margin_dpo/margin_mean": 190.22943115234375, + "margin_dpo/margin_std": 325.59716796875, + "step": 425 + }, + { + "KL/chosen_KL_mean": -367.98974609375, + "KL/mean": -488.088134765625, + "KL/rejected_KL_mean": -608.1864624023438, + "KL/std": 268.52386474609375, + "epoch": 0.6255506607929515, + "fcm_dpo/beta": 0.0016432944685220718, + "fcm_dpo/delta": 0.005093574523925781, + "fcm_dpo/margin": 240.1967315673828, + "fcm_dpo/q_t": 0.4079374670982361, + "grad_norm": 29.509531021118164, + "learning_rate": 1.8653568770724803e-07, + "logits/chosen": -0.6483026742935181, + "logits/rejected": -0.6253814697265625, + "logps/chosen": -444.57330322265625, + "logps/ref_chosen": -76.58354187011719, + "logps/ref_rejected": -81.26658630371094, + "logps/rejected": -689.4530639648438, + "loss": 1.0895, + "margin_dpo/margin_mean": 240.19671630859375, + "margin_dpo/margin_std": 298.68304443359375, + "step": 426 + }, + { + "KL/chosen_KL_mean": -346.615478515625, + "KL/mean": -436.65435791015625, + "KL/rejected_KL_mean": -526.6932373046875, + "KL/std": 234.79974365234375, + "epoch": 0.6270190895741556, + "fcm_dpo/beta": 0.00165902404114604, + "fcm_dpo/delta": 0.10449196398258209, + "fcm_dpo/margin": 180.07781982421875, + "fcm_dpo/q_t": 0.4308916926383972, + "grad_norm": 24.951610565185547, + "learning_rate": 1.8529523872436977e-07, + "logits/chosen": -0.5918477177619934, + "logits/rejected": -0.5726908445358276, + "logps/chosen": -411.4693603515625, + "logps/ref_chosen": -64.8538818359375, + "logps/ref_rejected": -78.5660171508789, + "logps/rejected": -605.25927734375, + "loss": 1.1632, + "margin_dpo/margin_mean": 180.07781982421875, + "margin_dpo/margin_std": 284.20269775390625, + "step": 427 + }, + { + "KL/chosen_KL_mean": -417.6781921386719, + "KL/mean": -544.4334106445312, + "KL/rejected_KL_mean": -671.1885986328125, + "KL/std": 306.112060546875, + "epoch": 0.6284875183553598, + "fcm_dpo/beta": 0.0016601982060819864, + "fcm_dpo/delta": -0.02208590693771839, + "fcm_dpo/margin": 253.5104217529297, + "fcm_dpo/q_t": 0.4037541151046753, + "grad_norm": 30.45539665222168, + "learning_rate": 1.8405649477212697e-07, + "logits/chosen": -0.5910390615463257, + "logits/rejected": -0.5962928533554077, + "logps/chosen": -480.3148498535156, + "logps/ref_chosen": -62.63666534423828, + "logps/ref_rejected": -103.28181457519531, + "logps/rejected": -774.470458984375, + "loss": 1.0964, + "margin_dpo/margin_mean": 253.5104217529297, + "margin_dpo/margin_std": 367.505859375, + "step": 428 + }, + { + "KL/chosen_KL_mean": -423.49957275390625, + "KL/mean": -519.6409301757812, + "KL/rejected_KL_mean": -615.7822875976562, + "KL/std": 274.84283447265625, + "epoch": 0.6299559471365639, + "fcm_dpo/beta": 0.0016591004095971584, + "fcm_dpo/delta": -0.028070662170648575, + "fcm_dpo/margin": 192.28273010253906, + "fcm_dpo/q_t": 0.426498144865036, + "grad_norm": 33.337589263916016, + "learning_rate": 1.828194884925749e-07, + "logits/chosen": -0.5892548561096191, + "logits/rejected": -0.5679141283035278, + "logps/chosen": -504.73358154296875, + "logps/ref_chosen": -81.23401641845703, + "logps/ref_rejected": -91.79493713378906, + "logps/rejected": -707.5772094726562, + "loss": 1.172, + "margin_dpo/margin_mean": 192.28273010253906, + "margin_dpo/margin_std": 336.96649169921875, + "step": 429 + }, + { + "KL/chosen_KL_mean": -343.0795593261719, + "KL/mean": -443.554931640625, + "KL/rejected_KL_mean": -544.0302734375, + "KL/std": 249.0330047607422, + "epoch": 0.631424375917768, + "fcm_dpo/beta": 0.0016736264806240797, + "fcm_dpo/delta": 0.06572603434324265, + "fcm_dpo/margin": 200.95074462890625, + "fcm_dpo/q_t": 0.42213696241378784, + "grad_norm": 27.670103073120117, + "learning_rate": 1.8158425248197928e-07, + "logits/chosen": -0.5829579830169678, + "logits/rejected": -0.5811977386474609, + "logps/chosen": -403.9998779296875, + "logps/ref_chosen": -60.920326232910156, + "logps/ref_rejected": -104.42280578613281, + "logps/rejected": -648.453125, + "loss": 1.129, + "margin_dpo/margin_mean": 200.95074462890625, + "margin_dpo/margin_std": 278.2074890136719, + "step": 430 + }, + { + "KL/chosen_KL_mean": -320.6529541015625, + "KL/mean": -468.82305908203125, + "KL/rejected_KL_mean": -616.9931640625, + "KL/std": 276.59454345703125, + "epoch": 0.6328928046989721, + "fcm_dpo/beta": 0.001651083119213581, + "fcm_dpo/delta": -0.09401773661375046, + "fcm_dpo/margin": 296.34014892578125, + "fcm_dpo/q_t": 0.3859713673591614, + "grad_norm": 23.38682746887207, + "learning_rate": 1.8035081928995788e-07, + "logits/chosen": -0.5685824751853943, + "logits/rejected": -0.5741355419158936, + "logps/chosen": -378.001708984375, + "logps/ref_chosen": -57.34874725341797, + "logps/ref_rejected": -92.84022521972656, + "logps/rejected": -709.8333740234375, + "loss": 1.0172, + "margin_dpo/margin_mean": 296.3401794433594, + "margin_dpo/margin_std": 303.3402404785156, + "step": 431 + }, + { + "KL/chosen_KL_mean": -317.8546447753906, + "KL/mean": -456.84588623046875, + "KL/rejected_KL_mean": -595.837158203125, + "KL/std": 269.2180480957031, + "epoch": 0.6343612334801763, + "fcm_dpo/beta": 0.0016406788490712643, + "fcm_dpo/delta": -0.0591546930372715, + "fcm_dpo/margin": 277.9825439453125, + "fcm_dpo/q_t": 0.3939441442489624, + "grad_norm": 41.92903518676758, + "learning_rate": 1.791192214186223e-07, + "logits/chosen": -0.531327486038208, + "logits/rejected": -0.520300030708313, + "logps/chosen": -388.929443359375, + "logps/ref_chosen": -71.07479095458984, + "logps/ref_rejected": -98.57952880859375, + "logps/rejected": -694.4166870117188, + "loss": 1.0337, + "margin_dpo/margin_mean": 277.9825134277344, + "margin_dpo/margin_std": 276.3160400390625, + "step": 432 + }, + { + "KL/chosen_KL_mean": -403.21221923828125, + "KL/mean": -496.0498046875, + "KL/rejected_KL_mean": -588.8873291015625, + "KL/std": 261.9140625, + "epoch": 0.6358296622613803, + "fcm_dpo/beta": 0.0016517346957698464, + "fcm_dpo/delta": 0.09607505798339844, + "fcm_dpo/margin": 185.67514038085938, + "fcm_dpo/q_t": 0.4271540939807892, + "grad_norm": 35.29652404785156, + "learning_rate": 1.7788949132172193e-07, + "logits/chosen": -0.5547606945037842, + "logits/rejected": -0.541266679763794, + "logps/chosen": -461.48541259765625, + "logps/ref_chosen": -58.273193359375, + "logps/ref_rejected": -95.95089721679688, + "logps/rejected": -684.8382568359375, + "loss": 1.1672, + "margin_dpo/margin_mean": 185.67514038085938, + "margin_dpo/margin_std": 311.87078857421875, + "step": 433 + }, + { + "KL/chosen_KL_mean": -343.99139404296875, + "KL/mean": -456.23834228515625, + "KL/rejected_KL_mean": -568.4853515625, + "KL/std": 267.9556579589844, + "epoch": 0.6372980910425844, + "fcm_dpo/beta": 0.0016591593157500029, + "fcm_dpo/delta": 0.02848285809159279, + "fcm_dpo/margin": 224.49386596679688, + "fcm_dpo/q_t": 0.4180990159511566, + "grad_norm": 25.378862380981445, + "learning_rate": 1.7666166140378853e-07, + "logits/chosen": -0.5715805292129517, + "logits/rejected": -0.5707394480705261, + "logps/chosen": -405.965087890625, + "logps/ref_chosen": -61.97370147705078, + "logps/ref_rejected": -78.49861145019531, + "logps/rejected": -646.98388671875, + "loss": 1.1177, + "margin_dpo/margin_mean": 224.49386596679688, + "margin_dpo/margin_std": 339.36627197265625, + "step": 434 + }, + { + "KL/chosen_KL_mean": -311.0776062011719, + "KL/mean": -433.509521484375, + "KL/rejected_KL_mean": -555.94140625, + "KL/std": 261.578857421875, + "epoch": 0.6387665198237885, + "fcm_dpo/beta": 0.00166351068764925, + "fcm_dpo/delta": -0.007664802018553019, + "fcm_dpo/margin": 244.86380004882812, + "fcm_dpo/q_t": 0.40512967109680176, + "grad_norm": 25.661197662353516, + "learning_rate": 1.7543576401928218e-07, + "logits/chosen": -0.5694348812103271, + "logits/rejected": -0.559348464012146, + "logps/chosen": -362.57965087890625, + "logps/ref_chosen": -51.502052307128906, + "logps/ref_rejected": -87.56689453125, + "logps/rejected": -643.50830078125, + "loss": 1.0788, + "margin_dpo/margin_mean": 244.86380004882812, + "margin_dpo/margin_std": 291.82879638671875, + "step": 435 + }, + { + "KL/chosen_KL_mean": -326.98468017578125, + "KL/mean": -433.77142333984375, + "KL/rejected_KL_mean": -540.5582275390625, + "KL/std": 234.86660766601562, + "epoch": 0.6402349486049926, + "fcm_dpo/beta": 0.001671030418947339, + "fcm_dpo/delta": 0.04468690603971481, + "fcm_dpo/margin": 213.5735321044922, + "fcm_dpo/q_t": 0.41665488481521606, + "grad_norm": 39.46367263793945, + "learning_rate": 1.742118314717391e-07, + "logits/chosen": -0.5771512985229492, + "logits/rejected": -0.5507988929748535, + "logps/chosen": -398.3883972167969, + "logps/ref_chosen": -71.40371704101562, + "logps/ref_rejected": -82.72775268554688, + "logps/rejected": -623.2860107421875, + "loss": 1.1115, + "margin_dpo/margin_mean": 213.57354736328125, + "margin_dpo/margin_std": 275.3209533691406, + "step": 436 + }, + { + "KL/chosen_KL_mean": -331.77978515625, + "KL/mean": -442.20428466796875, + "KL/rejected_KL_mean": -552.6287841796875, + "KL/std": 225.34506225585938, + "epoch": 0.6417033773861968, + "fcm_dpo/beta": 0.0016848563682287931, + "fcm_dpo/delta": 0.028988715261220932, + "fcm_dpo/margin": 220.84902954101562, + "fcm_dpo/q_t": 0.41250330209732056, + "grad_norm": 25.351360321044922, + "learning_rate": 1.7298989601292036e-07, + "logits/chosen": -0.5720341205596924, + "logits/rejected": -0.5498570203781128, + "logps/chosen": -396.5240478515625, + "logps/ref_chosen": -64.7442626953125, + "logps/ref_rejected": -82.04356384277344, + "logps/rejected": -634.67236328125, + "loss": 1.0967, + "margin_dpo/margin_mean": 220.84902954101562, + "margin_dpo/margin_std": 267.39385986328125, + "step": 437 + }, + { + "KL/chosen_KL_mean": -341.92059326171875, + "KL/mean": -469.1558532714844, + "KL/rejected_KL_mean": -596.3910522460938, + "KL/std": 260.28424072265625, + "epoch": 0.6431718061674009, + "fcm_dpo/beta": 0.0016751789953559637, + "fcm_dpo/delta": -0.027896108105778694, + "fcm_dpo/margin": 254.47047424316406, + "fcm_dpo/q_t": 0.3999551236629486, + "grad_norm": 33.649723052978516, + "learning_rate": 1.7176998984196144e-07, + "logits/chosen": -0.5674476623535156, + "logits/rejected": -0.5483890771865845, + "logps/chosen": -400.93927001953125, + "logps/ref_chosen": -59.0186653137207, + "logps/ref_rejected": -83.07682800292969, + "logps/rejected": -679.4678955078125, + "loss": 1.0567, + "margin_dpo/margin_mean": 254.470458984375, + "margin_dpo/margin_std": 276.5482177734375, + "step": 438 + }, + { + "KL/chosen_KL_mean": -375.33843994140625, + "KL/mean": -480.2781982421875, + "KL/rejected_KL_mean": -585.218017578125, + "KL/std": 268.84649658203125, + "epoch": 0.644640234948605, + "fcm_dpo/beta": 0.0016563256504014134, + "fcm_dpo/delta": -0.06954063475131989, + "fcm_dpo/margin": 209.87954711914062, + "fcm_dpo/q_t": 0.4195774793624878, + "grad_norm": 28.397993087768555, + "learning_rate": 1.7055214510452458e-07, + "logits/chosen": -0.6017969846725464, + "logits/rejected": -0.6068276166915894, + "logps/chosen": -429.1225280761719, + "logps/ref_chosen": -53.78407669067383, + "logps/ref_rejected": -83.98545837402344, + "logps/rejected": -669.2034912109375, + "loss": 1.1355, + "margin_dpo/margin_mean": 209.87953186035156, + "margin_dpo/margin_std": 304.0859375, + "step": 439 + }, + { + "KL/chosen_KL_mean": -404.01019287109375, + "KL/mean": -522.5572509765625, + "KL/rejected_KL_mean": -641.1043701171875, + "KL/std": 323.501708984375, + "epoch": 0.6461086637298091, + "fcm_dpo/beta": 0.0016622185939922929, + "fcm_dpo/delta": 0.005979446694254875, + "fcm_dpo/margin": 237.09423828125, + "fcm_dpo/q_t": 0.41194236278533936, + "grad_norm": 36.218482971191406, + "learning_rate": 1.6933639389195134e-07, + "logits/chosen": -0.6418617367744446, + "logits/rejected": -0.638819694519043, + "logps/chosen": -482.576904296875, + "logps/ref_chosen": -78.56671905517578, + "logps/ref_rejected": -96.49775695800781, + "logps/rejected": -737.6021728515625, + "loss": 1.1007, + "margin_dpo/margin_mean": 237.09423828125, + "margin_dpo/margin_std": 334.8287658691406, + "step": 440 + }, + { + "KL/chosen_KL_mean": -462.35516357421875, + "KL/mean": -583.5091552734375, + "KL/rejected_KL_mean": -704.6630859375, + "KL/std": 340.33734130859375, + "epoch": 0.6475770925110133, + "fcm_dpo/beta": 0.001664304407313466, + "fcm_dpo/delta": -0.0037491731345653534, + "fcm_dpo/margin": 242.307861328125, + "fcm_dpo/q_t": 0.41193264722824097, + "grad_norm": 46.04979705810547, + "learning_rate": 1.681227682404166e-07, + "logits/chosen": -0.6510436534881592, + "logits/rejected": -0.6406994462013245, + "logps/chosen": -523.1796264648438, + "logps/ref_chosen": -60.824440002441406, + "logps/ref_rejected": -96.47080993652344, + "logps/rejected": -801.1338500976562, + "loss": 1.1303, + "margin_dpo/margin_mean": 242.307861328125, + "margin_dpo/margin_std": 407.91796875, + "step": 441 + }, + { + "KL/chosen_KL_mean": -394.96697998046875, + "KL/mean": -539.0298461914062, + "KL/rejected_KL_mean": -683.0927124023438, + "KL/std": 329.10321044921875, + "epoch": 0.6490455212922174, + "fcm_dpo/beta": 0.0016490614507347345, + "fcm_dpo/delta": -0.07931334525346756, + "fcm_dpo/margin": 288.12567138671875, + "fcm_dpo/q_t": 0.3943568170070648, + "grad_norm": 35.095680236816406, + "learning_rate": 1.669113001300851e-07, + "logits/chosen": -0.6585125923156738, + "logits/rejected": -0.6561766862869263, + "logps/chosen": -441.97821044921875, + "logps/ref_chosen": -47.01121520996094, + "logps/ref_rejected": -76.53926086425781, + "logps/rejected": -759.6319580078125, + "loss": 1.0536, + "margin_dpo/margin_mean": 288.12567138671875, + "margin_dpo/margin_std": 366.7057800292969, + "step": 442 + }, + { + "KL/chosen_KL_mean": -456.835693359375, + "KL/mean": -547.46435546875, + "KL/rejected_KL_mean": -638.0928955078125, + "KL/std": 329.8770751953125, + "epoch": 0.6505139500734214, + "fcm_dpo/beta": 0.0016302757430821657, + "fcm_dpo/delta": -0.0021791704930365086, + "fcm_dpo/margin": 181.2572021484375, + "fcm_dpo/q_t": 0.4339308440685272, + "grad_norm": 37.560585021972656, + "learning_rate": 1.6570202148426815e-07, + "logits/chosen": -0.6085466146469116, + "logits/rejected": -0.5840749740600586, + "logps/chosen": -528.1087646484375, + "logps/ref_chosen": -71.27301788330078, + "logps/ref_rejected": -86.679931640625, + "logps/rejected": -724.7728271484375, + "loss": 1.2126, + "margin_dpo/margin_mean": 181.2572021484375, + "margin_dpo/margin_std": 397.4133605957031, + "step": 443 + }, + { + "KL/chosen_KL_mean": -444.24237060546875, + "KL/mean": -589.8250732421875, + "KL/rejected_KL_mean": -735.4078369140625, + "KL/std": 349.052978515625, + "epoch": 0.6519823788546255, + "fcm_dpo/beta": 0.0016081281937658787, + "fcm_dpo/delta": -0.07197729498147964, + "fcm_dpo/margin": 291.16546630859375, + "fcm_dpo/q_t": 0.39475017786026, + "grad_norm": 26.796432495117188, + "learning_rate": 1.6449496416858282e-07, + "logits/chosen": -0.619565486907959, + "logits/rejected": -0.6288525462150574, + "logps/chosen": -501.4560546875, + "logps/ref_chosen": -57.213706970214844, + "logps/ref_rejected": -97.25489807128906, + "logps/rejected": -832.6627197265625, + "loss": 1.0554, + "margin_dpo/margin_mean": 291.16546630859375, + "margin_dpo/margin_std": 379.3155517578125, + "step": 444 + }, + { + "KL/chosen_KL_mean": -398.5299072265625, + "KL/mean": -536.5014038085938, + "KL/rejected_KL_mean": -674.472900390625, + "KL/std": 279.18701171875, + "epoch": 0.6534508076358296, + "fcm_dpo/beta": 0.0015993316192179918, + "fcm_dpo/delta": -0.04321688041090965, + "fcm_dpo/margin": 275.9429931640625, + "fcm_dpo/q_t": 0.3995450437068939, + "grad_norm": 27.847251892089844, + "learning_rate": 1.6329015999011182e-07, + "logits/chosen": -0.6301474571228027, + "logits/rejected": -0.6212267279624939, + "logps/chosen": -465.8297119140625, + "logps/ref_chosen": -67.29979705810547, + "logps/ref_rejected": -92.68267059326172, + "logps/rejected": -767.1555786132812, + "loss": 1.0649, + "margin_dpo/margin_mean": 275.9429931640625, + "margin_dpo/margin_std": 342.8664245605469, + "step": 445 + }, + { + "KL/chosen_KL_mean": -363.5679016113281, + "KL/mean": -512.4356689453125, + "KL/rejected_KL_mean": -661.303466796875, + "KL/std": 304.2501220703125, + "epoch": 0.6549192364170338, + "fcm_dpo/beta": 0.0015849031042307615, + "fcm_dpo/delta": -0.07573074102401733, + "fcm_dpo/margin": 297.735595703125, + "fcm_dpo/q_t": 0.3897179961204529, + "grad_norm": 30.288881301879883, + "learning_rate": 1.6208764069656578e-07, + "logits/chosen": -0.655229926109314, + "logits/rejected": -0.6672055721282959, + "logps/chosen": -422.6663818359375, + "logps/ref_chosen": -59.098487854003906, + "logps/ref_rejected": -101.26419067382812, + "logps/rejected": -762.567626953125, + "loss": 1.0286, + "margin_dpo/margin_mean": 297.735595703125, + "margin_dpo/margin_std": 308.36199951171875, + "step": 446 + }, + { + "KL/chosen_KL_mean": -363.2137756347656, + "KL/mean": -519.5991821289062, + "KL/rejected_KL_mean": -675.984619140625, + "KL/std": 346.8345947265625, + "epoch": 0.6563876651982379, + "fcm_dpo/beta": 0.001544747268781066, + "fcm_dpo/delta": -0.08775018155574799, + "fcm_dpo/margin": 312.77081298828125, + "fcm_dpo/q_t": 0.3913511037826538, + "grad_norm": 26.699710845947266, + "learning_rate": 1.608874379754465e-07, + "logits/chosen": -0.7068610191345215, + "logits/rejected": -0.7222627401351929, + "logps/chosen": -419.28912353515625, + "logps/ref_chosen": -56.07533264160156, + "logps/ref_rejected": -98.69475555419922, + "logps/rejected": -774.6793212890625, + "loss": 1.0369, + "margin_dpo/margin_mean": 312.7708435058594, + "margin_dpo/margin_std": 385.6178283691406, + "step": 447 + }, + { + "KL/chosen_KL_mean": -406.2823791503906, + "KL/mean": -554.0318603515625, + "KL/rejected_KL_mean": -701.7813720703125, + "KL/std": 298.70819091796875, + "epoch": 0.657856093979442, + "fcm_dpo/beta": 0.0015353120397776365, + "fcm_dpo/delta": -0.05634545907378197, + "fcm_dpo/margin": 295.49896240234375, + "fcm_dpo/q_t": 0.395630419254303, + "grad_norm": 35.73704528808594, + "learning_rate": 1.5968958345321177e-07, + "logits/chosen": -0.6082560420036316, + "logits/rejected": -0.6136020421981812, + "logps/chosen": -466.2862243652344, + "logps/ref_chosen": -60.00384521484375, + "logps/ref_rejected": -102.26465606689453, + "logps/rejected": -804.0460205078125, + "loss": 1.0467, + "margin_dpo/margin_mean": 295.4989929199219, + "margin_dpo/margin_std": 336.44561767578125, + "step": 448 + }, + { + "KL/chosen_KL_mean": -413.37811279296875, + "KL/mean": -562.44775390625, + "KL/rejected_KL_mean": -711.517333984375, + "KL/std": 366.28466796875, + "epoch": 0.6593245227606461, + "fcm_dpo/beta": 0.0015101665630936623, + "fcm_dpo/delta": -0.052754104137420654, + "fcm_dpo/margin": 298.13922119140625, + "fcm_dpo/q_t": 0.40061530470848083, + "grad_norm": 28.19297981262207, + "learning_rate": 1.584941086944423e-07, + "logits/chosen": -0.6445102095603943, + "logits/rejected": -0.6385193467140198, + "logps/chosen": -480.90472412109375, + "logps/ref_chosen": -67.52661895751953, + "logps/ref_rejected": -88.59690856933594, + "logps/rejected": -800.1142578125, + "loss": 1.0796, + "margin_dpo/margin_mean": 298.13922119140625, + "margin_dpo/margin_std": 435.361572265625, + "step": 449 + }, + { + "KL/chosen_KL_mean": -338.682861328125, + "KL/mean": -501.4732666015625, + "KL/rejected_KL_mean": -664.263671875, + "KL/std": 325.22833251953125, + "epoch": 0.6607929515418502, + "fcm_dpo/beta": 0.0014909481396898627, + "fcm_dpo/delta": -0.08969271928071976, + "fcm_dpo/margin": 325.58074951171875, + "fcm_dpo/q_t": 0.386138379573822, + "grad_norm": 47.2825813293457, + "learning_rate": 1.573010452010098e-07, + "logits/chosen": -0.6737087965011597, + "logits/rejected": -0.685724675655365, + "logps/chosen": -395.791015625, + "logps/ref_chosen": -57.10811996459961, + "logps/ref_rejected": -102.75494384765625, + "logps/rejected": -767.0185546875, + "loss": 1.0118, + "margin_dpo/margin_mean": 325.58074951171875, + "margin_dpo/margin_std": 320.9717102050781, + "step": 450 + }, + { + "KL/chosen_KL_mean": -454.3709716796875, + "KL/mean": -571.3306274414062, + "KL/rejected_KL_mean": -688.2903442382812, + "KL/std": 374.0126647949219, + "epoch": 0.6622613803230544, + "fcm_dpo/beta": 0.0014971659984439611, + "fcm_dpo/delta": 0.051308851689100266, + "fcm_dpo/margin": 233.9193878173828, + "fcm_dpo/q_t": 0.41749513149261475, + "grad_norm": 33.79815673828125, + "learning_rate": 1.5611042441124687e-07, + "logits/chosen": -0.7189067602157593, + "logits/rejected": -0.6965080499649048, + "logps/chosen": -512.8397827148438, + "logps/ref_chosen": -58.46883010864258, + "logps/ref_rejected": -72.92941284179688, + "logps/rejected": -761.2197265625, + "loss": 1.1623, + "margin_dpo/margin_mean": 233.91940307617188, + "margin_dpo/margin_std": 428.302490234375, + "step": 451 + }, + { + "KL/chosen_KL_mean": -321.31689453125, + "KL/mean": -462.2808532714844, + "KL/rejected_KL_mean": -603.2447509765625, + "KL/std": 290.2576904296875, + "epoch": 0.6637298091042585, + "fcm_dpo/beta": 0.0014898786321282387, + "fcm_dpo/delta": -0.021011171862483025, + "fcm_dpo/margin": 281.9278869628906, + "fcm_dpo/q_t": 0.40081116557121277, + "grad_norm": 25.158477783203125, + "learning_rate": 1.549222776991186e-07, + "logits/chosen": -0.6077337265014648, + "logits/rejected": -0.6246554851531982, + "logps/chosen": -371.70745849609375, + "logps/ref_chosen": -50.39055252075195, + "logps/ref_rejected": -97.77142333984375, + "logps/rejected": -701.0162353515625, + "loss": 1.0559, + "margin_dpo/margin_mean": 281.9278564453125, + "margin_dpo/margin_std": 295.3541564941406, + "step": 452 + }, + { + "KL/chosen_KL_mean": -374.7382507324219, + "KL/mean": -508.03814697265625, + "KL/rejected_KL_mean": -641.3380126953125, + "KL/std": 293.4873046875, + "epoch": 0.6651982378854625, + "fcm_dpo/beta": 0.0014873708132654428, + "fcm_dpo/delta": 0.0034573376178741455, + "fcm_dpo/margin": 266.5997314453125, + "fcm_dpo/q_t": 0.40999874472618103, + "grad_norm": 26.13146209716797, + "learning_rate": 1.5373663637339584e-07, + "logits/chosen": -0.6614656448364258, + "logits/rejected": -0.6468169689178467, + "logps/chosen": -432.453125, + "logps/ref_chosen": -57.71485137939453, + "logps/ref_rejected": -82.20741271972656, + "logps/rejected": -723.54541015625, + "loss": 1.0931, + "margin_dpo/margin_mean": 266.5997619628906, + "margin_dpo/margin_std": 356.6203918457031, + "step": 453 + }, + { + "KL/chosen_KL_mean": -450.31854248046875, + "KL/mean": -600.8720703125, + "KL/rejected_KL_mean": -751.4256591796875, + "KL/std": 340.4393005371094, + "epoch": 0.6666666666666666, + "fcm_dpo/beta": 0.0014775395393371582, + "fcm_dpo/delta": -0.047248564660549164, + "fcm_dpo/margin": 301.10711669921875, + "fcm_dpo/q_t": 0.39838463068008423, + "grad_norm": 28.11908721923828, + "learning_rate": 1.5255353167683017e-07, + "logits/chosen": -0.697075605392456, + "logits/rejected": -0.6863827109336853, + "logps/chosen": -511.26422119140625, + "logps/ref_chosen": -60.945648193359375, + "logps/ref_rejected": -84.95079040527344, + "logps/rejected": -836.37646484375, + "loss": 1.0611, + "margin_dpo/margin_mean": 301.10711669921875, + "margin_dpo/margin_std": 380.62457275390625, + "step": 454 + }, + { + "KL/chosen_KL_mean": -384.13677978515625, + "KL/mean": -556.47265625, + "KL/rejected_KL_mean": -728.8084716796875, + "KL/std": 345.8653869628906, + "epoch": 0.6681350954478708, + "fcm_dpo/beta": 0.0014633602695539594, + "fcm_dpo/delta": -0.11007063835859299, + "fcm_dpo/margin": 344.6717224121094, + "fcm_dpo/q_t": 0.38579294085502625, + "grad_norm": 37.92613983154297, + "learning_rate": 1.5137299478533064e-07, + "logits/chosen": -0.6768559217453003, + "logits/rejected": -0.6982386708259583, + "logps/chosen": -429.02349853515625, + "logps/ref_chosen": -44.88671112060547, + "logps/ref_rejected": -115.30147552490234, + "logps/rejected": -844.1099853515625, + "loss": 1.0253, + "margin_dpo/margin_mean": 344.6717224121094, + "margin_dpo/margin_std": 396.2593078613281, + "step": 455 + }, + { + "KL/chosen_KL_mean": -407.1464538574219, + "KL/mean": -576.0047607421875, + "KL/rejected_KL_mean": -744.863037109375, + "KL/std": 354.4407043457031, + "epoch": 0.6696035242290749, + "fcm_dpo/beta": 0.0014285333454608917, + "fcm_dpo/delta": -0.08655368536710739, + "fcm_dpo/margin": 337.71661376953125, + "fcm_dpo/q_t": 0.3888140320777893, + "grad_norm": 26.16177749633789, + "learning_rate": 1.5019505680714232e-07, + "logits/chosen": -0.6761212348937988, + "logits/rejected": -0.6974040865898132, + "logps/chosen": -464.1832275390625, + "logps/ref_chosen": -57.036781311035156, + "logps/ref_rejected": -105.21784210205078, + "logps/rejected": -850.0808715820312, + "loss": 1.015, + "margin_dpo/margin_mean": 337.71661376953125, + "margin_dpo/margin_std": 344.4359130859375, + "step": 456 + }, + { + "KL/chosen_KL_mean": -392.9554443359375, + "KL/mean": -561.5748291015625, + "KL/rejected_KL_mean": -730.1942138671875, + "KL/std": 337.5885009765625, + "epoch": 0.671071953010279, + "fcm_dpo/beta": 0.0014012358151376247, + "fcm_dpo/delta": -0.07646898925304413, + "fcm_dpo/margin": 337.23883056640625, + "fcm_dpo/q_t": 0.3888044059276581, + "grad_norm": 28.89864158630371, + "learning_rate": 1.4901974878202627e-07, + "logits/chosen": -0.6724662780761719, + "logits/rejected": -0.674906849861145, + "logps/chosen": -447.197998046875, + "logps/ref_chosen": -54.24253845214844, + "logps/ref_rejected": -85.10956573486328, + "logps/rejected": -815.3038330078125, + "loss": 1.0173, + "margin_dpo/margin_mean": 337.23883056640625, + "margin_dpo/margin_std": 322.967529296875, + "step": 457 + }, + { + "KL/chosen_KL_mean": -399.1230163574219, + "KL/mean": -552.3309326171875, + "KL/rejected_KL_mean": -705.5389404296875, + "KL/std": 313.0858154296875, + "epoch": 0.6725403817914831, + "fcm_dpo/beta": 0.0013883748324587941, + "fcm_dpo/delta": -0.02694622240960598, + "fcm_dpo/margin": 306.4158935546875, + "fcm_dpo/q_t": 0.40227359533309937, + "grad_norm": 22.595535278320312, + "learning_rate": 1.4784710168044212e-07, + "logits/chosen": -0.6672220826148987, + "logits/rejected": -0.6631453633308411, + "logps/chosen": -454.5318908691406, + "logps/ref_chosen": -55.40888214111328, + "logps/ref_rejected": -97.68325805664062, + "logps/rejected": -803.22216796875, + "loss": 1.0629, + "margin_dpo/margin_mean": 306.4158935546875, + "margin_dpo/margin_std": 356.400634765625, + "step": 458 + }, + { + "KL/chosen_KL_mean": -442.6391296386719, + "KL/mean": -608.4810180664062, + "KL/rejected_KL_mean": -774.3228759765625, + "KL/std": 361.2178955078125, + "epoch": 0.6740088105726872, + "fcm_dpo/beta": 0.0013751968508586287, + "fcm_dpo/delta": -0.05920097231864929, + "fcm_dpo/margin": 331.68365478515625, + "fcm_dpo/q_t": 0.39526090025901794, + "grad_norm": 31.071313858032227, + "learning_rate": 1.466771464027316e-07, + "logits/chosen": -0.6531388759613037, + "logits/rejected": -0.6709892749786377, + "logps/chosen": -489.1966247558594, + "logps/ref_chosen": -46.55748748779297, + "logps/ref_rejected": -86.16854095458984, + "logps/rejected": -860.4913940429688, + "loss": 1.0531, + "margin_dpo/margin_mean": 331.6836853027344, + "margin_dpo/margin_std": 400.57806396484375, + "step": 459 + }, + { + "KL/chosen_KL_mean": -484.61431884765625, + "KL/mean": -663.1461181640625, + "KL/rejected_KL_mean": -841.6778564453125, + "KL/std": 356.38165283203125, + "epoch": 0.6754772393538914, + "fcm_dpo/beta": 0.0013584838015958667, + "fcm_dpo/delta": -0.08933592587709427, + "fcm_dpo/margin": 357.0634765625, + "fcm_dpo/q_t": 0.38839712738990784, + "grad_norm": 46.59115982055664, + "learning_rate": 1.4550991377830423e-07, + "logits/chosen": -0.7092480063438416, + "logits/rejected": -0.7396787405014038, + "logps/chosen": -536.249267578125, + "logps/ref_chosen": -51.63489532470703, + "logps/ref_rejected": -104.11935424804688, + "logps/rejected": -945.7972412109375, + "loss": 1.022, + "margin_dpo/margin_mean": 357.0635070800781, + "margin_dpo/margin_std": 387.8919677734375, + "step": 460 + }, + { + "KL/chosen_KL_mean": -520.2779541015625, + "KL/mean": -661.3561401367188, + "KL/rejected_KL_mean": -802.4342651367188, + "KL/std": 363.57623291015625, + "epoch": 0.6769456681350955, + "fcm_dpo/beta": 0.0013587003340944648, + "fcm_dpo/delta": 0.017041990533471107, + "fcm_dpo/margin": 282.1562805175781, + "fcm_dpo/q_t": 0.4131912589073181, + "grad_norm": 28.806053161621094, + "learning_rate": 1.4434543456482518e-07, + "logits/chosen": -0.7396203279495239, + "logits/rejected": -0.7533408999443054, + "logps/chosen": -575.4599609375, + "logps/ref_chosen": -55.18195724487305, + "logps/ref_rejected": -86.47689819335938, + "logps/rejected": -888.9111328125, + "loss": 1.1072, + "margin_dpo/margin_mean": 282.15625, + "margin_dpo/margin_std": 399.5581359863281, + "step": 461 + }, + { + "KL/chosen_KL_mean": -545.4285888671875, + "KL/mean": -660.5108642578125, + "KL/rejected_KL_mean": -775.5931396484375, + "KL/std": 371.68798828125, + "epoch": 0.6784140969162996, + "fcm_dpo/beta": 0.001372592058032751, + "fcm_dpo/delta": 0.08686043322086334, + "fcm_dpo/margin": 230.16461181640625, + "fcm_dpo/q_t": 0.4292982220649719, + "grad_norm": 41.23543930053711, + "learning_rate": 1.4318373944740484e-07, + "logits/chosen": -0.8299468755722046, + "logits/rejected": -0.8264528512954712, + "logps/chosen": -615.3565673828125, + "logps/ref_chosen": -69.92803192138672, + "logps/ref_rejected": -78.84111022949219, + "logps/rejected": -854.4342041015625, + "loss": 1.1717, + "margin_dpo/margin_mean": 230.1645965576172, + "margin_dpo/margin_std": 417.91949462890625, + "step": 462 + }, + { + "KL/chosen_KL_mean": -548.3289184570312, + "KL/mean": -702.0982055664062, + "KL/rejected_KL_mean": -855.8675537109375, + "KL/std": 388.29705810546875, + "epoch": 0.6798825256975036, + "fcm_dpo/beta": 0.0013804540503770113, + "fcm_dpo/delta": -0.025950342416763306, + "fcm_dpo/margin": 307.53863525390625, + "fcm_dpo/q_t": 0.4052172899246216, + "grad_norm": 37.95475387573242, + "learning_rate": 1.4202485903778976e-07, + "logits/chosen": -0.8180972337722778, + "logits/rejected": -0.8314469456672668, + "logps/chosen": -603.603271484375, + "logps/ref_chosen": -55.27437210083008, + "logps/ref_rejected": -89.02497863769531, + "logps/rejected": -944.8925170898438, + "loss": 1.0942, + "margin_dpo/margin_mean": 307.53863525390625, + "margin_dpo/margin_std": 443.52276611328125, + "step": 463 + }, + { + "KL/chosen_KL_mean": -544.8458251953125, + "KL/mean": -780.4537353515625, + "KL/rejected_KL_mean": -1016.0616455078125, + "KL/std": 460.05206298828125, + "epoch": 0.6813509544787077, + "fcm_dpo/beta": 0.0013158408692106605, + "fcm_dpo/delta": -0.23714584112167358, + "fcm_dpo/margin": 471.21575927734375, + "fcm_dpo/q_t": 0.359811931848526, + "grad_norm": 43.35410690307617, + "learning_rate": 1.4086882387355658e-07, + "logits/chosen": -0.7928054332733154, + "logits/rejected": -0.8558509945869446, + "logps/chosen": -595.7581176757812, + "logps/ref_chosen": -50.91230010986328, + "logps/ref_rejected": -102.4893798828125, + "logps/rejected": -1118.551025390625, + "loss": 0.9447, + "margin_dpo/margin_mean": 471.21575927734375, + "margin_dpo/margin_std": 474.4074401855469, + "step": 464 + }, + { + "KL/chosen_KL_mean": -574.6008911132812, + "KL/mean": -763.1315307617188, + "KL/rejected_KL_mean": -951.6621704101562, + "KL/std": 477.3629150390625, + "epoch": 0.6828193832599119, + "fcm_dpo/beta": 0.0012953910045325756, + "fcm_dpo/delta": -0.09291453659534454, + "fcm_dpo/margin": 377.061279296875, + "fcm_dpo/q_t": 0.38684460520744324, + "grad_norm": 57.58442306518555, + "learning_rate": 1.3971566441730714e-07, + "logits/chosen": -0.7876610159873962, + "logits/rejected": -0.807873547077179, + "logps/chosen": -634.7177734375, + "logps/ref_chosen": -60.116851806640625, + "logps/ref_rejected": -113.94602966308594, + "logps/rejected": -1065.608154296875, + "loss": 1.0584, + "margin_dpo/margin_mean": 377.061279296875, + "margin_dpo/margin_std": 511.11419677734375, + "step": 465 + }, + { + "KL/chosen_KL_mean": -613.3887939453125, + "KL/mean": -784.2792358398438, + "KL/rejected_KL_mean": -955.1697998046875, + "KL/std": 442.28924560546875, + "epoch": 0.684287812041116, + "fcm_dpo/beta": 0.0012718967627733946, + "fcm_dpo/delta": -0.03685159608721733, + "fcm_dpo/margin": 341.7809753417969, + "fcm_dpo/q_t": 0.4008026123046875, + "grad_norm": 33.484703063964844, + "learning_rate": 1.3856541105586545e-07, + "logits/chosen": -0.8349906206130981, + "logits/rejected": -0.843805193901062, + "logps/chosen": -666.3096923828125, + "logps/ref_chosen": -52.920921325683594, + "logps/ref_rejected": -90.3154296875, + "logps/rejected": -1045.4852294921875, + "loss": 1.0906, + "margin_dpo/margin_mean": 341.7809753417969, + "margin_dpo/margin_std": 492.47955322265625, + "step": 466 + }, + { + "KL/chosen_KL_mean": -774.8294677734375, + "KL/mean": -954.3599243164062, + "KL/rejected_KL_mean": -1133.890380859375, + "KL/std": 575.3023681640625, + "epoch": 0.6857562408223201, + "fcm_dpo/beta": 0.00125328847207129, + "fcm_dpo/delta": -0.05458660423755646, + "fcm_dpo/margin": 359.0608825683594, + "fcm_dpo/q_t": 0.4034256041049957, + "grad_norm": 52.08469009399414, + "learning_rate": 1.3741809409947729e-07, + "logits/chosen": -0.9275529384613037, + "logits/rejected": -0.9244056940078735, + "logps/chosen": -853.5452880859375, + "logps/ref_chosen": -78.7158203125, + "logps/ref_rejected": -102.86019897460938, + "logps/rejected": -1236.7506103515625, + "loss": 1.1488, + "margin_dpo/margin_mean": 359.0609130859375, + "margin_dpo/margin_std": 667.78173828125, + "step": 467 + }, + { + "KL/chosen_KL_mean": -607.3834228515625, + "KL/mean": -832.3876953125, + "KL/rejected_KL_mean": -1057.39208984375, + "KL/std": 518.711669921875, + "epoch": 0.6872246696035242, + "fcm_dpo/beta": 0.0012304207775741816, + "fcm_dpo/delta": -0.16281697154045105, + "fcm_dpo/margin": 450.0086364746094, + "fcm_dpo/q_t": 0.3800439238548279, + "grad_norm": 44.7249641418457, + "learning_rate": 1.362737437810114e-07, + "logits/chosen": -0.9139019846916199, + "logits/rejected": -0.9288034439086914, + "logps/chosen": -677.3187255859375, + "logps/ref_chosen": -69.93536376953125, + "logps/ref_rejected": -101.02880859375, + "logps/rejected": -1158.4208984375, + "loss": 1.0164, + "margin_dpo/margin_mean": 450.0086364746094, + "margin_dpo/margin_std": 588.6818237304688, + "step": 468 + }, + { + "KL/chosen_KL_mean": -641.0040283203125, + "KL/mean": -866.3514404296875, + "KL/rejected_KL_mean": -1091.698974609375, + "KL/std": 456.729736328125, + "epoch": 0.6886930983847284, + "fcm_dpo/beta": 0.0011876230128109455, + "fcm_dpo/delta": -0.14456316828727722, + "fcm_dpo/margin": 450.6948547363281, + "fcm_dpo/q_t": 0.3780639171600342, + "grad_norm": 36.414100646972656, + "learning_rate": 1.351323902551631e-07, + "logits/chosen": -0.9149258136749268, + "logits/rejected": -0.9327446222305298, + "logps/chosen": -709.1287231445312, + "logps/ref_chosen": -68.12469482421875, + "logps/ref_rejected": -104.78640747070312, + "logps/rejected": -1196.4853515625, + "loss": 1.0044, + "margin_dpo/margin_mean": 450.69488525390625, + "margin_dpo/margin_std": 503.34381103515625, + "step": 469 + }, + { + "KL/chosen_KL_mean": -555.3209838867188, + "KL/mean": -754.7327880859375, + "KL/rejected_KL_mean": -954.1446533203125, + "KL/std": 484.958984375, + "epoch": 0.6901615271659325, + "fcm_dpo/beta": 0.0011768193216994405, + "fcm_dpo/delta": -0.07272230088710785, + "fcm_dpo/margin": 398.8236083984375, + "fcm_dpo/q_t": 0.3930772542953491, + "grad_norm": 25.32660484313965, + "learning_rate": 1.339940635976592e-07, + "logits/chosen": -0.8891603946685791, + "logits/rejected": -0.9033347368240356, + "logps/chosen": -599.1129150390625, + "logps/ref_chosen": -43.791927337646484, + "logps/ref_rejected": -82.70285034179688, + "logps/rejected": -1036.847412109375, + "loss": 1.0628, + "margin_dpo/margin_mean": 398.8235778808594, + "margin_dpo/margin_std": 535.6801147460938, + "step": 470 + }, + { + "KL/chosen_KL_mean": -693.4312744140625, + "KL/mean": -855.115234375, + "KL/rejected_KL_mean": -1016.7991943359375, + "KL/std": 482.7373352050781, + "epoch": 0.6916299559471366, + "fcm_dpo/beta": 0.0011662011966109276, + "fcm_dpo/delta": 0.02322380244731903, + "fcm_dpo/margin": 323.3678283691406, + "fcm_dpo/q_t": 0.416721373796463, + "grad_norm": 45.39756393432617, + "learning_rate": 1.3285879380446563e-07, + "logits/chosen": -0.9871773719787598, + "logits/rejected": -0.9965918064117432, + "logps/chosen": -756.7708129882812, + "logps/ref_chosen": -63.33952331542969, + "logps/ref_rejected": -83.61048126220703, + "logps/rejected": -1100.40966796875, + "loss": 1.131, + "margin_dpo/margin_mean": 323.36785888671875, + "margin_dpo/margin_std": 523.2651977539062, + "step": 471 + }, + { + "KL/chosen_KL_mean": -685.0006103515625, + "KL/mean": -886.5283203125, + "KL/rejected_KL_mean": -1088.0560302734375, + "KL/std": 587.74755859375, + "epoch": 0.6930983847283406, + "fcm_dpo/beta": 0.001157897524535656, + "fcm_dpo/delta": -0.07049451023340225, + "fcm_dpo/margin": 403.0553894042969, + "fcm_dpo/q_t": 0.40090325474739075, + "grad_norm": 32.389835357666016, + "learning_rate": 1.317266107909975e-07, + "logits/chosen": -0.9715889096260071, + "logits/rejected": -0.9529412388801575, + "logps/chosen": -768.666748046875, + "logps/ref_chosen": -83.66610717773438, + "logps/ref_rejected": -117.20919799804688, + "logps/rejected": -1205.2652587890625, + "loss": 1.0948, + "margin_dpo/margin_mean": 403.055419921875, + "margin_dpo/margin_std": 634.1861572265625, + "step": 472 + }, + { + "KL/chosen_KL_mean": -811.2958984375, + "KL/mean": -903.3029174804688, + "KL/rejected_KL_mean": -995.3099365234375, + "KL/std": 595.69921875, + "epoch": 0.6945668135095447, + "fcm_dpo/beta": 0.001172641757875681, + "fcm_dpo/delta": 0.06300715357065201, + "fcm_dpo/margin": 184.01409912109375, + "fcm_dpo/q_t": 0.45356637239456177, + "grad_norm": 114.76943969726562, + "learning_rate": 1.3059754439133002e-07, + "logits/chosen": -0.9685148596763611, + "logits/rejected": -0.9405593872070312, + "logps/chosen": -874.7928466796875, + "logps/ref_chosen": -63.49696731567383, + "logps/ref_rejected": -81.14657592773438, + "logps/rejected": -1076.45654296875, + "loss": 1.3594, + "margin_dpo/margin_mean": 184.0141143798828, + "margin_dpo/margin_std": 777.1190185546875, + "step": 473 + }, + { + "KL/chosen_KL_mean": -650.262451171875, + "KL/mean": -809.9664916992188, + "KL/rejected_KL_mean": -969.6705322265625, + "KL/std": 507.17437744140625, + "epoch": 0.6960352422907489, + "fcm_dpo/beta": 0.0011639699805527925, + "fcm_dpo/delta": -0.07450275868177414, + "fcm_dpo/margin": 319.40814208984375, + "fcm_dpo/q_t": 0.4137033224105835, + "grad_norm": 41.28981399536133, + "learning_rate": 1.2947162435741277e-07, + "logits/chosen": -0.8996328115463257, + "logits/rejected": -0.9031381607055664, + "logps/chosen": -702.8743896484375, + "logps/ref_chosen": -52.6119384765625, + "logps/ref_rejected": -90.08041381835938, + "logps/rejected": -1059.7509765625, + "loss": 1.1578, + "margin_dpo/margin_mean": 319.4081726074219, + "margin_dpo/margin_std": 566.008544921875, + "step": 474 + }, + { + "KL/chosen_KL_mean": -470.3363342285156, + "KL/mean": -680.240966796875, + "KL/rejected_KL_mean": -890.1456298828125, + "KL/std": 411.4319763183594, + "epoch": 0.697503671071953, + "fcm_dpo/beta": 0.0011392869055271149, + "fcm_dpo/delta": -0.08241432905197144, + "fcm_dpo/margin": 419.8091735839844, + "fcm_dpo/q_t": 0.3890076279640198, + "grad_norm": 38.87031936645508, + "learning_rate": 1.2834888035828596e-07, + "logits/chosen": -0.930426836013794, + "logits/rejected": -0.9583991765975952, + "logps/chosen": -512.83154296875, + "logps/ref_chosen": -42.49519348144531, + "logps/ref_rejected": -90.06294250488281, + "logps/rejected": -980.2085571289062, + "loss": 1.0215, + "margin_dpo/margin_mean": 419.8092041015625, + "margin_dpo/margin_std": 437.57635498046875, + "step": 475 + }, + { + "KL/chosen_KL_mean": -583.7200317382812, + "KL/mean": -751.1150512695312, + "KL/rejected_KL_mean": -918.5101318359375, + "KL/std": 458.8353576660156, + "epoch": 0.6989720998531571, + "fcm_dpo/beta": 0.001139120664447546, + "fcm_dpo/delta": 0.019377058371901512, + "fcm_dpo/margin": 334.79010009765625, + "fcm_dpo/q_t": 0.4132460355758667, + "grad_norm": 55.413394927978516, + "learning_rate": 1.2722934197929802e-07, + "logits/chosen": -0.9340738654136658, + "logits/rejected": -0.9469287991523743, + "logps/chosen": -626.66943359375, + "logps/ref_chosen": -42.94938278198242, + "logps/ref_rejected": -73.71023559570312, + "logps/rejected": -992.2203979492188, + "loss": 1.1033, + "margin_dpo/margin_mean": 334.79010009765625, + "margin_dpo/margin_std": 459.30084228515625, + "step": 476 + }, + { + "KL/chosen_KL_mean": -626.4135131835938, + "KL/mean": -790.4285888671875, + "KL/rejected_KL_mean": -954.4437255859375, + "KL/std": 489.424560546875, + "epoch": 0.7004405286343612, + "fcm_dpo/beta": 0.0011485903523862362, + "fcm_dpo/delta": 0.023847589269280434, + "fcm_dpo/margin": 328.0302734375, + "fcm_dpo/q_t": 0.41430675983428955, + "grad_norm": 35.95357131958008, + "learning_rate": 1.2611303872132631e-07, + "logits/chosen": -0.9906863570213318, + "logits/rejected": -0.9593477845191956, + "logps/chosen": -697.1861572265625, + "logps/ref_chosen": -70.77261352539062, + "logps/ref_rejected": -76.13737487792969, + "logps/rejected": -1030.5811767578125, + "loss": 1.1429, + "margin_dpo/margin_mean": 328.0302429199219, + "margin_dpo/margin_std": 566.1884155273438, + "step": 477 + }, + { + "KL/chosen_KL_mean": -511.62579345703125, + "KL/mean": -708.7156982421875, + "KL/rejected_KL_mean": -905.8055419921875, + "KL/std": 440.7845458984375, + "epoch": 0.7019089574155654, + "fcm_dpo/beta": 0.0011402592062950134, + "fcm_dpo/delta": -0.05179014056921005, + "fcm_dpo/margin": 394.17974853515625, + "fcm_dpo/q_t": 0.39800071716308594, + "grad_norm": 40.23908615112305, + "learning_rate": 1.2500000000000005e-07, + "logits/chosen": -0.8462599515914917, + "logits/rejected": -0.871573805809021, + "logps/chosen": -553.0662841796875, + "logps/ref_chosen": -41.440513610839844, + "logps/ref_rejected": -85.36196899414062, + "logps/rejected": -991.1675415039062, + "loss": 1.0683, + "margin_dpo/margin_mean": 394.1797790527344, + "margin_dpo/margin_std": 522.7603759765625, + "step": 478 + }, + { + "KL/chosen_KL_mean": -657.9866943359375, + "KL/mean": -846.1517333984375, + "KL/rejected_KL_mean": -1034.316650390625, + "KL/std": 541.3948974609375, + "epoch": 0.7033773861967695, + "fcm_dpo/beta": 0.0011373090092092752, + "fcm_dpo/delta": -0.030031614005565643, + "fcm_dpo/margin": 376.3300476074219, + "fcm_dpo/q_t": 0.40709632635116577, + "grad_norm": 29.177635192871094, + "learning_rate": 1.2389025514492456e-07, + "logits/chosen": -0.9058327674865723, + "logits/rejected": -0.9377299547195435, + "logps/chosen": -711.8945922851562, + "logps/ref_chosen": -53.907920837402344, + "logps/ref_rejected": -95.1163330078125, + "logps/rejected": -1129.43310546875, + "loss": 1.1171, + "margin_dpo/margin_mean": 376.3300476074219, + "margin_dpo/margin_std": 624.73388671875, + "step": 479 + }, + { + "KL/chosen_KL_mean": -804.2739868164062, + "KL/mean": -948.6687622070312, + "KL/rejected_KL_mean": -1093.0634765625, + "KL/std": 507.27008056640625, + "epoch": 0.7048458149779736, + "fcm_dpo/beta": 0.0011228574439883232, + "fcm_dpo/delta": -0.042888376861810684, + "fcm_dpo/margin": 288.78948974609375, + "fcm_dpo/q_t": 0.4276391863822937, + "grad_norm": 56.077796936035156, + "learning_rate": 1.227838333989088e-07, + "logits/chosen": -0.9502737522125244, + "logits/rejected": -0.9440046548843384, + "logps/chosen": -862.9566650390625, + "logps/ref_chosen": -58.682701110839844, + "logps/ref_rejected": -82.93248748779297, + "logps/rejected": -1175.9959716796875, + "loss": 1.188, + "margin_dpo/margin_mean": 288.78948974609375, + "margin_dpo/margin_std": 554.4617309570312, + "step": 480 + }, + { + "KL/chosen_KL_mean": -672.9927368164062, + "KL/mean": -899.4321899414062, + "KL/rejected_KL_mean": -1125.87158203125, + "KL/std": 524.7504272460938, + "epoch": 0.7063142437591777, + "fcm_dpo/beta": 0.0011024028062820435, + "fcm_dpo/delta": -0.10436421632766724, + "fcm_dpo/margin": 452.87890625, + "fcm_dpo/q_t": 0.38820528984069824, + "grad_norm": 37.83536148071289, + "learning_rate": 1.2168076391719489e-07, + "logits/chosen": -0.9408276081085205, + "logits/rejected": -0.9681203365325928, + "logps/chosen": -727.95703125, + "logps/ref_chosen": -54.964271545410156, + "logps/ref_rejected": -92.42044067382812, + "logps/rejected": -1218.2919921875, + "loss": 1.0354, + "margin_dpo/margin_mean": 452.87890625, + "margin_dpo/margin_std": 566.91455078125, + "step": 481 + }, + { + "KL/chosen_KL_mean": -738.0073852539062, + "KL/mean": -845.139892578125, + "KL/rejected_KL_mean": -952.2725219726562, + "KL/std": 542.2655029296875, + "epoch": 0.7077826725403817, + "fcm_dpo/beta": 0.0011032463517040014, + "fcm_dpo/delta": 0.07522930204868317, + "fcm_dpo/margin": 214.26512145996094, + "fcm_dpo/q_t": 0.44445592164993286, + "grad_norm": 56.61158752441406, + "learning_rate": 1.2058107576668938e-07, + "logits/chosen": -0.8482377529144287, + "logits/rejected": -0.8390638828277588, + "logps/chosen": -805.560791015625, + "logps/ref_chosen": -67.553466796875, + "logps/ref_rejected": -87.58953857421875, + "logps/rejected": -1039.862060546875, + "loss": 1.2751, + "margin_dpo/margin_mean": 214.26513671875, + "margin_dpo/margin_std": 631.7608642578125, + "step": 482 + }, + { + "KL/chosen_KL_mean": -640.1693115234375, + "KL/mean": -879.429931640625, + "KL/rejected_KL_mean": -1118.690673828125, + "KL/std": 530.899658203125, + "epoch": 0.7092511013215859, + "fcm_dpo/beta": 0.0010884404182434082, + "fcm_dpo/delta": -0.1278223842382431, + "fcm_dpo/margin": 478.5213928222656, + "fcm_dpo/q_t": 0.38443121314048767, + "grad_norm": 34.9405517578125, + "learning_rate": 1.194847979251979e-07, + "logits/chosen": -0.9293410778045654, + "logits/rejected": -0.9389553070068359, + "logps/chosen": -703.4990844726562, + "logps/ref_chosen": -63.32981872558594, + "logps/ref_rejected": -95.78697204589844, + "logps/rejected": -1214.4775390625, + "loss": 1.0221, + "margin_dpo/margin_mean": 478.5213623046875, + "margin_dpo/margin_std": 590.3174438476562, + "step": 483 + }, + { + "KL/chosen_KL_mean": -548.8181762695312, + "KL/mean": -766.58642578125, + "KL/rejected_KL_mean": -984.354736328125, + "KL/std": 516.6387939453125, + "epoch": 0.71071953010279, + "fcm_dpo/beta": 0.001076672924682498, + "fcm_dpo/delta": -0.0724029541015625, + "fcm_dpo/margin": 435.53656005859375, + "fcm_dpo/q_t": 0.39384713768959045, + "grad_norm": 53.70915985107422, + "learning_rate": 1.1839195928066101e-07, + "logits/chosen": -0.9350720643997192, + "logits/rejected": -0.9612249135971069, + "logps/chosen": -607.956298828125, + "logps/ref_chosen": -59.13812255859375, + "logps/ref_rejected": -84.37144470214844, + "logps/rejected": -1068.7261962890625, + "loss": 1.0454, + "margin_dpo/margin_mean": 435.53656005859375, + "margin_dpo/margin_std": 524.6602783203125, + "step": 484 + }, + { + "KL/chosen_KL_mean": -576.3857421875, + "KL/mean": -781.9747314453125, + "KL/rejected_KL_mean": -987.5636596679688, + "KL/std": 519.3931884765625, + "epoch": 0.7121879588839941, + "fcm_dpo/beta": 0.001064480864442885, + "fcm_dpo/delta": -0.039454929530620575, + "fcm_dpo/margin": 411.17791748046875, + "fcm_dpo/q_t": 0.4025202989578247, + "grad_norm": 35.44499588012695, + "learning_rate": 1.1730258863039347e-07, + "logits/chosen": -0.8758097887039185, + "logits/rejected": -0.8995819091796875, + "logps/chosen": -635.2353515625, + "logps/ref_chosen": -58.849571228027344, + "logps/ref_rejected": -103.36408233642578, + "logps/rejected": -1090.927734375, + "loss": 1.0858, + "margin_dpo/margin_mean": 411.17791748046875, + "margin_dpo/margin_std": 591.315185546875, + "step": 485 + }, + { + "KL/chosen_KL_mean": -663.7813720703125, + "KL/mean": -897.005126953125, + "KL/rejected_KL_mean": -1130.22900390625, + "KL/std": 587.8470458984375, + "epoch": 0.7136563876651982, + "fcm_dpo/beta": 0.0010440791957080364, + "fcm_dpo/delta": -0.09158313274383545, + "fcm_dpo/margin": 466.44757080078125, + "fcm_dpo/q_t": 0.39278823137283325, + "grad_norm": 38.644596099853516, + "learning_rate": 1.1621671468032493e-07, + "logits/chosen": -0.9534709453582764, + "logits/rejected": -0.9716538786888123, + "logps/chosen": -719.041015625, + "logps/ref_chosen": -55.25966262817383, + "logps/ref_rejected": -92.13936614990234, + "logps/rejected": -1222.368408203125, + "loss": 1.0796, + "margin_dpo/margin_mean": 466.4476013183594, + "margin_dpo/margin_std": 711.1044311523438, + "step": 486 + }, + { + "KL/chosen_KL_mean": -689.0608520507812, + "KL/mean": -862.2830200195312, + "KL/rejected_KL_mean": -1035.505126953125, + "KL/std": 522.194580078125, + "epoch": 0.7151248164464024, + "fcm_dpo/beta": 0.001049531390890479, + "fcm_dpo/delta": 0.03742973506450653, + "fcm_dpo/margin": 346.44439697265625, + "fcm_dpo/q_t": 0.4152664542198181, + "grad_norm": 34.07633590698242, + "learning_rate": 1.1513436604424378e-07, + "logits/chosen": -0.9346251487731934, + "logits/rejected": -0.9410355091094971, + "logps/chosen": -742.1241455078125, + "logps/ref_chosen": -53.06330871582031, + "logps/ref_rejected": -92.41883087158203, + "logps/rejected": -1127.924072265625, + "loss": 1.1294, + "margin_dpo/margin_mean": 346.44439697265625, + "margin_dpo/margin_std": 535.1236572265625, + "step": 487 + }, + { + "KL/chosen_KL_mean": -562.1060791015625, + "KL/mean": -746.0523681640625, + "KL/rejected_KL_mean": -929.9986572265625, + "KL/std": 474.5586853027344, + "epoch": 0.7165932452276065, + "fcm_dpo/beta": 0.0010536068584769964, + "fcm_dpo/delta": 0.012613944709300995, + "fcm_dpo/margin": 367.89251708984375, + "fcm_dpo/q_t": 0.4102671444416046, + "grad_norm": 30.069625854492188, + "learning_rate": 1.1405557124304335e-07, + "logits/chosen": -0.8993455767631531, + "logits/rejected": -0.9078420400619507, + "logps/chosen": -614.334228515625, + "logps/ref_chosen": -52.22815704345703, + "logps/ref_rejected": -84.00656127929688, + "logps/rejected": -1014.0052490234375, + "loss": 1.0911, + "margin_dpo/margin_mean": 367.89251708984375, + "margin_dpo/margin_std": 458.40264892578125, + "step": 488 + }, + { + "KL/chosen_KL_mean": -515.2543334960938, + "KL/mean": -693.9641723632812, + "KL/rejected_KL_mean": -872.674072265625, + "KL/std": 473.46533203125, + "epoch": 0.7180616740088106, + "fcm_dpo/beta": 0.0010581349488347769, + "fcm_dpo/delta": 0.02230164408683777, + "fcm_dpo/margin": 357.4197082519531, + "fcm_dpo/q_t": 0.41536301374435425, + "grad_norm": 27.20409393310547, + "learning_rate": 1.1298035870396985e-07, + "logits/chosen": -0.8895210027694702, + "logits/rejected": -0.8903396725654602, + "logps/chosen": -571.2440185546875, + "logps/ref_chosen": -55.989627838134766, + "logps/ref_rejected": -79.39812469482422, + "logps/rejected": -952.0721435546875, + "loss": 1.1131, + "margin_dpo/margin_mean": 357.419677734375, + "margin_dpo/margin_std": 521.9287719726562, + "step": 489 + }, + { + "KL/chosen_KL_mean": -629.0771484375, + "KL/mean": -812.7498779296875, + "KL/rejected_KL_mean": -996.4226684570312, + "KL/std": 568.397705078125, + "epoch": 0.7195301027900147, + "fcm_dpo/beta": 0.001059696776792407, + "fcm_dpo/delta": 0.011018646880984306, + "fcm_dpo/margin": 367.3455505371094, + "fcm_dpo/q_t": 0.4139998257160187, + "grad_norm": 36.384334564208984, + "learning_rate": 1.1190875675987355e-07, + "logits/chosen": -0.8977552652359009, + "logits/rejected": -0.9355182647705078, + "logps/chosen": -681.4434814453125, + "logps/ref_chosen": -52.36639404296875, + "logps/ref_rejected": -110.4090576171875, + "logps/rejected": -1106.831787109375, + "loss": 1.1447, + "margin_dpo/margin_mean": 367.3455505371094, + "margin_dpo/margin_std": 653.3365478515625, + "step": 490 + }, + { + "KL/chosen_KL_mean": -569.136474609375, + "KL/mean": -696.23974609375, + "KL/rejected_KL_mean": -823.343017578125, + "KL/std": 484.46240234375, + "epoch": 0.7209985315712188, + "fcm_dpo/beta": 0.0010794580448418856, + "fcm_dpo/delta": 0.1289866715669632, + "fcm_dpo/margin": 254.20654296875, + "fcm_dpo/q_t": 0.4374847710132599, + "grad_norm": 30.11342430114746, + "learning_rate": 1.1084079364846241e-07, + "logits/chosen": -0.8976389169692993, + "logits/rejected": -0.8920071125030518, + "logps/chosen": -629.2527465820312, + "logps/ref_chosen": -60.11626434326172, + "logps/ref_rejected": -73.27278900146484, + "logps/rejected": -896.6157836914062, + "loss": 1.1925, + "margin_dpo/margin_mean": 254.20654296875, + "margin_dpo/margin_std": 475.316162109375, + "step": 491 + }, + { + "KL/chosen_KL_mean": -585.7317504882812, + "KL/mean": -709.7108154296875, + "KL/rejected_KL_mean": -833.68994140625, + "KL/std": 479.278076171875, + "epoch": 0.7224669603524229, + "fcm_dpo/beta": 0.0011044761631637812, + "fcm_dpo/delta": 0.1298675835132599, + "fcm_dpo/margin": 247.95816040039062, + "fcm_dpo/q_t": 0.4389011859893799, + "grad_norm": 42.94180679321289, + "learning_rate": 1.097764975115576e-07, + "logits/chosen": -0.9602404832839966, + "logits/rejected": -0.9433440566062927, + "logps/chosen": -639.7259521484375, + "logps/ref_chosen": -53.994178771972656, + "logps/ref_rejected": -72.65962219238281, + "logps/rejected": -906.349609375, + "loss": 1.2187, + "margin_dpo/margin_mean": 247.95819091796875, + "margin_dpo/margin_std": 550.007080078125, + "step": 492 + }, + { + "KL/chosen_KL_mean": -619.7557373046875, + "KL/mean": -758.3751220703125, + "KL/rejected_KL_mean": -896.9945068359375, + "KL/std": 526.8731689453125, + "epoch": 0.723935389133627, + "fcm_dpo/beta": 0.0011116546811535954, + "fcm_dpo/delta": -0.010466049425303936, + "fcm_dpo/margin": 277.23876953125, + "fcm_dpo/q_t": 0.42805489897727966, + "grad_norm": 33.16301727294922, + "learning_rate": 1.0871589639435203e-07, + "logits/chosen": -0.9904724359512329, + "logits/rejected": -0.9639154076576233, + "logps/chosen": -695.2529296875, + "logps/ref_chosen": -75.49723815917969, + "logps/ref_rejected": -87.32301330566406, + "logps/rejected": -984.3175048828125, + "loss": 1.1813, + "margin_dpo/margin_mean": 277.2387390136719, + "margin_dpo/margin_std": 518.248046875, + "step": 493 + }, + { + "KL/chosen_KL_mean": -485.149658203125, + "KL/mean": -703.2138671875, + "KL/rejected_KL_mean": -921.278076171875, + "KL/std": 474.62786865234375, + "epoch": 0.7254038179148311, + "fcm_dpo/beta": 0.0010987753048539162, + "fcm_dpo/delta": -0.08314534276723862, + "fcm_dpo/margin": 436.12835693359375, + "fcm_dpo/q_t": 0.3895169794559479, + "grad_norm": 45.52617263793945, + "learning_rate": 1.0765901824467166e-07, + "logits/chosen": -0.8565849661827087, + "logits/rejected": -0.8936357498168945, + "logps/chosen": -526.5089111328125, + "logps/ref_chosen": -41.35926818847656, + "logps/ref_rejected": -86.09136962890625, + "logps/rejected": -1007.369384765625, + "loss": 1.0279, + "margin_dpo/margin_mean": 436.12835693359375, + "margin_dpo/margin_std": 479.73455810546875, + "step": 494 + }, + { + "KL/chosen_KL_mean": -545.6207275390625, + "KL/mean": -735.65185546875, + "KL/rejected_KL_mean": -925.6829833984375, + "KL/std": 484.06903076171875, + "epoch": 0.7268722466960352, + "fcm_dpo/beta": 0.0010912488214671612, + "fcm_dpo/delta": -0.015377325937151909, + "fcm_dpo/margin": 380.0621337890625, + "fcm_dpo/q_t": 0.40759721398353577, + "grad_norm": 32.46592330932617, + "learning_rate": 1.0660589091223854e-07, + "logits/chosen": -0.9482539892196655, + "logits/rejected": -0.9554197192192078, + "logps/chosen": -609.1558227539062, + "logps/ref_chosen": -63.53507995605469, + "logps/ref_rejected": -91.42443084716797, + "logps/rejected": -1017.1073608398438, + "loss": 1.0985, + "margin_dpo/margin_mean": 380.0621337890625, + "margin_dpo/margin_std": 567.2548217773438, + "step": 495 + }, + { + "KL/chosen_KL_mean": -678.7178955078125, + "KL/mean": -781.5296020507812, + "KL/rejected_KL_mean": -884.3413696289062, + "KL/std": 392.3121337890625, + "epoch": 0.7283406754772394, + "fcm_dpo/beta": 0.0011189571814611554, + "fcm_dpo/delta": 0.1738756000995636, + "fcm_dpo/margin": 205.62347412109375, + "fcm_dpo/q_t": 0.44650715589523315, + "grad_norm": 76.83142852783203, + "learning_rate": 1.0555654214793722e-07, + "logits/chosen": -0.9424889087677002, + "logits/rejected": -0.9166613817214966, + "logps/chosen": -751.309814453125, + "logps/ref_chosen": -72.5919189453125, + "logps/ref_rejected": -84.32933807373047, + "logps/rejected": -968.6707153320312, + "loss": 1.2282, + "margin_dpo/margin_mean": 205.62347412109375, + "margin_dpo/margin_std": 438.72454833984375, + "step": 496 + }, + { + "KL/chosen_KL_mean": -631.7765502929688, + "KL/mean": -734.545166015625, + "KL/rejected_KL_mean": -837.3136596679688, + "KL/std": 477.0352478027344, + "epoch": 0.7298091042584435, + "fcm_dpo/beta": 0.0011342904763296247, + "fcm_dpo/delta": 0.02543473243713379, + "fcm_dpo/margin": 205.537109375, + "fcm_dpo/q_t": 0.4462537467479706, + "grad_norm": 40.14469528198242, + "learning_rate": 1.0451099960308374e-07, + "logits/chosen": -0.9075003266334534, + "logits/rejected": -0.8960117101669312, + "logps/chosen": -690.3705444335938, + "logps/ref_chosen": -58.59397506713867, + "logps/ref_rejected": -76.28836822509766, + "logps/rejected": -913.60205078125, + "loss": 1.2322, + "margin_dpo/margin_mean": 205.537109375, + "margin_dpo/margin_std": 450.22613525390625, + "step": 497 + }, + { + "KL/chosen_KL_mean": -571.6414794921875, + "KL/mean": -741.5089111328125, + "KL/rejected_KL_mean": -911.3762817382812, + "KL/std": 496.2296142578125, + "epoch": 0.7312775330396476, + "fcm_dpo/beta": 0.0011364180827513337, + "fcm_dpo/delta": 0.014480667188763618, + "fcm_dpo/margin": 339.73480224609375, + "fcm_dpo/q_t": 0.4120427668094635, + "grad_norm": 27.699867248535156, + "learning_rate": 1.0346929082869641e-07, + "logits/chosen": -0.8944777250289917, + "logits/rejected": -0.8845921754837036, + "logps/chosen": -642.84716796875, + "logps/ref_chosen": -71.20565795898438, + "logps/ref_rejected": -83.95803833007812, + "logps/rejected": -995.3343505859375, + "loss": 1.1291, + "margin_dpo/margin_mean": 339.73480224609375, + "margin_dpo/margin_std": 556.2047119140625, + "step": 498 + }, + { + "KL/chosen_KL_mean": -502.248779296875, + "KL/mean": -700.484375, + "KL/rejected_KL_mean": -898.719970703125, + "KL/std": 475.0845947265625, + "epoch": 0.7327459618208517, + "fcm_dpo/beta": 0.0011275302385911345, + "fcm_dpo/delta": -0.04935740679502487, + "fcm_dpo/margin": 396.4712829589844, + "fcm_dpo/q_t": 0.39803507924079895, + "grad_norm": 35.038902282714844, + "learning_rate": 1.0243144327477013e-07, + "logits/chosen": -0.9036816954612732, + "logits/rejected": -0.9378571510314941, + "logps/chosen": -553.5039672851562, + "logps/ref_chosen": -51.25519561767578, + "logps/ref_rejected": -101.07870483398438, + "logps/rejected": -999.7987060546875, + "loss": 1.0675, + "margin_dpo/margin_mean": 396.47125244140625, + "margin_dpo/margin_std": 522.6738891601562, + "step": 499 + }, + { + "KL/chosen_KL_mean": -610.1287841796875, + "KL/mean": -780.4580078125, + "KL/rejected_KL_mean": -950.7872314453125, + "KL/std": 427.2696228027344, + "epoch": 0.7342143906020558, + "fcm_dpo/beta": 0.0011279778555035591, + "fcm_dpo/delta": 0.01636883243918419, + "fcm_dpo/margin": 340.65838623046875, + "fcm_dpo/q_t": 0.4120955467224121, + "grad_norm": 33.4288215637207, + "learning_rate": 1.0139748428955333e-07, + "logits/chosen": -0.9120993614196777, + "logits/rejected": -0.9445118308067322, + "logps/chosen": -667.15625, + "logps/ref_chosen": -57.027442932128906, + "logps/ref_rejected": -93.93421173095703, + "logps/rejected": -1044.721435546875, + "loss": 1.1223, + "margin_dpo/margin_mean": 340.65838623046875, + "margin_dpo/margin_std": 526.3189697265625, + "step": 500 + }, + { + "KL/chosen_KL_mean": -524.825439453125, + "KL/mean": -709.2818603515625, + "KL/rejected_KL_mean": -893.73828125, + "KL/std": 459.21185302734375, + "epoch": 0.73568281938326, + "fcm_dpo/beta": 0.001129691954702139, + "fcm_dpo/delta": -0.017550457268953323, + "fcm_dpo/margin": 368.912841796875, + "fcm_dpo/q_t": 0.40666812658309937, + "grad_norm": 29.30938720703125, + "learning_rate": 1.0036744111882672e-07, + "logits/chosen": -0.8794831037521362, + "logits/rejected": -0.8698313236236572, + "logps/chosen": -579.1849365234375, + "logps/ref_chosen": -54.359527587890625, + "logps/ref_rejected": -80.15670013427734, + "logps/rejected": -973.8949584960938, + "loss": 1.1026, + "margin_dpo/margin_mean": 368.912841796875, + "margin_dpo/margin_std": 553.5332641601562, + "step": 501 + }, + { + "KL/chosen_KL_mean": -475.44189453125, + "KL/mean": -656.1715087890625, + "KL/rejected_KL_mean": -836.9010009765625, + "KL/std": 405.69573974609375, + "epoch": 0.737151248164464, + "fcm_dpo/beta": 0.0011267581721767783, + "fcm_dpo/delta": -0.007630977779626846, + "fcm_dpo/margin": 361.4590759277344, + "fcm_dpo/q_t": 0.4055173695087433, + "grad_norm": 29.00743865966797, + "learning_rate": 9.934134090518592e-08, + "logits/chosen": -0.7667361497879028, + "logits/rejected": -0.7510417699813843, + "logps/chosen": -543.0424194335938, + "logps/ref_chosen": -67.60050964355469, + "logps/ref_rejected": -82.94876098632812, + "logps/rejected": -919.8497924804688, + "loss": 1.071, + "margin_dpo/margin_mean": 361.4591064453125, + "margin_dpo/margin_std": 415.29315185546875, + "step": 502 + }, + { + "KL/chosen_KL_mean": -481.5869140625, + "KL/mean": -647.6417236328125, + "KL/rejected_KL_mean": -813.696533203125, + "KL/std": 403.74560546875, + "epoch": 0.7386196769456681, + "fcm_dpo/beta": 0.001126825693063438, + "fcm_dpo/delta": 0.026751546189188957, + "fcm_dpo/margin": 332.1096496582031, + "fcm_dpo/q_t": 0.41448622941970825, + "grad_norm": 27.742767333984375, + "learning_rate": 9.831921068732571e-08, + "logits/chosen": -0.794667661190033, + "logits/rejected": -0.7808655500411987, + "logps/chosen": -536.665283203125, + "logps/ref_chosen": -55.078407287597656, + "logps/ref_rejected": -82.50544738769531, + "logps/rejected": -896.2020263671875, + "loss": 1.1014, + "margin_dpo/margin_mean": 332.10968017578125, + "margin_dpo/margin_std": 432.3536376953125, + "step": 503 + }, + { + "KL/chosen_KL_mean": -538.2932739257812, + "KL/mean": -737.7694702148438, + "KL/rejected_KL_mean": -937.2457275390625, + "KL/std": 478.7033996582031, + "epoch": 0.7400881057268722, + "fcm_dpo/beta": 0.0011236823629587889, + "fcm_dpo/delta": -0.050532855093479156, + "fcm_dpo/margin": 398.95245361328125, + "fcm_dpo/q_t": 0.39904850721359253, + "grad_norm": 32.015926361083984, + "learning_rate": 9.730107739932805e-08, + "logits/chosen": -0.8786238431930542, + "logits/rejected": -0.9033294320106506, + "logps/chosen": -598.259033203125, + "logps/ref_chosen": -59.96575164794922, + "logps/ref_rejected": -103.76212310791016, + "logps/rejected": -1041.0078125, + "loss": 1.0754, + "margin_dpo/margin_mean": 398.95245361328125, + "margin_dpo/margin_std": 538.3695678710938, + "step": 504 + }, + { + "KL/chosen_KL_mean": -600.709716796875, + "KL/mean": -710.2824096679688, + "KL/rejected_KL_mean": -819.8551025390625, + "KL/std": 460.1452331542969, + "epoch": 0.7415565345080763, + "fcm_dpo/beta": 0.0011460301466286182, + "fcm_dpo/delta": 0.1525171399116516, + "fcm_dpo/margin": 219.1453094482422, + "fcm_dpo/q_t": 0.4422228932380676, + "grad_norm": 34.641334533691406, + "learning_rate": 9.628696786995188e-08, + "logits/chosen": -0.8766049742698669, + "logits/rejected": -0.8530220985412598, + "logps/chosen": -676.8646240234375, + "logps/ref_chosen": -76.1549072265625, + "logps/ref_rejected": -88.58537292480469, + "logps/rejected": -908.4404296875, + "loss": 1.2118, + "margin_dpo/margin_mean": 219.14532470703125, + "margin_dpo/margin_std": 433.828125, + "step": 505 + }, + { + "KL/chosen_KL_mean": -490.38372802734375, + "KL/mean": -671.2208251953125, + "KL/rejected_KL_mean": -852.057861328125, + "KL/std": 451.6136169433594, + "epoch": 0.7430249632892805, + "fcm_dpo/beta": 0.0011465998832136393, + "fcm_dpo/delta": -0.015700122341513634, + "fcm_dpo/margin": 361.67413330078125, + "fcm_dpo/q_t": 0.4050843119621277, + "grad_norm": 36.54334259033203, + "learning_rate": 9.527690882192635e-08, + "logits/chosen": -0.8800439834594727, + "logits/rejected": -0.8960914611816406, + "logps/chosen": -539.34423828125, + "logps/ref_chosen": -48.96050262451172, + "logps/ref_rejected": -78.41505432128906, + "logps/rejected": -930.472900390625, + "loss": 1.0883, + "margin_dpo/margin_mean": 361.67413330078125, + "margin_dpo/margin_std": 490.28369140625, + "step": 506 + }, + { + "KL/chosen_KL_mean": -584.2061767578125, + "KL/mean": -741.224853515625, + "KL/rejected_KL_mean": -898.2435913085938, + "KL/std": 549.9444580078125, + "epoch": 0.7444933920704846, + "fcm_dpo/beta": 0.0011556025128811598, + "fcm_dpo/delta": 0.038512568920850754, + "fcm_dpo/margin": 314.0374755859375, + "fcm_dpo/q_t": 0.42233383655548096, + "grad_norm": 33.112762451171875, + "learning_rate": 9.427092687124691e-08, + "logits/chosen": -0.9117947816848755, + "logits/rejected": -0.918329119682312, + "logps/chosen": -651.0076293945312, + "logps/ref_chosen": -66.80149841308594, + "logps/ref_rejected": -95.37289428710938, + "logps/rejected": -993.616455078125, + "loss": 1.162, + "margin_dpo/margin_mean": 314.0374755859375, + "margin_dpo/margin_std": 601.7278442382812, + "step": 507 + }, + { + "KL/chosen_KL_mean": -628.045166015625, + "KL/mean": -761.368896484375, + "KL/rejected_KL_mean": -894.692626953125, + "KL/std": 525.7574462890625, + "epoch": 0.7459618208516887, + "fcm_dpo/beta": 0.0011775526218116283, + "fcm_dpo/delta": 0.08841653168201447, + "fcm_dpo/margin": 266.6474609375, + "fcm_dpo/q_t": 0.43183645606040955, + "grad_norm": 40.246891021728516, + "learning_rate": 9.326904852647344e-08, + "logits/chosen": -0.8933985233306885, + "logits/rejected": -0.8938655853271484, + "logps/chosen": -699.3486328125, + "logps/ref_chosen": -71.303466796875, + "logps/ref_rejected": -95.6275405883789, + "logps/rejected": -990.3201904296875, + "loss": 1.2157, + "margin_dpo/margin_mean": 266.6474609375, + "margin_dpo/margin_std": 611.8427734375, + "step": 508 + }, + { + "KL/chosen_KL_mean": -462.01898193359375, + "KL/mean": -608.3475952148438, + "KL/rejected_KL_mean": -754.6761474609375, + "KL/std": 369.74627685546875, + "epoch": 0.7474302496328928, + "fcm_dpo/beta": 0.0011943180579692125, + "fcm_dpo/delta": 0.051631003618240356, + "fcm_dpo/margin": 292.65716552734375, + "fcm_dpo/q_t": 0.4205209016799927, + "grad_norm": 31.764202117919922, + "learning_rate": 9.227130018803195e-08, + "logits/chosen": -0.8035761117935181, + "logits/rejected": -0.7988163232803345, + "logps/chosen": -525.8379516601562, + "logps/ref_chosen": -63.81895065307617, + "logps/ref_rejected": -83.25643920898438, + "logps/rejected": -837.9326171875, + "loss": 1.1367, + "margin_dpo/margin_mean": 292.65716552734375, + "margin_dpo/margin_std": 450.46405029296875, + "step": 509 + }, + { + "KL/chosen_KL_mean": -559.66650390625, + "KL/mean": -753.0732421875, + "KL/rejected_KL_mean": -946.47998046875, + "KL/std": 431.13751220703125, + "epoch": 0.748898678414097, + "fcm_dpo/beta": 0.0011832050513476133, + "fcm_dpo/delta": -0.060402024537324905, + "fcm_dpo/margin": 386.8134460449219, + "fcm_dpo/q_t": 0.39339399337768555, + "grad_norm": 38.86001205444336, + "learning_rate": 9.127770814751932e-08, + "logits/chosen": -0.7946321964263916, + "logits/rejected": -0.8168176412582397, + "logps/chosen": -611.544921875, + "logps/ref_chosen": -51.878448486328125, + "logps/ref_rejected": -102.7651596069336, + "logps/rejected": -1049.2451171875, + "loss": 1.039, + "margin_dpo/margin_mean": 386.8134765625, + "margin_dpo/margin_std": 423.6184997558594, + "step": 510 + }, + { + "KL/chosen_KL_mean": -525.6728515625, + "KL/mean": -678.17822265625, + "KL/rejected_KL_mean": -830.68359375, + "KL/std": 464.34112548828125, + "epoch": 0.750367107195301, + "fcm_dpo/beta": 0.0011853575706481934, + "fcm_dpo/delta": 0.03986484557390213, + "fcm_dpo/margin": 305.0107727050781, + "fcm_dpo/q_t": 0.4183180034160614, + "grad_norm": 41.20817565917969, + "learning_rate": 9.028829858700973e-08, + "logits/chosen": -0.9006566405296326, + "logits/rejected": -0.9088428616523743, + "logps/chosen": -585.9109497070312, + "logps/ref_chosen": -60.23811721801758, + "logps/ref_rejected": -92.85676574707031, + "logps/rejected": -923.5404052734375, + "loss": 1.1564, + "margin_dpo/margin_mean": 305.0107421875, + "margin_dpo/margin_std": 559.034423828125, + "step": 511 + }, + { + "KL/chosen_KL_mean": -420.6644592285156, + "KL/mean": -627.825439453125, + "KL/rejected_KL_mean": -834.9864501953125, + "KL/std": 417.6683349609375, + "epoch": 0.7518355359765051, + "fcm_dpo/beta": 0.0011696910951286554, + "fcm_dpo/delta": -0.08911710977554321, + "fcm_dpo/margin": 414.32196044921875, + "fcm_dpo/q_t": 0.387323796749115, + "grad_norm": 55.33091735839844, + "learning_rate": 8.930309757836516e-08, + "logits/chosen": -0.8243488073348999, + "logits/rejected": -0.8429218530654907, + "logps/chosen": -475.5699462890625, + "logps/ref_chosen": -54.905494689941406, + "logps/ref_rejected": -81.87586975097656, + "logps/rejected": -916.8623046875, + "loss": 1.0149, + "margin_dpo/margin_mean": 414.32196044921875, + "margin_dpo/margin_std": 421.341796875, + "step": 512 + }, + { + "KL/chosen_KL_mean": -542.2540283203125, + "KL/mean": -693.2520751953125, + "KL/rejected_KL_mean": -844.2501220703125, + "KL/std": 411.43499755859375, + "epoch": 0.7533039647577092, + "fcm_dpo/beta": 0.001167251612059772, + "fcm_dpo/delta": 0.04872651398181915, + "fcm_dpo/margin": 301.99615478515625, + "fcm_dpo/q_t": 0.42014437913894653, + "grad_norm": 42.702476501464844, + "learning_rate": 8.832213108254863e-08, + "logits/chosen": -0.8896423578262329, + "logits/rejected": -0.8757469654083252, + "logps/chosen": -607.17041015625, + "logps/ref_chosen": -64.91644287109375, + "logps/ref_rejected": -76.06245422363281, + "logps/rejected": -920.3125610351562, + "loss": 1.1408, + "margin_dpo/margin_mean": 301.9961242675781, + "margin_dpo/margin_std": 475.26885986328125, + "step": 513 + }, + { + "KL/chosen_KL_mean": -558.1743774414062, + "KL/mean": -709.8671875, + "KL/rejected_KL_mean": -861.5599365234375, + "KL/std": 448.15179443359375, + "epoch": 0.7547723935389133, + "fcm_dpo/beta": 0.0011872373288497329, + "fcm_dpo/delta": 0.04112107306718826, + "fcm_dpo/margin": 303.3856201171875, + "fcm_dpo/q_t": 0.4199420213699341, + "grad_norm": 35.660560607910156, + "learning_rate": 8.734542494893954e-08, + "logits/chosen": -0.8495243191719055, + "logits/rejected": -0.8417561054229736, + "logps/chosen": -632.4039306640625, + "logps/ref_chosen": -74.22957611083984, + "logps/ref_rejected": -78.945556640625, + "logps/rejected": -940.5054931640625, + "loss": 1.1399, + "margin_dpo/margin_mean": 303.3856201171875, + "margin_dpo/margin_std": 503.74169921875, + "step": 514 + }, + { + "KL/chosen_KL_mean": -477.5955505371094, + "KL/mean": -589.99072265625, + "KL/rejected_KL_mean": -702.3858642578125, + "KL/std": 382.209716796875, + "epoch": 0.7562408223201175, + "fcm_dpo/beta": 0.0012109719682484865, + "fcm_dpo/delta": 0.1313389241695404, + "fcm_dpo/margin": 224.79022216796875, + "fcm_dpo/q_t": 0.4361518621444702, + "grad_norm": 48.169334411621094, + "learning_rate": 8.637300491465272e-08, + "logits/chosen": -0.7953609228134155, + "logits/rejected": -0.8050397634506226, + "logps/chosen": -527.9971313476562, + "logps/ref_chosen": -50.40156555175781, + "logps/ref_rejected": -87.09774780273438, + "logps/rejected": -789.4835815429688, + "loss": 1.201, + "margin_dpo/margin_mean": 224.79022216796875, + "margin_dpo/margin_std": 446.3857421875, + "step": 515 + }, + { + "KL/chosen_KL_mean": -504.6857604980469, + "KL/mean": -675.954833984375, + "KL/rejected_KL_mean": -847.2237548828125, + "KL/std": 428.9405212402344, + "epoch": 0.7577092511013216, + "fcm_dpo/beta": 0.001219091354869306, + "fcm_dpo/delta": -0.01847529225051403, + "fcm_dpo/margin": 342.53802490234375, + "fcm_dpo/q_t": 0.40270549058914185, + "grad_norm": 45.79306411743164, + "learning_rate": 8.540489660386064e-08, + "logits/chosen": -0.883423924446106, + "logits/rejected": -0.9122099876403809, + "logps/chosen": -569.3353271484375, + "logps/ref_chosen": -64.64956665039062, + "logps/ref_rejected": -111.72237396240234, + "logps/rejected": -958.9461669921875, + "loss": 1.0738, + "margin_dpo/margin_mean": 342.53802490234375, + "margin_dpo/margin_std": 418.29913330078125, + "step": 516 + }, + { + "KL/chosen_KL_mean": -534.3342895507812, + "KL/mean": -731.6787109375, + "KL/rejected_KL_mean": -929.0232543945312, + "KL/std": 476.9851989746094, + "epoch": 0.7591776798825257, + "fcm_dpo/beta": 0.0011984179727733135, + "fcm_dpo/delta": -0.07682677358388901, + "fcm_dpo/margin": 394.6889343261719, + "fcm_dpo/q_t": 0.395224928855896, + "grad_norm": 29.204376220703125, + "learning_rate": 8.444112552711752e-08, + "logits/chosen": -0.830208420753479, + "logits/rejected": -0.8261853456497192, + "logps/chosen": -595.247802734375, + "logps/ref_chosen": -60.913551330566406, + "logps/ref_rejected": -89.08308410644531, + "logps/rejected": -1018.1063232421875, + "loss": 1.0572, + "margin_dpo/margin_mean": 394.6889343261719, + "margin_dpo/margin_std": 530.7623291015625, + "step": 517 + }, + { + "KL/chosen_KL_mean": -472.4967041015625, + "KL/mean": -634.7481689453125, + "KL/rejected_KL_mean": -796.99951171875, + "KL/std": 382.7030944824219, + "epoch": 0.7606461086637298, + "fcm_dpo/beta": 0.0011941856937482953, + "fcm_dpo/delta": 0.012808417901396751, + "fcm_dpo/margin": 324.5027770996094, + "fcm_dpo/q_t": 0.4095669090747833, + "grad_norm": 62.06501007080078, + "learning_rate": 8.348171708068747e-08, + "logits/chosen": -0.8725818395614624, + "logits/rejected": -0.8878906965255737, + "logps/chosen": -529.95263671875, + "logps/ref_chosen": -57.45589065551758, + "logps/ref_rejected": -85.31269836425781, + "logps/rejected": -882.312255859375, + "loss": 1.0945, + "margin_dpo/margin_mean": 324.5028076171875, + "margin_dpo/margin_std": 413.08428955078125, + "step": 518 + }, + { + "KL/chosen_KL_mean": -474.32818603515625, + "KL/mean": -593.5274047851562, + "KL/rejected_KL_mean": -712.7265625, + "KL/std": 341.5205078125, + "epoch": 0.762114537444934, + "fcm_dpo/beta": 0.0012234165333211422, + "fcm_dpo/delta": 0.11090720444917679, + "fcm_dpo/margin": 238.39837646484375, + "fcm_dpo/q_t": 0.4324970841407776, + "grad_norm": 33.864437103271484, + "learning_rate": 8.25266965458755e-08, + "logits/chosen": -0.8436448574066162, + "logits/rejected": -0.8289774060249329, + "logps/chosen": -548.3914794921875, + "logps/ref_chosen": -74.06331634521484, + "logps/ref_rejected": -104.44416809082031, + "logps/rejected": -817.1707763671875, + "loss": 1.1817, + "margin_dpo/margin_mean": 238.39837646484375, + "margin_dpo/margin_std": 425.4404296875, + "step": 519 + }, + { + "KL/chosen_KL_mean": -510.67193603515625, + "KL/mean": -660.7890625, + "KL/rejected_KL_mean": -810.9061889648438, + "KL/std": 406.78131103515625, + "epoch": 0.7635829662261381, + "fcm_dpo/beta": 0.001228465000167489, + "fcm_dpo/delta": 0.0323098823428154, + "fcm_dpo/margin": 300.2342529296875, + "fcm_dpo/q_t": 0.4170858561992645, + "grad_norm": 38.63158416748047, + "learning_rate": 8.15760890883607e-08, + "logits/chosen": -0.8047879934310913, + "logits/rejected": -0.8105298280715942, + "logps/chosen": -580.9718017578125, + "logps/ref_chosen": -70.2998275756836, + "logps/ref_rejected": -99.98133850097656, + "logps/rejected": -910.8875732421875, + "loss": 1.126, + "margin_dpo/margin_mean": 300.2342529296875, + "margin_dpo/margin_std": 449.99859619140625, + "step": 520 + }, + { + "KL/chosen_KL_mean": -450.53228759765625, + "KL/mean": -614.1043090820312, + "KL/rejected_KL_mean": -777.6763305664062, + "KL/std": 415.71075439453125, + "epoch": 0.7650513950073421, + "fcm_dpo/beta": 0.0012406650930643082, + "fcm_dpo/delta": -0.006974354386329651, + "fcm_dpo/margin": 327.14410400390625, + "fcm_dpo/q_t": 0.4070153832435608, + "grad_norm": 31.36321258544922, + "learning_rate": 8.062991975753378e-08, + "logits/chosen": -0.8560887575149536, + "logits/rejected": -0.8592597246170044, + "logps/chosen": -508.67523193359375, + "logps/ref_chosen": -58.14292526245117, + "logps/ref_rejected": -83.28060913085938, + "logps/rejected": -860.9569091796875, + "loss": 1.0889, + "margin_dpo/margin_mean": 327.1440734863281, + "margin_dpo/margin_std": 424.02587890625, + "step": 521 + }, + { + "KL/chosen_KL_mean": -513.3594970703125, + "KL/mean": -659.28515625, + "KL/rejected_KL_mean": -805.2109375, + "KL/std": 429.93572998046875, + "epoch": 0.7665198237885462, + "fcm_dpo/beta": 0.001239138189703226, + "fcm_dpo/delta": 0.03980087861418724, + "fcm_dpo/margin": 291.8514404296875, + "fcm_dpo/q_t": 0.41743797063827515, + "grad_norm": 32.587318420410156, + "learning_rate": 7.968821348583643e-08, + "logits/chosen": -0.8581516146659851, + "logits/rejected": -0.8615491390228271, + "logps/chosen": -559.9071655273438, + "logps/ref_chosen": -46.54766845703125, + "logps/ref_rejected": -66.01388549804688, + "logps/rejected": -871.224853515625, + "loss": 1.1325, + "margin_dpo/margin_mean": 291.8514404296875, + "margin_dpo/margin_std": 456.8363037109375, + "step": 522 + }, + { + "KL/chosen_KL_mean": -540.905517578125, + "KL/mean": -699.052490234375, + "KL/rejected_KL_mean": -857.1994018554688, + "KL/std": 499.2489929199219, + "epoch": 0.7679882525697503, + "fcm_dpo/beta": 0.0012422900181263685, + "fcm_dpo/delta": 0.007354713976383209, + "fcm_dpo/margin": 316.2938232421875, + "fcm_dpo/q_t": 0.4125151038169861, + "grad_norm": 36.84627151489258, + "learning_rate": 7.875099508810484e-08, + "logits/chosen": -0.8944802284240723, + "logits/rejected": -0.8932949304580688, + "logps/chosen": -602.6751708984375, + "logps/ref_chosen": -61.76960372924805, + "logps/ref_rejected": -83.76141357421875, + "logps/rejected": -940.9608154296875, + "loss": 1.1311, + "margin_dpo/margin_mean": 316.2938232421875, + "margin_dpo/margin_std": 531.6959228515625, + "step": 523 + }, + { + "KL/chosen_KL_mean": -534.9653930664062, + "KL/mean": -700.6502075195312, + "KL/rejected_KL_mean": -866.3349609375, + "KL/std": 458.77227783203125, + "epoch": 0.7694566813509545, + "fcm_dpo/beta": 0.0012349834432825446, + "fcm_dpo/delta": -0.010303705930709839, + "fcm_dpo/margin": 331.36962890625, + "fcm_dpo/q_t": 0.4045429229736328, + "grad_norm": 39.90791702270508, + "learning_rate": 7.781828926091535e-08, + "logits/chosen": -0.9295982122421265, + "logits/rejected": -0.914442777633667, + "logps/chosen": -613.0374755859375, + "logps/ref_chosen": -78.0720443725586, + "logps/ref_rejected": -81.30198669433594, + "logps/rejected": -947.636962890625, + "loss": 1.1012, + "margin_dpo/margin_mean": 331.36962890625, + "margin_dpo/margin_std": 464.6836853027344, + "step": 524 + }, + { + "KL/chosen_KL_mean": -519.85302734375, + "KL/mean": -731.243408203125, + "KL/rejected_KL_mean": -942.6337890625, + "KL/std": 482.66436767578125, + "epoch": 0.7709251101321586, + "fcm_dpo/beta": 0.0012153794523328543, + "fcm_dpo/delta": -0.12056128680706024, + "fcm_dpo/margin": 422.78076171875, + "fcm_dpo/q_t": 0.3845774531364441, + "grad_norm": 41.53816604614258, + "learning_rate": 7.689012058193384e-08, + "logits/chosen": -0.8437707424163818, + "logits/rejected": -0.8764776587486267, + "logps/chosen": -570.680908203125, + "logps/ref_chosen": -50.827857971191406, + "logps/ref_rejected": -100.05294036865234, + "logps/rejected": -1042.686767578125, + "loss": 1.0215, + "margin_dpo/margin_mean": 422.78076171875, + "margin_dpo/margin_std": 505.25018310546875, + "step": 525 + }, + { + "KL/chosen_KL_mean": -562.0816650390625, + "KL/mean": -768.6236572265625, + "KL/rejected_KL_mean": -975.165771484375, + "KL/std": 466.148193359375, + "epoch": 0.7723935389133627, + "fcm_dpo/beta": 0.001197699224576354, + "fcm_dpo/delta": -0.09957602620124817, + "fcm_dpo/margin": 413.0840148925781, + "fcm_dpo/q_t": 0.38653671741485596, + "grad_norm": 29.220232009887695, + "learning_rate": 7.596651350926836e-08, + "logits/chosen": -0.8780766725540161, + "logits/rejected": -0.8744189739227295, + "logps/chosen": -625.2489013671875, + "logps/ref_chosen": -63.167236328125, + "logps/ref_rejected": -86.30934143066406, + "logps/rejected": -1061.47509765625, + "loss": 1.0386, + "margin_dpo/margin_mean": 413.083984375, + "margin_dpo/margin_std": 507.3397521972656, + "step": 526 + }, + { + "KL/chosen_KL_mean": -590.2259521484375, + "KL/mean": -731.3975830078125, + "KL/rejected_KL_mean": -872.5692138671875, + "KL/std": 487.9652099609375, + "epoch": 0.7738619676945668, + "fcm_dpo/beta": 0.0011968073667958379, + "fcm_dpo/delta": 0.06430923938751221, + "fcm_dpo/margin": 282.34320068359375, + "fcm_dpo/q_t": 0.4214463233947754, + "grad_norm": 34.30127716064453, + "learning_rate": 7.504749238082414e-08, + "logits/chosen": -1.0278353691101074, + "logits/rejected": -0.9955443143844604, + "logps/chosen": -661.3546142578125, + "logps/ref_chosen": -71.12867736816406, + "logps/ref_rejected": -78.3425521850586, + "logps/rejected": -950.9118041992188, + "loss": 1.1354, + "margin_dpo/margin_mean": 282.34320068359375, + "margin_dpo/margin_std": 413.549560546875, + "step": 527 + }, + { + "KL/chosen_KL_mean": -612.8468017578125, + "KL/mean": -792.1258544921875, + "KL/rejected_KL_mean": -971.405029296875, + "KL/std": 480.26239013671875, + "epoch": 0.775330396475771, + "fcm_dpo/beta": 0.001196006080135703, + "fcm_dpo/delta": -0.030209090560674667, + "fcm_dpo/margin": 358.5581970214844, + "fcm_dpo/q_t": 0.40584173798561096, + "grad_norm": 41.24021530151367, + "learning_rate": 7.413308141366254e-08, + "logits/chosen": -0.9734677672386169, + "logits/rejected": -0.9565155506134033, + "logps/chosen": -680.9362182617188, + "logps/ref_chosen": -68.0894546508789, + "logps/ref_rejected": -93.91006469726562, + "logps/rejected": -1065.3150634765625, + "loss": 1.1046, + "margin_dpo/margin_mean": 358.5582275390625, + "margin_dpo/margin_std": 559.9374389648438, + "step": 528 + }, + { + "KL/chosen_KL_mean": -715.942626953125, + "KL/mean": -837.968994140625, + "KL/rejected_KL_mean": -959.995361328125, + "KL/std": 430.2584228515625, + "epoch": 0.7767988252569751, + "fcm_dpo/beta": 0.001211107592098415, + "fcm_dpo/delta": 0.10773831605911255, + "fcm_dpo/margin": 244.0526580810547, + "fcm_dpo/q_t": 0.4329761266708374, + "grad_norm": 45.17340087890625, + "learning_rate": 7.322330470336313e-08, + "logits/chosen": -0.9782444834709167, + "logits/rejected": -0.9867458939552307, + "logps/chosen": -771.517578125, + "logps/ref_chosen": -55.57495880126953, + "logps/ref_rejected": -89.20909118652344, + "logps/rejected": -1049.2044677734375, + "loss": 1.2136, + "margin_dpo/margin_mean": 244.05267333984375, + "margin_dpo/margin_std": 532.4437255859375, + "step": 529 + }, + { + "KL/chosen_KL_mean": -611.8297119140625, + "KL/mean": -803.12841796875, + "KL/rejected_KL_mean": -994.427001953125, + "KL/std": 524.01220703125, + "epoch": 0.7782672540381792, + "fcm_dpo/beta": 0.0012108308728784323, + "fcm_dpo/delta": -0.06629342585802078, + "fcm_dpo/margin": 382.5973205566406, + "fcm_dpo/q_t": 0.39872339367866516, + "grad_norm": 42.13739776611328, + "learning_rate": 7.231818622338822e-08, + "logits/chosen": -0.9113196730613708, + "logits/rejected": -0.9070870876312256, + "logps/chosen": -659.43115234375, + "logps/ref_chosen": -47.601417541503906, + "logps/ref_rejected": -87.2845230102539, + "logps/rejected": -1081.7115478515625, + "loss": 1.1077, + "margin_dpo/margin_mean": 382.59735107421875, + "margin_dpo/margin_std": 640.5091552734375, + "step": 530 + }, + { + "KL/chosen_KL_mean": -693.5, + "KL/mean": -858.4159545898438, + "KL/rejected_KL_mean": -1023.3319091796875, + "KL/std": 566.25390625, + "epoch": 0.7797356828193832, + "fcm_dpo/beta": 0.0012086308561265469, + "fcm_dpo/delta": 0.001310013234615326, + "fcm_dpo/margin": 329.83197021484375, + "fcm_dpo/q_t": 0.4118250906467438, + "grad_norm": 42.0710563659668, + "learning_rate": 7.141774982445147e-08, + "logits/chosen": -1.0113496780395508, + "logits/rejected": -0.9929705858230591, + "logps/chosen": -748.7460327148438, + "logps/ref_chosen": -55.246063232421875, + "logps/ref_rejected": -70.60598754882812, + "logps/rejected": -1093.93798828125, + "loss": 1.1228, + "margin_dpo/margin_mean": 329.83197021484375, + "margin_dpo/margin_std": 535.4554443359375, + "step": 531 + }, + { + "KL/chosen_KL_mean": -689.9940185546875, + "KL/mean": -870.5237426757812, + "KL/rejected_KL_mean": -1051.053466796875, + "KL/std": 547.52587890625, + "epoch": 0.7812041116005873, + "fcm_dpo/beta": 0.0011906104627996683, + "fcm_dpo/delta": -0.03280455619096756, + "fcm_dpo/margin": 361.05938720703125, + "fcm_dpo/q_t": 0.4059098958969116, + "grad_norm": 65.42340850830078, + "learning_rate": 7.052201923388953e-08, + "logits/chosen": -0.954756498336792, + "logits/rejected": -0.9292545914649963, + "logps/chosen": -760.280029296875, + "logps/ref_chosen": -70.28601837158203, + "logps/ref_rejected": -86.5913314819336, + "logps/rejected": -1137.644775390625, + "loss": 1.1397, + "margin_dpo/margin_mean": 361.05938720703125, + "margin_dpo/margin_std": 642.376953125, + "step": 532 + }, + { + "KL/chosen_KL_mean": -603.1324462890625, + "KL/mean": -732.3844604492188, + "KL/rejected_KL_mean": -861.636474609375, + "KL/std": 453.2569580078125, + "epoch": 0.7826725403817915, + "fcm_dpo/beta": 0.0011966102756559849, + "fcm_dpo/delta": -0.0130624333396554, + "fcm_dpo/margin": 258.5040283203125, + "fcm_dpo/q_t": 0.4301643371582031, + "grad_norm": 43.53575897216797, + "learning_rate": 6.963101805503646e-08, + "logits/chosen": -0.9747976064682007, + "logits/rejected": -0.9492688775062561, + "logps/chosen": -667.987548828125, + "logps/ref_chosen": -64.8551025390625, + "logps/ref_rejected": -76.58805847167969, + "logps/rejected": -938.2244873046875, + "loss": 1.1999, + "margin_dpo/margin_mean": 258.5040283203125, + "margin_dpo/margin_std": 543.6436767578125, + "step": 533 + }, + { + "KL/chosen_KL_mean": -645.685791015625, + "KL/mean": -820.00927734375, + "KL/rejected_KL_mean": -994.332763671875, + "KL/std": 505.59075927734375, + "epoch": 0.7841409691629956, + "fcm_dpo/beta": 0.0011840970255434513, + "fcm_dpo/delta": -0.014506392180919647, + "fcm_dpo/margin": 348.64691162109375, + "fcm_dpo/q_t": 0.4072118401527405, + "grad_norm": 40.776493072509766, + "learning_rate": 6.874476976660184e-08, + "logits/chosen": -0.9978982210159302, + "logits/rejected": -0.9965052008628845, + "logps/chosen": -705.80517578125, + "logps/ref_chosen": -60.119388580322266, + "logps/ref_rejected": -78.54347229003906, + "logps/rejected": -1072.876220703125, + "loss": 1.1036, + "margin_dpo/margin_mean": 348.64691162109375, + "margin_dpo/margin_std": 510.9076232910156, + "step": 534 + }, + { + "KL/chosen_KL_mean": -552.3275146484375, + "KL/mean": -750.717529296875, + "KL/rejected_KL_mean": -949.107666015625, + "KL/std": 493.9169921875, + "epoch": 0.7856093979441997, + "fcm_dpo/beta": 0.0011865987908095121, + "fcm_dpo/delta": -0.07477213442325592, + "fcm_dpo/margin": 396.780029296875, + "fcm_dpo/q_t": 0.39498764276504517, + "grad_norm": 30.75901222229004, + "learning_rate": 6.786329772205246e-08, + "logits/chosen": -0.9087494611740112, + "logits/rejected": -0.90961092710495, + "logps/chosen": -606.6577758789062, + "logps/ref_chosen": -54.330238342285156, + "logps/ref_rejected": -96.30763244628906, + "logps/rejected": -1045.415283203125, + "loss": 1.061, + "margin_dpo/margin_mean": 396.780029296875, + "margin_dpo/margin_std": 520.8341064453125, + "step": 535 + }, + { + "KL/chosen_KL_mean": -481.53271484375, + "KL/mean": -724.086181640625, + "KL/rejected_KL_mean": -966.6396484375, + "KL/std": 552.1593627929688, + "epoch": 0.7870778267254038, + "fcm_dpo/beta": 0.0011459384113550186, + "fcm_dpo/delta": -0.16502085328102112, + "fcm_dpo/margin": 485.10699462890625, + "fcm_dpo/q_t": 0.38111627101898193, + "grad_norm": 33.95513916015625, + "learning_rate": 6.698662514899638e-08, + "logits/chosen": -0.8919925689697266, + "logits/rejected": -0.9215620756149292, + "logps/chosen": -528.61328125, + "logps/ref_chosen": -47.08053207397461, + "logps/ref_rejected": -89.09783935546875, + "logps/rejected": -1055.737548828125, + "loss": 1.0176, + "margin_dpo/margin_mean": 485.10693359375, + "margin_dpo/margin_std": 655.8091430664062, + "step": 536 + }, + { + "KL/chosen_KL_mean": -525.6631469726562, + "KL/mean": -683.4346923828125, + "KL/rejected_KL_mean": -841.2061767578125, + "KL/std": 446.1978454589844, + "epoch": 0.788546255506608, + "fcm_dpo/beta": 0.0011480746325105429, + "fcm_dpo/delta": 0.038443662226200104, + "fcm_dpo/margin": 315.5430603027344, + "fcm_dpo/q_t": 0.4170358180999756, + "grad_norm": 48.758907318115234, + "learning_rate": 6.611477514857114e-08, + "logits/chosen": -0.9522344470024109, + "logits/rejected": -0.9362703561782837, + "logps/chosen": -583.41064453125, + "logps/ref_chosen": -57.747467041015625, + "logps/ref_rejected": -70.43838500976562, + "logps/rejected": -911.6445922851562, + "loss": 1.1505, + "margin_dpo/margin_mean": 315.5430603027344, + "margin_dpo/margin_std": 543.536376953125, + "step": 537 + }, + { + "KL/chosen_KL_mean": -643.0128784179688, + "KL/mean": -829.154541015625, + "KL/rejected_KL_mean": -1015.29638671875, + "KL/std": 483.1163330078125, + "epoch": 0.7900146842878121, + "fcm_dpo/beta": 0.0011375262401998043, + "fcm_dpo/delta": -0.02470758929848671, + "fcm_dpo/margin": 372.283447265625, + "fcm_dpo/q_t": 0.40403687953948975, + "grad_norm": 32.13328170776367, + "learning_rate": 6.524777069483525e-08, + "logits/chosen": -0.9377896785736084, + "logits/rejected": -0.9210348129272461, + "logps/chosen": -709.4288330078125, + "logps/ref_chosen": -66.41594696044922, + "logps/ref_rejected": -84.22808837890625, + "logps/rejected": -1099.5244140625, + "loss": 1.0819, + "margin_dpo/margin_mean": 372.283447265625, + "margin_dpo/margin_std": 503.72845458984375, + "step": 538 + }, + { + "KL/chosen_KL_mean": -555.8558349609375, + "KL/mean": -729.8284912109375, + "KL/rejected_KL_mean": -903.8011474609375, + "KL/std": 412.5657653808594, + "epoch": 0.7914831130690162, + "fcm_dpo/beta": 0.00114156911149621, + "fcm_dpo/delta": 0.002832382917404175, + "fcm_dpo/margin": 347.9453430175781, + "fcm_dpo/q_t": 0.40886110067367554, + "grad_norm": 33.58725357055664, + "learning_rate": 6.438563463416221e-08, + "logits/chosen": -0.9776486158370972, + "logits/rejected": -0.9680448174476624, + "logps/chosen": -614.3486328125, + "logps/ref_chosen": -58.492855072021484, + "logps/ref_rejected": -91.85395050048828, + "logps/rejected": -995.6550903320312, + "loss": 1.0923, + "margin_dpo/margin_mean": 347.9453430175781, + "margin_dpo/margin_std": 457.59783935546875, + "step": 539 + }, + { + "KL/chosen_KL_mean": -555.8110961914062, + "KL/mean": -786.366455078125, + "KL/rejected_KL_mean": -1016.921875, + "KL/std": 521.7627563476562, + "epoch": 0.7929515418502202, + "fcm_dpo/beta": 0.0011229969095438719, + "fcm_dpo/delta": -0.12408408522605896, + "fcm_dpo/margin": 461.1107177734375, + "fcm_dpo/q_t": 0.38754981756210327, + "grad_norm": 32.977603912353516, + "learning_rate": 6.352838968463919e-08, + "logits/chosen": -0.9218310713768005, + "logits/rejected": -0.944137692451477, + "logps/chosen": -619.2936401367188, + "logps/ref_chosen": -63.482513427734375, + "logps/ref_rejected": -116.42999267578125, + "logps/rejected": -1133.351806640625, + "loss": 1.0431, + "margin_dpo/margin_mean": 461.1107177734375, + "margin_dpo/margin_std": 613.2233276367188, + "step": 540 + }, + { + "KL/chosen_KL_mean": -667.7610473632812, + "KL/mean": -789.4322509765625, + "KL/rejected_KL_mean": -911.1033935546875, + "KL/std": 467.2005920410156, + "epoch": 0.7944199706314243, + "fcm_dpo/beta": 0.0011111920466646552, + "fcm_dpo/delta": -0.0019461165647953749, + "fcm_dpo/margin": 243.34225463867188, + "fcm_dpo/q_t": 0.43856528401374817, + "grad_norm": 51.01187515258789, + "learning_rate": 6.267605843546767e-08, + "logits/chosen": -1.004181981086731, + "logits/rejected": -0.992661714553833, + "logps/chosen": -746.0413818359375, + "logps/ref_chosen": -78.28036499023438, + "logps/ref_rejected": -103.273681640625, + "logps/rejected": -1014.3770751953125, + "loss": 1.2244, + "margin_dpo/margin_mean": 243.34228515625, + "margin_dpo/margin_std": 547.9241943359375, + "step": 541 + }, + { + "KL/chosen_KL_mean": -576.544677734375, + "KL/mean": -803.12255859375, + "KL/rejected_KL_mean": -1029.7003173828125, + "KL/std": 522.02685546875, + "epoch": 0.7958883994126285, + "fcm_dpo/beta": 0.001085467985831201, + "fcm_dpo/delta": -0.09905168414115906, + "fcm_dpo/margin": 453.1556091308594, + "fcm_dpo/q_t": 0.39055657386779785, + "grad_norm": 47.77986145019531, + "learning_rate": 6.182866334636888e-08, + "logits/chosen": -0.9915866255760193, + "logits/rejected": -1.0235321521759033, + "logps/chosen": -634.0296630859375, + "logps/ref_chosen": -57.48497009277344, + "logps/ref_rejected": -96.47506713867188, + "logps/rejected": -1126.1754150390625, + "loss": 1.0578, + "margin_dpo/margin_mean": 453.1556091308594, + "margin_dpo/margin_std": 612.7730712890625, + "step": 542 + }, + { + "KL/chosen_KL_mean": -665.509521484375, + "KL/mean": -829.4156494140625, + "KL/rejected_KL_mean": -993.3218994140625, + "KL/std": 642.0631103515625, + "epoch": 0.7973568281938326, + "fcm_dpo/beta": 0.001093997503630817, + "fcm_dpo/delta": 0.042909275740385056, + "fcm_dpo/margin": 327.8123474121094, + "fcm_dpo/q_t": 0.4311785399913788, + "grad_norm": 30.012170791625977, + "learning_rate": 6.098622674699147e-08, + "logits/chosen": -0.9615781307220459, + "logits/rejected": -0.9914584159851074, + "logps/chosen": -726.1270141601562, + "logps/ref_chosen": -60.61750793457031, + "logps/ref_rejected": -105.59896850585938, + "logps/rejected": -1098.9208984375, + "loss": 1.1968, + "margin_dpo/margin_mean": 327.8123474121094, + "margin_dpo/margin_std": 748.0994262695312, + "step": 543 + }, + { + "KL/chosen_KL_mean": -667.9552001953125, + "KL/mean": -856.1734008789062, + "KL/rejected_KL_mean": -1044.3916015625, + "KL/std": 508.0960693359375, + "epoch": 0.7988252569750367, + "fcm_dpo/beta": 0.0010954445460811257, + "fcm_dpo/delta": -0.012936984188854694, + "fcm_dpo/margin": 376.4364013671875, + "fcm_dpo/q_t": 0.4067176282405853, + "grad_norm": 32.4089469909668, + "learning_rate": 6.01487708363232e-08, + "logits/chosen": -0.9522734880447388, + "logits/rejected": -0.9718469381332397, + "logps/chosen": -727.5975341796875, + "logps/ref_chosen": -59.642303466796875, + "logps/ref_rejected": -100.95469665527344, + "logps/rejected": -1145.34619140625, + "loss": 1.1045, + "margin_dpo/margin_mean": 376.4364013671875, + "margin_dpo/margin_std": 568.992919921875, + "step": 544 + }, + { + "KL/chosen_KL_mean": -619.7081298828125, + "KL/mean": -844.5244140625, + "KL/rejected_KL_mean": -1069.3408203125, + "KL/std": 505.60400390625, + "epoch": 0.8002936857562408, + "fcm_dpo/beta": 0.0010812245309352875, + "fcm_dpo/delta": -0.09056208282709122, + "fcm_dpo/margin": 449.6327209472656, + "fcm_dpo/q_t": 0.3911089301109314, + "grad_norm": 32.737972259521484, + "learning_rate": 5.9316317682106294e-08, + "logits/chosen": -0.9047988653182983, + "logits/rejected": -0.938023567199707, + "logps/chosen": -687.356689453125, + "logps/ref_chosen": -67.64859771728516, + "logps/ref_rejected": -95.90800476074219, + "logps/rejected": -1165.248779296875, + "loss": 1.0441, + "margin_dpo/margin_mean": 449.6326904296875, + "margin_dpo/margin_std": 570.2744140625, + "step": 545 + }, + { + "KL/chosen_KL_mean": -595.77880859375, + "KL/mean": -746.6336669921875, + "KL/rejected_KL_mean": -897.488525390625, + "KL/std": 452.455322265625, + "epoch": 0.801762114537445, + "fcm_dpo/beta": 0.0010882640490308404, + "fcm_dpo/delta": 0.07409149408340454, + "fcm_dpo/margin": 301.70965576171875, + "fcm_dpo/q_t": 0.4228670001029968, + "grad_norm": 35.74420928955078, + "learning_rate": 5.848888922025552e-08, + "logits/chosen": -0.9621337652206421, + "logits/rejected": -0.9499717354774475, + "logps/chosen": -646.5230712890625, + "logps/ref_chosen": -50.744232177734375, + "logps/ref_rejected": -81.86622619628906, + "logps/rejected": -979.354736328125, + "loss": 1.1537, + "margin_dpo/margin_mean": 301.70965576171875, + "margin_dpo/margin_std": 491.32568359375, + "step": 546 + }, + { + "KL/chosen_KL_mean": -605.8523559570312, + "KL/mean": -803.1954345703125, + "KL/rejected_KL_mean": -1000.5384521484375, + "KL/std": 501.6481628417969, + "epoch": 0.8032305433186491, + "fcm_dpo/beta": 0.0010885847732424736, + "fcm_dpo/delta": -0.030972033739089966, + "fcm_dpo/margin": 394.6861267089844, + "fcm_dpo/q_t": 0.40285325050354004, + "grad_norm": 43.03551483154297, + "learning_rate": 5.7666507254280265e-08, + "logits/chosen": -0.9044293165206909, + "logits/rejected": -0.9168886542320251, + "logps/chosen": -679.5400390625, + "logps/ref_chosen": -73.6877212524414, + "logps/ref_rejected": -90.76136779785156, + "logps/rejected": -1091.2998046875, + "loss": 1.0824, + "margin_dpo/margin_mean": 394.68609619140625, + "margin_dpo/margin_std": 543.5963745117188, + "step": 547 + }, + { + "KL/chosen_KL_mean": -626.2013549804688, + "KL/mean": -800.2800903320312, + "KL/rejected_KL_mean": -974.35888671875, + "KL/std": 522.8885498046875, + "epoch": 0.8046989720998532, + "fcm_dpo/beta": 0.001088649732992053, + "fcm_dpo/delta": 0.021810464560985565, + "fcm_dpo/margin": 348.1575622558594, + "fcm_dpo/q_t": 0.41777533292770386, + "grad_norm": 30.352699279785156, + "learning_rate": 5.684919345471029e-08, + "logits/chosen": -0.9622275829315186, + "logits/rejected": -0.9603374004364014, + "logps/chosen": -691.4476928710938, + "logps/ref_chosen": -65.24634552001953, + "logps/ref_rejected": -94.11807250976562, + "logps/rejected": -1068.47705078125, + "loss": 1.1264, + "margin_dpo/margin_mean": 348.1575622558594, + "margin_dpo/margin_std": 577.4847412109375, + "step": 548 + }, + { + "KL/chosen_KL_mean": -650.4908447265625, + "KL/mean": -785.2673950195312, + "KL/rejected_KL_mean": -920.0439453125, + "KL/std": 439.36041259765625, + "epoch": 0.8061674008810573, + "fcm_dpo/beta": 0.0010918962070718408, + "fcm_dpo/delta": 0.014238527044653893, + "fcm_dpo/margin": 269.5531005859375, + "fcm_dpo/q_t": 0.43372684717178345, + "grad_norm": 48.91337203979492, + "learning_rate": 5.603696935852426e-08, + "logits/chosen": -0.9819549918174744, + "logits/rejected": -0.9728246927261353, + "logps/chosen": -699.7032470703125, + "logps/ref_chosen": -49.21235656738281, + "logps/ref_rejected": -73.91031646728516, + "logps/rejected": -993.9542846679688, + "loss": 1.1922, + "margin_dpo/margin_mean": 269.5531005859375, + "margin_dpo/margin_std": 526.7017822265625, + "step": 549 + }, + { + "KL/chosen_KL_mean": -643.39013671875, + "KL/mean": -804.6975708007812, + "KL/rejected_KL_mean": -966.0050048828125, + "KL/std": 482.32452392578125, + "epoch": 0.8076358296622613, + "fcm_dpo/beta": 0.0010998416692018509, + "fcm_dpo/delta": 0.04686359316110611, + "fcm_dpo/margin": 322.6148376464844, + "fcm_dpo/q_t": 0.41901546716690063, + "grad_norm": 45.43336868286133, + "learning_rate": 5.5229856368582376e-08, + "logits/chosen": -0.9264281988143921, + "logits/rejected": -0.9501577615737915, + "logps/chosen": -700.1971435546875, + "logps/ref_chosen": -56.80695343017578, + "logps/ref_rejected": -95.12580871582031, + "logps/rejected": -1061.130859375, + "loss": 1.1343, + "margin_dpo/margin_mean": 322.6148376464844, + "margin_dpo/margin_std": 508.62127685546875, + "step": 550 + }, + { + "KL/chosen_KL_mean": -564.6527099609375, + "KL/mean": -833.1539306640625, + "KL/rejected_KL_mean": -1101.6552734375, + "KL/std": 532.0479736328125, + "epoch": 0.8091042584434655, + "fcm_dpo/beta": 0.0010715980315580964, + "fcm_dpo/delta": -0.18658655881881714, + "fcm_dpo/margin": 537.0025634765625, + "fcm_dpo/q_t": 0.3684789538383484, + "grad_norm": 60.08759689331055, + "learning_rate": 5.4427875753062734e-08, + "logits/chosen": -0.9065227508544922, + "logits/rejected": -0.9625868797302246, + "logps/chosen": -623.759033203125, + "logps/ref_chosen": -59.10633087158203, + "logps/ref_rejected": -111.67280578613281, + "logps/rejected": -1213.3280029296875, + "loss": 0.9628, + "margin_dpo/margin_mean": 537.0025634765625, + "margin_dpo/margin_std": 528.0545654296875, + "step": 551 + }, + { + "KL/chosen_KL_mean": -535.4678344726562, + "KL/mean": -828.958984375, + "KL/rejected_KL_mean": -1122.449951171875, + "KL/std": 597.85693359375, + "epoch": 0.8105726872246696, + "fcm_dpo/beta": 0.0010204364079982042, + "fcm_dpo/delta": -0.21537676453590393, + "fcm_dpo/margin": 586.982177734375, + "fcm_dpo/q_t": 0.3694334626197815, + "grad_norm": 48.343841552734375, + "learning_rate": 5.363104864490034e-08, + "logits/chosen": -0.937026858329773, + "logits/rejected": -0.9775291681289673, + "logps/chosen": -597.8224487304688, + "logps/ref_chosen": -62.35459899902344, + "logps/ref_rejected": -104.56210327148438, + "logps/rejected": -1227.0120849609375, + "loss": 0.9784, + "margin_dpo/margin_mean": 586.982177734375, + "margin_dpo/margin_std": 677.5487060546875, + "step": 552 + }, + { + "KL/chosen_KL_mean": -622.9216918945312, + "KL/mean": -777.06005859375, + "KL/rejected_KL_mean": -931.198486328125, + "KL/std": 503.83489990234375, + "epoch": 0.8120411160058737, + "fcm_dpo/beta": 0.0010282043367624283, + "fcm_dpo/delta": 0.08580633997917175, + "fcm_dpo/margin": 308.27679443359375, + "fcm_dpo/q_t": 0.43020299077033997, + "grad_norm": 28.72612762451172, + "learning_rate": 5.2839396041230415e-08, + "logits/chosen": -0.9406133890151978, + "logits/rejected": -0.9367384910583496, + "logps/chosen": -691.1805419921875, + "logps/ref_chosen": -68.25881958007812, + "logps/ref_rejected": -98.0971450805664, + "logps/rejected": -1029.295654296875, + "loss": 1.168, + "margin_dpo/margin_mean": 308.27679443359375, + "margin_dpo/margin_std": 553.154541015625, + "step": 553 + }, + { + "KL/chosen_KL_mean": -654.8176879882812, + "KL/mean": -860.4717407226562, + "KL/rejected_KL_mean": -1066.125732421875, + "KL/std": 554.7606811523438, + "epoch": 0.8135095447870778, + "fcm_dpo/beta": 0.00103902374394238, + "fcm_dpo/delta": -0.029413558542728424, + "fcm_dpo/margin": 411.3079833984375, + "fcm_dpo/q_t": 0.4067898094654083, + "grad_norm": 94.50716400146484, + "learning_rate": 5.205293880283551e-08, + "logits/chosen": -0.9185788035392761, + "logits/rejected": -0.8953433036804199, + "logps/chosen": -722.765380859375, + "logps/ref_chosen": -67.94767761230469, + "logps/ref_rejected": -89.78272247314453, + "logps/rejected": -1155.908447265625, + "loss": 1.1295, + "margin_dpo/margin_mean": 411.3079833984375, + "margin_dpo/margin_std": 707.3028564453125, + "step": 554 + }, + { + "KL/chosen_KL_mean": -656.3858642578125, + "KL/mean": -897.638916015625, + "KL/rejected_KL_mean": -1138.89208984375, + "KL/std": 570.92724609375, + "epoch": 0.8149779735682819, + "fcm_dpo/beta": 0.0010173844639211893, + "fcm_dpo/delta": -0.09548080712556839, + "fcm_dpo/margin": 482.5062255859375, + "fcm_dpo/q_t": 0.39459365606307983, + "grad_norm": 39.124813079833984, + "learning_rate": 5.127169765359515e-08, + "logits/chosen": -0.9555931687355042, + "logits/rejected": -1.0082941055297852, + "logps/chosen": -709.71630859375, + "logps/ref_chosen": -53.33049011230469, + "logps/ref_rejected": -108.47937774658203, + "logps/rejected": -1247.3714599609375, + "loss": 1.0765, + "margin_dpo/margin_mean": 482.5062255859375, + "margin_dpo/margin_std": 734.37890625, + "step": 555 + }, + { + "KL/chosen_KL_mean": -621.6959228515625, + "KL/mean": -768.7464599609375, + "KL/rejected_KL_mean": -915.7969970703125, + "KL/std": 436.3918762207031, + "epoch": 0.8164464023494861, + "fcm_dpo/beta": 0.0010262987343594432, + "fcm_dpo/delta": 0.10118204355239868, + "fcm_dpo/margin": 294.1011047363281, + "fcm_dpo/q_t": 0.4302240312099457, + "grad_norm": 35.36925506591797, + "learning_rate": 5.049569317994012e-08, + "logits/chosen": -0.9388109445571899, + "logits/rejected": -0.9352363348007202, + "logps/chosen": -680.3403930664062, + "logps/ref_chosen": -58.64447021484375, + "logps/ref_rejected": -101.34040832519531, + "logps/rejected": -1017.137451171875, + "loss": 1.1551, + "margin_dpo/margin_mean": 294.1011047363281, + "margin_dpo/margin_std": 439.78289794921875, + "step": 556 + }, + { + "KL/chosen_KL_mean": -696.53369140625, + "KL/mean": -917.8419799804688, + "KL/rejected_KL_mean": -1139.150390625, + "KL/std": 616.8480224609375, + "epoch": 0.8179148311306902, + "fcm_dpo/beta": 0.0010207702871412039, + "fcm_dpo/delta": -0.05446251481771469, + "fcm_dpo/margin": 442.61669921875, + "fcm_dpo/q_t": 0.401253342628479, + "grad_norm": 69.26335906982422, + "learning_rate": 4.9724945830310144e-08, + "logits/chosen": -0.9779127836227417, + "logits/rejected": -1.0124623775482178, + "logps/chosen": -764.374267578125, + "logps/ref_chosen": -67.84066009521484, + "logps/ref_rejected": -109.93965911865234, + "logps/rejected": -1249.090087890625, + "loss": 1.1, + "margin_dpo/margin_mean": 442.61669921875, + "margin_dpo/margin_std": 697.1099853515625, + "step": 557 + }, + { + "KL/chosen_KL_mean": -624.0965576171875, + "KL/mean": -916.3228759765625, + "KL/rejected_KL_mean": -1208.549072265625, + "KL/std": 568.7822265625, + "epoch": 0.8193832599118943, + "fcm_dpo/beta": 0.0009917229181155562, + "fcm_dpo/delta": -0.19064576923847198, + "fcm_dpo/margin": 584.4525146484375, + "fcm_dpo/q_t": 0.3662768006324768, + "grad_norm": 35.57923126220703, + "learning_rate": 4.8959475914614554e-08, + "logits/chosen": -1.0224618911743164, + "logits/rejected": -1.0410199165344238, + "logps/chosen": -686.46484375, + "logps/ref_chosen": -62.36824035644531, + "logps/ref_rejected": -102.16102600097656, + "logps/rejected": -1310.710205078125, + "loss": 0.9776, + "margin_dpo/margin_mean": 584.4525146484375, + "margin_dpo/margin_std": 628.419677734375, + "step": 558 + }, + { + "KL/chosen_KL_mean": -702.362060546875, + "KL/mean": -949.4503173828125, + "KL/rejected_KL_mean": -1196.53857421875, + "KL/std": 592.9994506835938, + "epoch": 0.8208516886930984, + "fcm_dpo/beta": 0.000971162342466414, + "fcm_dpo/delta": -0.08387550711631775, + "fcm_dpo/margin": 494.1765441894531, + "fcm_dpo/q_t": 0.39221078157424927, + "grad_norm": 28.913423538208008, + "learning_rate": 4.8199303603697614e-08, + "logits/chosen": -1.1046611070632935, + "logits/rejected": -1.1130573749542236, + "logps/chosen": -763.1143798828125, + "logps/ref_chosen": -60.752323150634766, + "logps/ref_rejected": -93.44229125976562, + "logps/rejected": -1289.9808349609375, + "loss": 1.0485, + "margin_dpo/margin_mean": 494.1765441894531, + "margin_dpo/margin_std": 634.8758544921875, + "step": 559 + }, + { + "KL/chosen_KL_mean": -646.1812744140625, + "KL/mean": -825.5607299804688, + "KL/rejected_KL_mean": -1004.9402465820312, + "KL/std": 523.7559204101562, + "epoch": 0.8223201174743024, + "fcm_dpo/beta": 0.0009687429992482066, + "fcm_dpo/delta": 0.05413452535867691, + "fcm_dpo/margin": 358.7589111328125, + "fcm_dpo/q_t": 0.42102909088134766, + "grad_norm": 29.267030715942383, + "learning_rate": 4.7444448928806615e-08, + "logits/chosen": -0.9093506336212158, + "logits/rejected": -0.8941901326179504, + "logps/chosen": -704.2850952148438, + "logps/ref_chosen": -58.10382080078125, + "logps/ref_rejected": -79.99122619628906, + "logps/rejected": -1084.931396484375, + "loss": 1.1422, + "margin_dpo/margin_mean": 358.7589111328125, + "margin_dpo/margin_std": 570.3694458007812, + "step": 560 + }, + { + "KL/chosen_KL_mean": -744.1492919921875, + "KL/mean": -893.382568359375, + "KL/rejected_KL_mean": -1042.61572265625, + "KL/std": 518.4768676757812, + "epoch": 0.8237885462555066, + "fcm_dpo/beta": 0.0009945239871740341, + "fcm_dpo/delta": 0.10583681613206863, + "fcm_dpo/margin": 298.46636962890625, + "fcm_dpo/q_t": 0.4313068687915802, + "grad_norm": 46.64256286621094, + "learning_rate": 4.669493178106432e-08, + "logits/chosen": -1.0665897130966187, + "logits/rejected": -1.0877900123596191, + "logps/chosen": -795.062255859375, + "logps/ref_chosen": -50.912879943847656, + "logps/ref_rejected": -99.06856536865234, + "logps/rejected": -1141.684326171875, + "loss": 1.2066, + "margin_dpo/margin_mean": 298.46636962890625, + "margin_dpo/margin_std": 644.6820678710938, + "step": 561 + }, + { + "KL/chosen_KL_mean": -701.2473754882812, + "KL/mean": -912.952392578125, + "KL/rejected_KL_mean": -1124.657470703125, + "KL/std": 575.4769287109375, + "epoch": 0.8252569750367107, + "fcm_dpo/beta": 0.0009885327890515327, + "fcm_dpo/delta": -0.02009068801999092, + "fcm_dpo/margin": 423.4101257324219, + "fcm_dpo/q_t": 0.4067729115486145, + "grad_norm": 40.539154052734375, + "learning_rate": 4.5950771910944596e-08, + "logits/chosen": -1.0031108856201172, + "logits/rejected": -1.0146968364715576, + "logps/chosen": -760.7117919921875, + "logps/ref_chosen": -59.46440124511719, + "logps/ref_rejected": -96.54266357421875, + "logps/rejected": -1221.2001953125, + "loss": 1.0985, + "margin_dpo/margin_mean": 423.41015625, + "margin_dpo/margin_std": 631.9222412109375, + "step": 562 + }, + { + "KL/chosen_KL_mean": -767.9593505859375, + "KL/mean": -925.231689453125, + "KL/rejected_KL_mean": -1082.504150390625, + "KL/std": 606.1444091796875, + "epoch": 0.8267254038179148, + "fcm_dpo/beta": 0.0009880930883809924, + "fcm_dpo/delta": -0.04907416179776192, + "fcm_dpo/margin": 314.54473876953125, + "fcm_dpo/q_t": 0.42376065254211426, + "grad_norm": 46.25846481323242, + "learning_rate": 4.521198892775202e-08, + "logits/chosen": -0.9742862582206726, + "logits/rejected": -0.9843175411224365, + "logps/chosen": -828.5675659179688, + "logps/ref_chosen": -60.60819625854492, + "logps/ref_rejected": -94.56770324707031, + "logps/rejected": -1177.07177734375, + "loss": 1.2224, + "margin_dpo/margin_mean": 314.54473876953125, + "margin_dpo/margin_std": 707.6341552734375, + "step": 563 + }, + { + "KL/chosen_KL_mean": -685.3291015625, + "KL/mean": -886.4181518554688, + "KL/rejected_KL_mean": -1087.50732421875, + "KL/std": 537.7996826171875, + "epoch": 0.8281938325991189, + "fcm_dpo/beta": 0.0009863328887149692, + "fcm_dpo/delta": 0.0033044088631868362, + "fcm_dpo/margin": 402.17816162109375, + "fcm_dpo/q_t": 0.40960630774497986, + "grad_norm": 47.38982009887695, + "learning_rate": 4.447860229910544e-08, + "logits/chosen": -1.0377906560897827, + "logits/rejected": -1.0290945768356323, + "logps/chosen": -759.5974731445312, + "logps/ref_chosen": -74.26837921142578, + "logps/ref_rejected": -93.23818969726562, + "logps/rejected": -1180.7454833984375, + "loss": 1.091, + "margin_dpo/margin_mean": 402.17816162109375, + "margin_dpo/margin_std": 517.3626098632812, + "step": 564 + }, + { + "KL/chosen_KL_mean": -727.7404174804688, + "KL/mean": -939.229736328125, + "KL/rejected_KL_mean": -1150.718994140625, + "KL/std": 615.11181640625, + "epoch": 0.8296622613803231, + "fcm_dpo/beta": 0.0009813096839934587, + "fcm_dpo/delta": -0.015731915831565857, + "fcm_dpo/margin": 422.97857666015625, + "fcm_dpo/q_t": 0.41049522161483765, + "grad_norm": 44.25437927246094, + "learning_rate": 4.375063135042445e-08, + "logits/chosen": -0.9896056652069092, + "logits/rejected": -0.9939931035041809, + "logps/chosen": -796.7603759765625, + "logps/ref_chosen": -69.0199203491211, + "logps/ref_rejected": -85.7789306640625, + "logps/rejected": -1236.4979248046875, + "loss": 1.1303, + "margin_dpo/margin_mean": 422.9785461425781, + "margin_dpo/margin_std": 737.2132568359375, + "step": 565 + }, + { + "KL/chosen_KL_mean": -672.3824462890625, + "KL/mean": -907.526611328125, + "KL/rejected_KL_mean": -1142.6708984375, + "KL/std": 631.5331420898438, + "epoch": 0.8311306901615272, + "fcm_dpo/beta": 0.0009774458594620228, + "fcm_dpo/delta": -0.06289710104465485, + "fcm_dpo/margin": 470.2884216308594, + "fcm_dpo/q_t": 0.3984670639038086, + "grad_norm": 35.89476013183594, + "learning_rate": 4.3028095264420525e-08, + "logits/chosen": -0.9764306545257568, + "logits/rejected": -1.0030491352081299, + "logps/chosen": -738.927734375, + "logps/ref_chosen": -66.5453109741211, + "logps/ref_rejected": -103.86932373046875, + "logps/rejected": -1246.5401611328125, + "loss": 1.0977, + "margin_dpo/margin_mean": 470.28839111328125, + "margin_dpo/margin_std": 727.6954345703125, + "step": 566 + }, + { + "KL/chosen_KL_mean": -654.3175048828125, + "KL/mean": -835.689453125, + "KL/rejected_KL_mean": -1017.0613403320312, + "KL/std": 442.9603576660156, + "epoch": 0.8325991189427313, + "fcm_dpo/beta": 0.0009747430449351668, + "fcm_dpo/delta": 0.04814485087990761, + "fcm_dpo/margin": 362.7438659667969, + "fcm_dpo/q_t": 0.41720670461654663, + "grad_norm": 37.64247512817383, + "learning_rate": 4.231101308059165e-08, + "logits/chosen": -1.0903135538101196, + "logits/rejected": -1.1013944149017334, + "logps/chosen": -707.17578125, + "logps/ref_chosen": -52.85829544067383, + "logps/ref_rejected": -85.37095642089844, + "logps/rejected": -1102.432373046875, + "loss": 1.1168, + "margin_dpo/margin_mean": 362.7438659667969, + "margin_dpo/margin_std": 488.75982666015625, + "step": 567 + }, + { + "KL/chosen_KL_mean": -640.649169921875, + "KL/mean": -880.9572143554688, + "KL/rejected_KL_mean": -1121.2652587890625, + "KL/std": 509.034912109375, + "epoch": 0.8340675477239354, + "fcm_dpo/beta": 0.0009669936262071133, + "fcm_dpo/delta": -0.06800977885723114, + "fcm_dpo/margin": 480.6161804199219, + "fcm_dpo/q_t": 0.39191970229148865, + "grad_norm": 32.80691146850586, + "learning_rate": 4.1599403694720145e-08, + "logits/chosen": -0.951264500617981, + "logits/rejected": -0.9899559020996094, + "logps/chosen": -685.841552734375, + "logps/ref_chosen": -45.1923828125, + "logps/ref_rejected": -89.09236907958984, + "logps/rejected": -1210.357666015625, + "loss": 1.0349, + "margin_dpo/margin_mean": 480.61614990234375, + "margin_dpo/margin_std": 529.2095947265625, + "step": 568 + }, + { + "KL/chosen_KL_mean": -726.1346435546875, + "KL/mean": -927.0540161132812, + "KL/rejected_KL_mean": -1127.973388671875, + "KL/std": 663.6150512695312, + "epoch": 0.8355359765051396, + "fcm_dpo/beta": 0.0009723026305437088, + "fcm_dpo/delta": 0.008742645382881165, + "fcm_dpo/margin": 401.8387451171875, + "fcm_dpo/q_t": 0.4109431803226471, + "grad_norm": 49.85871505737305, + "learning_rate": 4.089328585837512e-08, + "logits/chosen": -1.000624179840088, + "logits/rejected": -1.0081329345703125, + "logps/chosen": -789.855224609375, + "logps/ref_chosen": -63.72056198120117, + "logps/ref_rejected": -79.10325622558594, + "logps/rejected": -1207.07666015625, + "loss": 1.142, + "margin_dpo/margin_mean": 401.8387451171875, + "margin_dpo/margin_std": 688.9107055664062, + "step": 569 + }, + { + "KL/chosen_KL_mean": -679.8856811523438, + "KL/mean": -865.4696044921875, + "KL/rejected_KL_mean": -1051.053466796875, + "KL/std": 531.23974609375, + "epoch": 0.8370044052863436, + "fcm_dpo/beta": 0.000971082947216928, + "fcm_dpo/delta": 0.04106369987130165, + "fcm_dpo/margin": 371.16790771484375, + "fcm_dpo/q_t": 0.41922780871391296, + "grad_norm": 30.256921768188477, + "learning_rate": 4.019267817841834e-08, + "logits/chosen": -1.0755581855773926, + "logits/rejected": -1.0673818588256836, + "logps/chosen": -741.500244140625, + "logps/ref_chosen": -61.61454391479492, + "logps/ref_rejected": -82.14186096191406, + "logps/rejected": -1133.1954345703125, + "loss": 1.134, + "margin_dpo/margin_mean": 371.16790771484375, + "margin_dpo/margin_std": 586.2235717773438, + "step": 570 + }, + { + "KL/chosen_KL_mean": -704.7464599609375, + "KL/mean": -927.1207275390625, + "KL/rejected_KL_mean": -1149.4949951171875, + "KL/std": 556.0410766601562, + "epoch": 0.8384728340675477, + "fcm_dpo/beta": 0.0009676171466708183, + "fcm_dpo/delta": -0.031879834830760956, + "fcm_dpo/margin": 444.74859619140625, + "fcm_dpo/q_t": 0.4041333794593811, + "grad_norm": 37.00971221923828, + "learning_rate": 3.9497599116513705e-08, + "logits/chosen": -0.9698342680931091, + "logits/rejected": -0.984066903591156, + "logps/chosen": -757.800537109375, + "logps/ref_chosen": -53.05406188964844, + "logps/ref_rejected": -91.33682250976562, + "logps/rejected": -1240.831787109375, + "loss": 1.103, + "margin_dpo/margin_mean": 444.74853515625, + "margin_dpo/margin_std": 689.9317016601562, + "step": 571 + }, + { + "KL/chosen_KL_mean": -760.4070434570312, + "KL/mean": -986.117431640625, + "KL/rejected_KL_mean": -1211.82763671875, + "KL/std": 642.9850463867188, + "epoch": 0.8399412628487518, + "fcm_dpo/beta": 0.0009603890357539058, + "fcm_dpo/delta": -0.03531990945339203, + "fcm_dpo/margin": 451.42071533203125, + "fcm_dpo/q_t": 0.40735888481140137, + "grad_norm": 33.24798583984375, + "learning_rate": 3.880806698864086e-08, + "logits/chosen": -1.01137113571167, + "logits/rejected": -1.0419948101043701, + "logps/chosen": -808.8663330078125, + "logps/ref_chosen": -48.45928955078125, + "logps/ref_rejected": -83.55703735351562, + "logps/rejected": -1295.384765625, + "loss": 1.1207, + "margin_dpo/margin_mean": 451.42071533203125, + "margin_dpo/margin_std": 778.9022216796875, + "step": 572 + }, + { + "KL/chosen_KL_mean": -738.4405517578125, + "KL/mean": -941.948486328125, + "KL/rejected_KL_mean": -1145.4564208984375, + "KL/std": 572.2998657226562, + "epoch": 0.8414096916299559, + "fcm_dpo/beta": 0.0009635947062633932, + "fcm_dpo/delta": 0.008085294626653194, + "fcm_dpo/margin": 407.01580810546875, + "fcm_dpo/q_t": 0.4119398593902588, + "grad_norm": 30.2346248626709, + "learning_rate": 3.812409996461275e-08, + "logits/chosen": -1.0823559761047363, + "logits/rejected": -1.0929925441741943, + "logps/chosen": -790.063232421875, + "logps/ref_chosen": -51.62262725830078, + "logps/ref_rejected": -85.32499694824219, + "logps/rejected": -1230.7813720703125, + "loss": 1.0995, + "margin_dpo/margin_mean": 407.01580810546875, + "margin_dpo/margin_std": 571.386474609375, + "step": 573 + }, + { + "KL/chosen_KL_mean": -669.0106201171875, + "KL/mean": -891.2628784179688, + "KL/rejected_KL_mean": -1113.51513671875, + "KL/std": 523.285888671875, + "epoch": 0.8428781204111601, + "fcm_dpo/beta": 0.0009609279222786427, + "fcm_dpo/delta": -0.02838175743818283, + "fcm_dpo/margin": 444.5045166015625, + "fcm_dpo/q_t": 0.4030435085296631, + "grad_norm": 39.73578643798828, + "learning_rate": 3.74457160675965e-08, + "logits/chosen": -1.0616734027862549, + "logits/rejected": -1.087287187576294, + "logps/chosen": -720.0551147460938, + "logps/ref_chosen": -51.04446029663086, + "logps/ref_rejected": -92.80640411376953, + "logps/rejected": -1206.321533203125, + "loss": 1.0826, + "margin_dpo/margin_mean": 444.5045166015625, + "margin_dpo/margin_std": 604.4505004882812, + "step": 574 + }, + { + "KL/chosen_KL_mean": -715.7283325195312, + "KL/mean": -918.717529296875, + "KL/rejected_KL_mean": -1121.70654296875, + "KL/std": 521.6762084960938, + "epoch": 0.8443465491923642, + "fcm_dpo/beta": 0.0009517880389466882, + "fcm_dpo/delta": 0.01296766847372055, + "fcm_dpo/margin": 405.9783020019531, + "fcm_dpo/q_t": 0.41225284337997437, + "grad_norm": 34.28059005737305, + "learning_rate": 3.677293317363864e-08, + "logits/chosen": -0.95893394947052, + "logits/rejected": -0.9681143760681152, + "logps/chosen": -787.5184326171875, + "logps/ref_chosen": -71.7901382446289, + "logps/ref_rejected": -95.38619995117188, + "logps/rejected": -1217.0927734375, + "loss": 1.1349, + "margin_dpo/margin_mean": 405.97833251953125, + "margin_dpo/margin_std": 661.7513427734375, + "step": 575 + }, + { + "KL/chosen_KL_mean": -700.2211303710938, + "KL/mean": -855.5504150390625, + "KL/rejected_KL_mean": -1010.8797607421875, + "KL/std": 483.954345703125, + "epoch": 0.8458149779735683, + "fcm_dpo/beta": 0.0009712062310427427, + "fcm_dpo/delta": 0.10144974291324615, + "fcm_dpo/margin": 310.6585998535156, + "fcm_dpo/q_t": 0.4315390884876251, + "grad_norm": 37.09640884399414, + "learning_rate": 3.6105769011194224e-08, + "logits/chosen": -1.0683985948562622, + "logits/rejected": -1.0978965759277344, + "logps/chosen": -754.484130859375, + "logps/ref_chosen": -54.262962341308594, + "logps/ref_rejected": -100.75428009033203, + "logps/rejected": -1111.634033203125, + "loss": 1.1802, + "margin_dpo/margin_mean": 310.6585998535156, + "margin_dpo/margin_std": 565.0277709960938, + "step": 576 + }, + { + "KL/chosen_KL_mean": -617.461669921875, + "KL/mean": -820.9848022460938, + "KL/rejected_KL_mean": -1024.5079345703125, + "KL/std": 550.1454467773438, + "epoch": 0.8472834067547724, + "fcm_dpo/beta": 0.0009785243310034275, + "fcm_dpo/delta": 0.0017292937263846397, + "fcm_dpo/margin": 407.0462341308594, + "fcm_dpo/q_t": 0.4103270471096039, + "grad_norm": 28.927133560180664, + "learning_rate": 3.5444241160659304e-08, + "logits/chosen": -1.0405795574188232, + "logits/rejected": -1.0324490070343018, + "logps/chosen": -679.371337890625, + "logps/ref_chosen": -61.909706115722656, + "logps/ref_rejected": -84.07069396972656, + "logps/rejected": -1108.57861328125, + "loss": 1.1141, + "margin_dpo/margin_mean": 407.0462646484375, + "margin_dpo/margin_std": 594.7308959960938, + "step": 577 + }, + { + "KL/chosen_KL_mean": -613.3778076171875, + "KL/mean": -831.669189453125, + "KL/rejected_KL_mean": -1049.9605712890625, + "KL/std": 529.0792236328125, + "epoch": 0.8487518355359766, + "fcm_dpo/beta": 0.0009709839941933751, + "fcm_dpo/delta": -0.02570383995771408, + "fcm_dpo/margin": 436.5827331542969, + "fcm_dpo/q_t": 0.4028467535972595, + "grad_norm": 37.58174133300781, + "learning_rate": 3.478836705390808e-08, + "logits/chosen": -0.949306845664978, + "logits/rejected": -0.9812426567077637, + "logps/chosen": -662.6415405273438, + "logps/ref_chosen": -49.26368713378906, + "logps/ref_rejected": -83.4362564086914, + "logps/rejected": -1133.396728515625, + "loss": 1.073, + "margin_dpo/margin_mean": 436.5827331542969, + "margin_dpo/margin_std": 540.6303100585938, + "step": 578 + }, + { + "KL/chosen_KL_mean": -723.9249267578125, + "KL/mean": -857.285888671875, + "KL/rejected_KL_mean": -990.6468505859375, + "KL/std": 545.2241821289062, + "epoch": 0.8502202643171806, + "fcm_dpo/beta": 0.0009836649987846613, + "fcm_dpo/delta": 0.0445760153234005, + "fcm_dpo/margin": 266.721923828125, + "fcm_dpo/q_t": 0.43926477432250977, + "grad_norm": 57.85899353027344, + "learning_rate": 3.41381639738331e-08, + "logits/chosen": -1.0406593084335327, + "logits/rejected": -1.0456761121749878, + "logps/chosen": -782.810791015625, + "logps/ref_chosen": -58.88581848144531, + "logps/ref_rejected": -94.78762817382812, + "logps/rejected": -1085.4345703125, + "loss": 1.2246, + "margin_dpo/margin_mean": 266.721923828125, + "margin_dpo/margin_std": 599.7505493164062, + "step": 579 + }, + { + "KL/chosen_KL_mean": -523.7979125976562, + "KL/mean": -777.8994140625, + "KL/rejected_KL_mean": -1032.0008544921875, + "KL/std": 608.378662109375, + "epoch": 0.8516886930983847, + "fcm_dpo/beta": 0.0009660617797635496, + "fcm_dpo/delta": -0.09629727900028229, + "fcm_dpo/margin": 508.2030029296875, + "fcm_dpo/q_t": 0.39319556951522827, + "grad_norm": 28.836139678955078, + "learning_rate": 3.349364905389032e-08, + "logits/chosen": -0.8956875205039978, + "logits/rejected": -0.930424153804779, + "logps/chosen": -572.5047607421875, + "logps/ref_chosen": -48.70683670043945, + "logps/ref_rejected": -81.7583999633789, + "logps/rejected": -1113.75927734375, + "loss": 1.0505, + "margin_dpo/margin_mean": 508.2029724121094, + "margin_dpo/margin_std": 699.3994140625, + "step": 580 + }, + { + "KL/chosen_KL_mean": -719.4566650390625, + "KL/mean": -892.114501953125, + "KL/rejected_KL_mean": -1064.7723388671875, + "KL/std": 567.6749877929688, + "epoch": 0.8531571218795888, + "fcm_dpo/beta": 0.0009752740152180195, + "fcm_dpo/delta": 0.06534610688686371, + "fcm_dpo/margin": 345.31573486328125, + "fcm_dpo/q_t": 0.4253264367580414, + "grad_norm": 42.403324127197266, + "learning_rate": 3.285483927764726e-08, + "logits/chosen": -1.105149269104004, + "logits/rejected": -1.113175630569458, + "logps/chosen": -781.6790161132812, + "logps/ref_chosen": -62.22235107421875, + "logps/ref_rejected": -91.73568725585938, + "logps/rejected": -1156.508056640625, + "loss": 1.1609, + "margin_dpo/margin_mean": 345.31573486328125, + "margin_dpo/margin_std": 624.8870849609375, + "step": 581 + }, + { + "KL/chosen_KL_mean": -628.026611328125, + "KL/mean": -828.7762451171875, + "KL/rejected_KL_mean": -1029.52587890625, + "KL/std": 499.4111022949219, + "epoch": 0.8546255506607929, + "fcm_dpo/beta": 0.000987016363069415, + "fcm_dpo/delta": 0.00210629403591156, + "fcm_dpo/margin": 401.499267578125, + "fcm_dpo/q_t": 0.4085530638694763, + "grad_norm": 30.787132263183594, + "learning_rate": 3.222175147833556e-08, + "logits/chosen": -1.0466606616973877, + "logits/rejected": -1.0699677467346191, + "logps/chosen": -686.2553100585938, + "logps/ref_chosen": -58.228660583496094, + "logps/ref_rejected": -110.06959533691406, + "logps/rejected": -1139.595458984375, + "loss": 1.1048, + "margin_dpo/margin_mean": 401.499267578125, + "margin_dpo/margin_std": 544.4295654296875, + "step": 582 + }, + { + "KL/chosen_KL_mean": -720.7072143554688, + "KL/mean": -845.5337524414062, + "KL/rejected_KL_mean": -970.3602905273438, + "KL/std": 540.9931640625, + "epoch": 0.856093979441997, + "fcm_dpo/beta": 0.0009754466009326279, + "fcm_dpo/delta": -0.013105042278766632, + "fcm_dpo/margin": 249.6529998779297, + "fcm_dpo/q_t": 0.44551074504852295, + "grad_norm": 57.56175231933594, + "learning_rate": 3.159440233840763e-08, + "logits/chosen": -0.9852885007858276, + "logits/rejected": -0.9814597368240356, + "logps/chosen": -777.570068359375, + "logps/ref_chosen": -56.86286163330078, + "logps/ref_rejected": -88.4039306640625, + "logps/rejected": -1058.76416015625, + "loss": 1.2576, + "margin_dpo/margin_mean": 249.65298461914062, + "margin_dpo/margin_std": 645.1383056640625, + "step": 583 + }, + { + "KL/chosen_KL_mean": -624.9401245117188, + "KL/mean": -870.3372802734375, + "KL/rejected_KL_mean": -1115.734619140625, + "KL/std": 560.4860229492188, + "epoch": 0.8575624082232012, + "fcm_dpo/beta": 0.0009648328414186835, + "fcm_dpo/delta": -0.07715471088886261, + "fcm_dpo/margin": 490.79443359375, + "fcm_dpo/q_t": 0.3933956027030945, + "grad_norm": 40.76878356933594, + "learning_rate": 3.0972808389096635e-08, + "logits/chosen": -1.0293495655059814, + "logits/rejected": -1.0419096946716309, + "logps/chosen": -681.8408203125, + "logps/ref_chosen": -56.90068054199219, + "logps/ref_rejected": -97.63606262207031, + "logps/rejected": -1213.37060546875, + "loss": 1.0435, + "margin_dpo/margin_mean": 490.79443359375, + "margin_dpo/margin_std": 595.7258911132812, + "step": 584 + }, + { + "KL/chosen_KL_mean": -713.83837890625, + "KL/mean": -934.8936157226562, + "KL/rejected_KL_mean": -1155.948974609375, + "KL/std": 615.5269775390625, + "epoch": 0.8590308370044053, + "fcm_dpo/beta": 0.0009544256026856601, + "fcm_dpo/delta": -0.023169085383415222, + "fcm_dpo/margin": 442.110595703125, + "fcm_dpo/q_t": 0.40594780445098877, + "grad_norm": 35.20669937133789, + "learning_rate": 3.035698600998121e-08, + "logits/chosen": -1.0367913246154785, + "logits/rejected": -1.058849811553955, + "logps/chosen": -774.8123168945312, + "logps/ref_chosen": -60.973968505859375, + "logps/ref_rejected": -84.16952514648438, + "logps/rejected": -1240.118408203125, + "loss": 1.1181, + "margin_dpo/margin_mean": 442.110595703125, + "margin_dpo/margin_std": 730.5751953125, + "step": 585 + }, + { + "KL/chosen_KL_mean": -746.9095458984375, + "KL/mean": -891.6566162109375, + "KL/rejected_KL_mean": -1036.403564453125, + "KL/std": 537.6516723632812, + "epoch": 0.8604992657856094, + "fcm_dpo/beta": 0.0009719742811284959, + "fcm_dpo/delta": 0.12220651656389236, + "fcm_dpo/margin": 289.494140625, + "fcm_dpo/q_t": 0.4358428120613098, + "grad_norm": 31.211702346801758, + "learning_rate": 2.974695142855388e-08, + "logits/chosen": -1.0327489376068115, + "logits/rejected": -1.0522578954696655, + "logps/chosen": -803.76513671875, + "logps/ref_chosen": -56.85559844970703, + "logps/ref_rejected": -91.80261993408203, + "logps/rejected": -1128.206298828125, + "loss": 1.2039, + "margin_dpo/margin_mean": 289.494140625, + "margin_dpo/margin_std": 594.351806640625, + "step": 586 + }, + { + "KL/chosen_KL_mean": -506.798583984375, + "KL/mean": -714.7243041992188, + "KL/rejected_KL_mean": -922.6500244140625, + "KL/std": 565.3491821289062, + "epoch": 0.8619676945668135, + "fcm_dpo/beta": 0.0009805042063817382, + "fcm_dpo/delta": -0.008139118552207947, + "fcm_dpo/margin": 415.85150146484375, + "fcm_dpo/q_t": 0.40643125772476196, + "grad_norm": 38.273529052734375, + "learning_rate": 2.9142720719793122e-08, + "logits/chosen": -1.03069269657135, + "logits/rejected": -1.058362603187561, + "logps/chosen": -551.4901733398438, + "logps/ref_chosen": -44.69159698486328, + "logps/ref_rejected": -82.62385559082031, + "logps/rejected": -1005.27392578125, + "loss": 1.0918, + "margin_dpo/margin_mean": 415.85150146484375, + "margin_dpo/margin_std": 569.570068359375, + "step": 587 + }, + { + "KL/chosen_KL_mean": -664.525390625, + "KL/mean": -837.8515014648438, + "KL/rejected_KL_mean": -1011.1776123046875, + "KL/std": 480.2796630859375, + "epoch": 0.8634361233480177, + "fcm_dpo/beta": 0.000982759054750204, + "fcm_dpo/delta": 0.061286523938179016, + "fcm_dpo/margin": 346.6522216796875, + "fcm_dpo/q_t": 0.4209359884262085, + "grad_norm": 31.023210525512695, + "learning_rate": 2.8544309805740018e-08, + "logits/chosen": -1.0278799533843994, + "logits/rejected": -1.05256187915802, + "logps/chosen": -714.8203735351562, + "logps/ref_chosen": -50.29494857788086, + "logps/ref_rejected": -107.36988067626953, + "logps/rejected": -1118.5474853515625, + "loss": 1.1364, + "margin_dpo/margin_mean": 346.6522521972656, + "margin_dpo/margin_std": 510.10968017578125, + "step": 588 + }, + { + "KL/chosen_KL_mean": -673.4818115234375, + "KL/mean": -904.097900390625, + "KL/rejected_KL_mean": -1134.7139892578125, + "KL/std": 546.526611328125, + "epoch": 0.8649045521292217, + "fcm_dpo/beta": 0.0009825675515457988, + "fcm_dpo/delta": -0.05569233000278473, + "fcm_dpo/margin": 461.232177734375, + "fcm_dpo/q_t": 0.3966492712497711, + "grad_norm": 30.869823455810547, + "learning_rate": 2.7951734455078786e-08, + "logits/chosen": -0.9944198131561279, + "logits/rejected": -1.005649447441101, + "logps/chosen": -733.4117431640625, + "logps/ref_chosen": -59.929908752441406, + "logps/ref_rejected": -111.65534973144531, + "logps/rejected": -1246.369384765625, + "loss": 1.0576, + "margin_dpo/margin_mean": 461.2321472167969, + "margin_dpo/margin_std": 579.5751953125, + "step": 589 + }, + { + "KL/chosen_KL_mean": -577.0634765625, + "KL/mean": -803.4448852539062, + "KL/rejected_KL_mean": -1029.8262939453125, + "KL/std": 528.4348754882812, + "epoch": 0.8663729809104258, + "fcm_dpo/beta": 0.0009721757378429174, + "fcm_dpo/delta": -0.042073942720890045, + "fcm_dpo/margin": 452.7629089355469, + "fcm_dpo/q_t": 0.40020644664764404, + "grad_norm": 33.35025405883789, + "learning_rate": 2.736501028272095e-08, + "logits/chosen": -0.9723612070083618, + "logits/rejected": -1.0005714893341064, + "logps/chosen": -632.8732299804688, + "logps/ref_chosen": -55.80979537963867, + "logps/ref_rejected": -106.06282043457031, + "logps/rejected": -1135.88916015625, + "loss": 1.0653, + "margin_dpo/margin_mean": 452.762939453125, + "margin_dpo/margin_std": 576.941162109375, + "step": 590 + }, + { + "KL/chosen_KL_mean": -649.7230224609375, + "KL/mean": -853.7227783203125, + "KL/rejected_KL_mean": -1057.722412109375, + "KL/std": 513.365478515625, + "epoch": 0.8678414096916299, + "fcm_dpo/beta": 0.0009712845785543323, + "fcm_dpo/delta": 0.003869034815579653, + "fcm_dpo/margin": 407.99945068359375, + "fcm_dpo/q_t": 0.40842798352241516, + "grad_norm": 34.23089599609375, + "learning_rate": 2.678415274939408e-08, + "logits/chosen": -1.0266298055648804, + "logits/rejected": -1.0202120542526245, + "logps/chosen": -705.963623046875, + "logps/ref_chosen": -56.24061965942383, + "logps/ref_rejected": -83.78629302978516, + "logps/rejected": -1141.5087890625, + "loss": 1.1021, + "margin_dpo/margin_mean": 407.9994812011719, + "margin_dpo/margin_std": 576.1143798828125, + "step": 591 + }, + { + "KL/chosen_KL_mean": -680.3071899414062, + "KL/mean": -866.3721923828125, + "KL/rejected_KL_mean": -1052.4371337890625, + "KL/std": 528.517822265625, + "epoch": 0.869309838472834, + "fcm_dpo/beta": 0.0009767988231033087, + "fcm_dpo/delta": 0.0378945954144001, + "fcm_dpo/margin": 372.12994384765625, + "fcm_dpo/q_t": 0.4183656573295593, + "grad_norm": 36.562538146972656, + "learning_rate": 2.6209177161234442e-08, + "logits/chosen": -1.0103018283843994, + "logits/rejected": -1.0133998394012451, + "logps/chosen": -728.2474365234375, + "logps/ref_chosen": -47.94025421142578, + "logps/ref_rejected": -75.73287963867188, + "logps/rejected": -1128.169921875, + "loss": 1.1739, + "margin_dpo/margin_mean": 372.1299133300781, + "margin_dpo/margin_std": 716.3402099609375, + "step": 592 + }, + { + "KL/chosen_KL_mean": -643.9327392578125, + "KL/mean": -789.9352416992188, + "KL/rejected_KL_mean": -935.937744140625, + "KL/std": 571.6775512695312, + "epoch": 0.8707782672540382, + "fcm_dpo/beta": 0.000984064768999815, + "fcm_dpo/delta": 0.02495434135198593, + "fcm_dpo/margin": 292.0050048828125, + "fcm_dpo/q_t": 0.4358568787574768, + "grad_norm": 52.18275451660156, + "learning_rate": 2.564009866938349e-08, + "logits/chosen": -0.895799994468689, + "logits/rejected": -0.8876699209213257, + "logps/chosen": -692.62353515625, + "logps/ref_chosen": -48.690757751464844, + "logps/ref_rejected": -60.90800094604492, + "logps/rejected": -996.845703125, + "loss": 1.2122, + "margin_dpo/margin_mean": 292.0050048828125, + "margin_dpo/margin_std": 635.2003173828125, + "step": 593 + }, + { + "KL/chosen_KL_mean": -632.062255859375, + "KL/mean": -817.6357421875, + "KL/rejected_KL_mean": -1003.2091064453125, + "KL/std": 560.24951171875, + "epoch": 0.8722466960352423, + "fcm_dpo/beta": 0.000995452981442213, + "fcm_dpo/delta": 0.030517850071191788, + "fcm_dpo/margin": 371.14691162109375, + "fcm_dpo/q_t": 0.4163949191570282, + "grad_norm": 37.190494537353516, + "learning_rate": 2.5076932269588708e-08, + "logits/chosen": -0.9844435453414917, + "logits/rejected": -0.9757124185562134, + "logps/chosen": -686.9970703125, + "logps/ref_chosen": -54.93488693237305, + "logps/ref_rejected": -86.09967803955078, + "logps/rejected": -1089.308837890625, + "loss": 1.1384, + "margin_dpo/margin_mean": 371.1469421386719, + "margin_dpo/margin_std": 597.141845703125, + "step": 594 + }, + { + "KL/chosen_KL_mean": -576.7757568359375, + "KL/mean": -783.5555419921875, + "KL/rejected_KL_mean": -990.33544921875, + "KL/std": 514.779541015625, + "epoch": 0.8737151248164464, + "fcm_dpo/beta": 0.0009872771333903074, + "fcm_dpo/delta": -0.008713661693036556, + "fcm_dpo/margin": 413.5597229003906, + "fcm_dpo/q_t": 0.4093300700187683, + "grad_norm": 42.089027404785156, + "learning_rate": 2.451969280180849e-08, + "logits/chosen": -0.9605817794799805, + "logits/rejected": -0.979555606842041, + "logps/chosen": -626.1961669921875, + "logps/ref_chosen": -49.4204216003418, + "logps/ref_rejected": -80.62731170654297, + "logps/rejected": -1070.962646484375, + "loss": 1.0916, + "margin_dpo/margin_mean": 413.5596923828125, + "margin_dpo/margin_std": 573.5089721679688, + "step": 595 + }, + { + "KL/chosen_KL_mean": -680.4862060546875, + "KL/mean": -826.2808837890625, + "KL/rejected_KL_mean": -972.0756225585938, + "KL/std": 520.2750244140625, + "epoch": 0.8751835535976505, + "fcm_dpo/beta": 0.001006106031127274, + "fcm_dpo/delta": 0.10965707898139954, + "fcm_dpo/margin": 291.58941650390625, + "fcm_dpo/q_t": 0.4358452558517456, + "grad_norm": 58.43737030029297, + "learning_rate": 2.396839494982103e-08, + "logits/chosen": -0.995140552520752, + "logits/rejected": -0.9648805856704712, + "logps/chosen": -740.27783203125, + "logps/ref_chosen": -59.791683197021484, + "logps/ref_rejected": -80.09111785888672, + "logps/rejected": -1052.166748046875, + "loss": 1.2046, + "margin_dpo/margin_mean": 291.58941650390625, + "margin_dpo/margin_std": 623.7481689453125, + "step": 596 + }, + { + "KL/chosen_KL_mean": -646.8114624023438, + "KL/mean": -897.362060546875, + "KL/rejected_KL_mean": -1147.91259765625, + "KL/std": 604.0191650390625, + "epoch": 0.8766519823788547, + "fcm_dpo/beta": 0.0009849161142483354, + "fcm_dpo/delta": -0.10163434594869614, + "fcm_dpo/margin": 501.10113525390625, + "fcm_dpo/q_t": 0.3910897970199585, + "grad_norm": 27.942764282226562, + "learning_rate": 2.3423053240837514e-08, + "logits/chosen": -0.9165897369384766, + "logits/rejected": -0.9646108746528625, + "logps/chosen": -704.072265625, + "logps/ref_chosen": -57.26078796386719, + "logps/ref_rejected": -100.6937255859375, + "logps/rejected": -1248.6063232421875, + "loss": 1.057, + "margin_dpo/margin_mean": 501.1011047363281, + "margin_dpo/margin_std": 665.6370849609375, + "step": 597 + }, + { + "KL/chosen_KL_mean": -640.52880859375, + "KL/mean": -837.5374755859375, + "KL/rejected_KL_mean": -1034.5460205078125, + "KL/std": 517.078857421875, + "epoch": 0.8781204111600588, + "fcm_dpo/beta": 0.0009835727978497744, + "fcm_dpo/delta": 0.011667370796203613, + "fcm_dpo/margin": 394.01727294921875, + "fcm_dpo/q_t": 0.4106915593147278, + "grad_norm": 44.37862777709961, + "learning_rate": 2.2883682045119062e-08, + "logits/chosen": -1.0113518238067627, + "logits/rejected": -1.023685336112976, + "logps/chosen": -693.0473022460938, + "logps/ref_chosen": -52.51850509643555, + "logps/ref_rejected": -89.44385528564453, + "logps/rejected": -1123.989990234375, + "loss": 1.1145, + "margin_dpo/margin_mean": 394.01727294921875, + "margin_dpo/margin_std": 565.1383666992188, + "step": 598 + }, + { + "KL/chosen_KL_mean": -643.5155029296875, + "KL/mean": -810.739990234375, + "KL/rejected_KL_mean": -977.9644775390625, + "KL/std": 501.908935546875, + "epoch": 0.8795888399412628, + "fcm_dpo/beta": 0.0009813719661906362, + "fcm_dpo/delta": -0.055194415152072906, + "fcm_dpo/margin": 334.44903564453125, + "fcm_dpo/q_t": 0.4202921986579895, + "grad_norm": 33.33484649658203, + "learning_rate": 2.2350295575598367e-08, + "logits/chosen": -0.9392881989479065, + "logits/rejected": -0.9477603435516357, + "logps/chosen": -693.3181762695312, + "logps/ref_chosen": -49.802677154541016, + "logps/ref_rejected": -82.978515625, + "logps/rejected": -1060.943115234375, + "loss": 1.1434, + "margin_dpo/margin_mean": 334.448974609375, + "margin_dpo/margin_std": 476.6112060546875, + "step": 599 + }, + { + "KL/chosen_KL_mean": -708.796142578125, + "KL/mean": -865.4288330078125, + "KL/rejected_KL_mean": -1022.0615844726562, + "KL/std": 508.1895446777344, + "epoch": 0.8810572687224669, + "fcm_dpo/beta": 0.0009930970845744014, + "fcm_dpo/delta": 0.09185181558132172, + "fcm_dpo/margin": 313.265380859375, + "fcm_dpo/q_t": 0.4303100109100342, + "grad_norm": 31.344772338867188, + "learning_rate": 2.1822907887504932e-08, + "logits/chosen": -1.0468547344207764, + "logits/rejected": -1.0439157485961914, + "logps/chosen": -775.2310791015625, + "logps/ref_chosen": -66.43487548828125, + "logps/ref_rejected": -85.45649719238281, + "logps/rejected": -1107.51806640625, + "loss": 1.1858, + "margin_dpo/margin_mean": 313.265380859375, + "margin_dpo/margin_std": 613.1177978515625, + "step": 600 + }, + { + "KL/chosen_KL_mean": -709.7387084960938, + "KL/mean": -912.3361206054688, + "KL/rejected_KL_mean": -1114.933349609375, + "KL/std": 543.34033203125, + "epoch": 0.882525697503671, + "fcm_dpo/beta": 0.0009970087558031082, + "fcm_dpo/delta": -0.004234878346323967, + "fcm_dpo/margin": 405.19476318359375, + "fcm_dpo/q_t": 0.4060080647468567, + "grad_norm": 33.107521057128906, + "learning_rate": 2.1301532877994742e-08, + "logits/chosen": -0.9763351678848267, + "logits/rejected": -0.9957572221755981, + "logps/chosen": -768.872314453125, + "logps/ref_chosen": -59.13361358642578, + "logps/ref_rejected": -94.69093322753906, + "logps/rejected": -1209.6243896484375, + "loss": 1.0885, + "margin_dpo/margin_mean": 405.19476318359375, + "margin_dpo/margin_std": 531.271728515625, + "step": 601 + }, + { + "KL/chosen_KL_mean": -501.87945556640625, + "KL/mean": -752.0775146484375, + "KL/rejected_KL_mean": -1002.2755126953125, + "KL/std": 506.71160888671875, + "epoch": 0.8839941262848752, + "fcm_dpo/beta": 0.0009898185962811112, + "fcm_dpo/delta": -0.10039174556732178, + "fcm_dpo/margin": 500.39605712890625, + "fcm_dpo/q_t": 0.38656604290008545, + "grad_norm": 53.6025276184082, + "learning_rate": 2.0786184285784298e-08, + "logits/chosen": -1.0038371086120605, + "logits/rejected": -1.0388686656951904, + "logps/chosen": -550.4730224609375, + "logps/ref_chosen": -48.59352111816406, + "logps/ref_rejected": -87.6685562133789, + "logps/rejected": -1089.944091796875, + "loss": 1.0169, + "margin_dpo/margin_mean": 500.39605712890625, + "margin_dpo/margin_std": 533.11669921875, + "step": 602 + }, + { + "KL/chosen_KL_mean": -630.3939819335938, + "KL/mean": -857.3939208984375, + "KL/rejected_KL_mean": -1084.393798828125, + "KL/std": 564.5196533203125, + "epoch": 0.8854625550660793, + "fcm_dpo/beta": 0.0009718415094539523, + "fcm_dpo/delta": -0.04316433519124985, + "fcm_dpo/margin": 453.99981689453125, + "fcm_dpo/q_t": 0.40223926305770874, + "grad_norm": 34.395442962646484, + "learning_rate": 2.0276875690788204e-08, + "logits/chosen": -1.011382818222046, + "logits/rejected": -1.0053396224975586, + "logps/chosen": -700.80859375, + "logps/ref_chosen": -70.41461944580078, + "logps/ref_rejected": -100.32559967041016, + "logps/rejected": -1184.719482421875, + "loss": 1.0851, + "margin_dpo/margin_mean": 453.99981689453125, + "margin_dpo/margin_std": 659.65673828125, + "step": 603 + }, + { + "KL/chosen_KL_mean": -605.8519897460938, + "KL/mean": -852.4639892578125, + "KL/rejected_KL_mean": -1099.075927734375, + "KL/std": 546.70166015625, + "epoch": 0.8869309838472834, + "fcm_dpo/beta": 0.0009594437433406711, + "fcm_dpo/delta": -0.07685627043247223, + "fcm_dpo/margin": 493.223876953125, + "fcm_dpo/q_t": 0.3948679566383362, + "grad_norm": 32.600433349609375, + "learning_rate": 1.977362051376158e-08, + "logits/chosen": -1.0046117305755615, + "logits/rejected": -1.0419707298278809, + "logps/chosen": -652.31005859375, + "logps/ref_chosen": -46.45808029174805, + "logps/ref_rejected": -91.8544921875, + "logps/rejected": -1190.930419921875, + "loss": 1.0625, + "margin_dpo/margin_mean": 493.223876953125, + "margin_dpo/margin_std": 673.2548828125, + "step": 604 + }, + { + "KL/chosen_KL_mean": -636.7569580078125, + "KL/mean": -819.03857421875, + "KL/rejected_KL_mean": -1001.3201904296875, + "KL/std": 516.325927734375, + "epoch": 0.8883994126284875, + "fcm_dpo/beta": 0.0009607453248463571, + "fcm_dpo/delta": 0.051560450345277786, + "fcm_dpo/margin": 364.5632019042969, + "fcm_dpo/q_t": 0.42265427112579346, + "grad_norm": 36.99518966674805, + "learning_rate": 1.9276432015946446e-08, + "logits/chosen": -0.9250746965408325, + "logits/rejected": -0.9353400468826294, + "logps/chosen": -703.0062866210938, + "logps/ref_chosen": -66.24933624267578, + "logps/ref_rejected": -102.30496978759766, + "logps/rejected": -1103.625244140625, + "loss": 1.1441, + "margin_dpo/margin_mean": 364.563232421875, + "margin_dpo/margin_std": 616.972412109375, + "step": 605 + }, + { + "KL/chosen_KL_mean": -655.916015625, + "KL/mean": -859.0430908203125, + "KL/rejected_KL_mean": -1062.170166015625, + "KL/std": 534.576171875, + "epoch": 0.8898678414096917, + "fcm_dpo/beta": 0.0009698671055957675, + "fcm_dpo/delta": 0.005952846258878708, + "fcm_dpo/margin": 406.254150390625, + "fcm_dpo/q_t": 0.41086679697036743, + "grad_norm": 30.389057159423828, + "learning_rate": 1.8785323298722093e-08, + "logits/chosen": -0.9856992363929749, + "logits/rejected": -1.002555012702942, + "logps/chosen": -710.735107421875, + "logps/ref_chosen": -54.819122314453125, + "logps/ref_rejected": -98.37146759033203, + "logps/rejected": -1160.5416259765625, + "loss": 1.1022, + "margin_dpo/margin_mean": 406.254150390625, + "margin_dpo/margin_std": 574.14404296875, + "step": 606 + }, + { + "KL/chosen_KL_mean": -664.7274780273438, + "KL/mean": -838.5308837890625, + "KL/rejected_KL_mean": -1012.334228515625, + "KL/std": 534.4456176757812, + "epoch": 0.8913362701908958, + "fcm_dpo/beta": 0.0009783967398107052, + "fcm_dpo/delta": 0.06167557090520859, + "fcm_dpo/margin": 347.606689453125, + "fcm_dpo/q_t": 0.42285820841789246, + "grad_norm": 26.738142013549805, + "learning_rate": 1.8300307303259904e-08, + "logits/chosen": -0.9779009222984314, + "logits/rejected": -0.9702655076980591, + "logps/chosen": -722.8115234375, + "logps/ref_chosen": -58.08403778076172, + "logps/ref_rejected": -79.777099609375, + "logps/rejected": -1092.111328125, + "loss": 1.1479, + "margin_dpo/margin_mean": 347.606689453125, + "margin_dpo/margin_std": 570.4724731445312, + "step": 607 + }, + { + "KL/chosen_KL_mean": -588.347900390625, + "KL/mean": -785.5659790039062, + "KL/rejected_KL_mean": -982.7840576171875, + "KL/std": 475.997314453125, + "epoch": 0.8928046989720999, + "fcm_dpo/beta": 0.0009828273905441165, + "fcm_dpo/delta": 0.012696724385023117, + "fcm_dpo/margin": 394.4361267089844, + "fcm_dpo/q_t": 0.40994399785995483, + "grad_norm": 33.70753479003906, + "learning_rate": 1.7821396810182437e-08, + "logits/chosen": -1.0321998596191406, + "logits/rejected": -1.0463464260101318, + "logps/chosen": -645.7987060546875, + "logps/ref_chosen": -57.450836181640625, + "logps/ref_rejected": -94.77339172363281, + "logps/rejected": -1077.557373046875, + "loss": 1.0892, + "margin_dpo/margin_mean": 394.4361572265625, + "margin_dpo/margin_std": 485.2535705566406, + "step": 608 + }, + { + "KL/chosen_KL_mean": -616.16552734375, + "KL/mean": -871.0028076171875, + "KL/rejected_KL_mean": -1125.840087890625, + "KL/std": 653.2919311523438, + "epoch": 0.8942731277533039, + "fcm_dpo/beta": 0.0009676434565335512, + "fcm_dpo/delta": -0.09796243906021118, + "fcm_dpo/margin": 509.67462158203125, + "fcm_dpo/q_t": 0.3957828879356384, + "grad_norm": 30.651371002197266, + "learning_rate": 1.7348604439226617e-08, + "logits/chosen": -1.0793794393539429, + "logits/rejected": -1.107104778289795, + "logps/chosen": -674.9708862304688, + "logps/ref_chosen": -58.805355072021484, + "logps/ref_rejected": -88.81600952148438, + "logps/rejected": -1214.6561279296875, + "loss": 1.065, + "margin_dpo/margin_mean": 509.67462158203125, + "margin_dpo/margin_std": 778.646728515625, + "step": 609 + }, + { + "KL/chosen_KL_mean": -621.6527099609375, + "KL/mean": -778.48388671875, + "KL/rejected_KL_mean": -935.3150634765625, + "KL/std": 504.77032470703125, + "epoch": 0.895741556534508, + "fcm_dpo/beta": 0.0009778111707419157, + "fcm_dpo/delta": 0.09600942581892014, + "fcm_dpo/margin": 313.6623229980469, + "fcm_dpo/q_t": 0.4288497567176819, + "grad_norm": 39.5369987487793, + "learning_rate": 1.6881942648911074e-08, + "logits/chosen": -0.9661835432052612, + "logits/rejected": -0.9449666738510132, + "logps/chosen": -687.3477783203125, + "logps/ref_chosen": -65.69503784179688, + "logps/ref_rejected": -83.40538787841797, + "logps/rejected": -1018.720458984375, + "loss": 1.18, + "margin_dpo/margin_mean": 313.662353515625, + "margin_dpo/margin_std": 588.4511108398438, + "step": 610 + }, + { + "KL/chosen_KL_mean": -647.6074829101562, + "KL/mean": -917.2225952148438, + "KL/rejected_KL_mean": -1186.837646484375, + "KL/std": 658.553466796875, + "epoch": 0.8972099853157122, + "fcm_dpo/beta": 0.0009621235076338053, + "fcm_dpo/delta": -0.12539134919643402, + "fcm_dpo/margin": 539.230224609375, + "fcm_dpo/q_t": 0.3878698945045471, + "grad_norm": 32.128814697265625, + "learning_rate": 1.6421423736208e-08, + "logits/chosen": -1.0239993333816528, + "logits/rejected": -1.0698425769805908, + "logps/chosen": -700.2069091796875, + "logps/ref_chosen": -52.59946823120117, + "logps/ref_rejected": -86.33099365234375, + "logps/rejected": -1273.168701171875, + "loss": 1.0452, + "margin_dpo/margin_mean": 539.230224609375, + "margin_dpo/margin_std": 746.280029296875, + "step": 611 + }, + { + "KL/chosen_KL_mean": -688.6466674804688, + "KL/mean": -894.856201171875, + "KL/rejected_KL_mean": -1101.065673828125, + "KL/std": 530.3755493164062, + "epoch": 0.8986784140969163, + "fcm_dpo/beta": 0.0009597926400601864, + "fcm_dpo/delta": 0.004164084792137146, + "fcm_dpo/margin": 412.4189453125, + "fcm_dpo/q_t": 0.40916839241981506, + "grad_norm": 29.971281051635742, + "learning_rate": 1.5967059836219042e-08, + "logits/chosen": -1.008693814277649, + "logits/rejected": -1.013035535812378, + "logps/chosen": -747.9703979492188, + "logps/ref_chosen": -59.32372283935547, + "logps/ref_rejected": -88.31239318847656, + "logps/rejected": -1189.3780517578125, + "loss": 1.0975, + "margin_dpo/margin_mean": 412.4189758300781, + "margin_dpo/margin_std": 563.840087890625, + "step": 612 + }, + { + "KL/chosen_KL_mean": -607.6298217773438, + "KL/mean": -856.0455932617188, + "KL/rejected_KL_mean": -1104.4613037109375, + "KL/std": 585.9758911132812, + "epoch": 0.9001468428781204, + "fcm_dpo/beta": 0.0009444322204217315, + "fcm_dpo/delta": -0.07297656685113907, + "fcm_dpo/margin": 496.83154296875, + "fcm_dpo/q_t": 0.3927931785583496, + "grad_norm": 32.904354095458984, + "learning_rate": 1.551886292185553e-08, + "logits/chosen": -0.9863793849945068, + "logits/rejected": -1.0393249988555908, + "logps/chosen": -667.3597412109375, + "logps/ref_chosen": -59.72996520996094, + "logps/ref_rejected": -105.10752868652344, + "logps/rejected": -1209.56884765625, + "loss": 1.0444, + "margin_dpo/margin_mean": 496.83154296875, + "margin_dpo/margin_std": 600.5185546875, + "step": 613 + }, + { + "KL/chosen_KL_mean": -677.3827514648438, + "KL/mean": -920.0013427734375, + "KL/rejected_KL_mean": -1162.619873046875, + "KL/std": 576.0763549804688, + "epoch": 0.9016152716593245, + "fcm_dpo/beta": 0.000936733849812299, + "fcm_dpo/delta": -0.05709536373615265, + "fcm_dpo/margin": 485.2372131347656, + "fcm_dpo/q_t": 0.3984524607658386, + "grad_norm": 37.47915267944336, + "learning_rate": 1.507684480352292e-08, + "logits/chosen": -0.9485939741134644, + "logits/rejected": -1.0217413902282715, + "logps/chosen": -730.3217163085938, + "logps/ref_chosen": -52.93898010253906, + "logps/ref_rejected": -104.67938232421875, + "logps/rejected": -1267.29931640625, + "loss": 1.0746, + "margin_dpo/margin_mean": 485.2372131347656, + "margin_dpo/margin_std": 681.718505859375, + "step": 614 + }, + { + "KL/chosen_KL_mean": -648.6558837890625, + "KL/mean": -851.766845703125, + "KL/rejected_KL_mean": -1054.877685546875, + "KL/std": 605.865234375, + "epoch": 0.9030837004405287, + "fcm_dpo/beta": 0.0009383243741467595, + "fcm_dpo/delta": 0.01932334341108799, + "fcm_dpo/margin": 406.2218017578125, + "fcm_dpo/q_t": 0.4133886396884918, + "grad_norm": 23.292583465576172, + "learning_rate": 1.4641017128809801e-08, + "logits/chosen": -0.9777463674545288, + "logits/rejected": -0.999763548374176, + "logps/chosen": -714.4732055664062, + "logps/ref_chosen": -65.81727600097656, + "logps/ref_rejected": -95.17749786376953, + "logps/rejected": -1150.05517578125, + "loss": 1.1261, + "margin_dpo/margin_mean": 406.22174072265625, + "margin_dpo/margin_std": 651.3701171875, + "step": 615 + }, + { + "KL/chosen_KL_mean": -753.4498901367188, + "KL/mean": -922.0699462890625, + "KL/rejected_KL_mean": -1090.6898193359375, + "KL/std": 498.5460205078125, + "epoch": 0.9045521292217328, + "fcm_dpo/beta": 0.0009485027985647321, + "fcm_dpo/delta": 0.0827227309346199, + "fcm_dpo/margin": 337.23992919921875, + "fcm_dpo/q_t": 0.4269304871559143, + "grad_norm": 29.635278701782227, + "learning_rate": 1.4211391382180637e-08, + "logits/chosen": -1.066072702407837, + "logits/rejected": -1.0544729232788086, + "logps/chosen": -818.582763671875, + "logps/ref_chosen": -65.13285827636719, + "logps/ref_rejected": -74.70050048828125, + "logps/rejected": -1165.390380859375, + "loss": 1.1591, + "margin_dpo/margin_mean": 337.2399597167969, + "margin_dpo/margin_std": 560.1079711914062, + "step": 616 + }, + { + "KL/chosen_KL_mean": -710.6087646484375, + "KL/mean": -830.27587890625, + "KL/rejected_KL_mean": -949.943115234375, + "KL/std": 471.6356506347656, + "epoch": 0.9060205580029369, + "fcm_dpo/beta": 0.0009591138223186135, + "fcm_dpo/delta": 0.0745362788438797, + "fcm_dpo/margin": 239.3343048095703, + "fcm_dpo/q_t": 0.4473581910133362, + "grad_norm": 49.84982681274414, + "learning_rate": 1.378797888467345e-08, + "logits/chosen": -0.9499881267547607, + "logits/rejected": -0.9198344945907593, + "logps/chosen": -773.6142578125, + "logps/ref_chosen": -63.005550384521484, + "logps/ref_rejected": -64.234130859375, + "logps/rejected": -1014.1771850585938, + "loss": 1.2347, + "margin_dpo/margin_mean": 239.3343048095703, + "margin_dpo/margin_std": 541.6489868164062, + "step": 617 + }, + { + "KL/chosen_KL_mean": -755.9691162109375, + "KL/mean": -1014.7872314453125, + "KL/rejected_KL_mean": -1273.605224609375, + "KL/std": 649.2423095703125, + "epoch": 0.9074889867841409, + "fcm_dpo/beta": 0.0009580876212567091, + "fcm_dpo/delta": -0.1011531874537468, + "fcm_dpo/margin": 517.6361694335938, + "fcm_dpo/q_t": 0.39342206716537476, + "grad_norm": 40.80027389526367, + "learning_rate": 1.3370790793601371e-08, + "logits/chosen": -1.010411024093628, + "logits/rejected": -1.0457146167755127, + "logps/chosen": -823.0704345703125, + "logps/ref_chosen": -67.10134887695312, + "logps/ref_rejected": -92.15340423583984, + "logps/rejected": -1365.7586669921875, + "loss": 1.0908, + "margin_dpo/margin_mean": 517.6361694335938, + "margin_dpo/margin_std": 831.257080078125, + "step": 618 + }, + { + "KL/chosen_KL_mean": -744.197021484375, + "KL/mean": -942.0189819335938, + "KL/rejected_KL_mean": -1139.841064453125, + "KL/std": 602.969970703125, + "epoch": 0.908957415565345, + "fcm_dpo/beta": 0.0009506435599178076, + "fcm_dpo/delta": 0.024809934198856354, + "fcm_dpo/margin": 395.64404296875, + "fcm_dpo/q_t": 0.4217052459716797, + "grad_norm": 47.4921875, + "learning_rate": 1.2959838102258535e-08, + "logits/chosen": -0.9666841626167297, + "logits/rejected": -0.9786205291748047, + "logps/chosen": -800.1752319335938, + "logps/ref_chosen": -55.978233337402344, + "logps/ref_rejected": -93.1854019165039, + "logps/rejected": -1233.0263671875, + "loss": 1.1713, + "margin_dpo/margin_mean": 395.64404296875, + "margin_dpo/margin_std": 793.0050048828125, + "step": 619 + }, + { + "KL/chosen_KL_mean": -677.560546875, + "KL/mean": -861.231689453125, + "KL/rejected_KL_mean": -1044.9027099609375, + "KL/std": 534.52587890625, + "epoch": 0.9104258443465492, + "fcm_dpo/beta": 0.0009550647810101509, + "fcm_dpo/delta": 0.05083069950342178, + "fcm_dpo/margin": 367.34222412109375, + "fcm_dpo/q_t": 0.42078667879104614, + "grad_norm": 38.08302307128906, + "learning_rate": 1.2555131639630567e-08, + "logits/chosen": -1.0355204343795776, + "logits/rejected": -1.041193962097168, + "logps/chosen": -737.3580322265625, + "logps/ref_chosen": -59.79750061035156, + "logps/ref_rejected": -78.41075134277344, + "logps/rejected": -1123.3134765625, + "loss": 1.1439, + "margin_dpo/margin_mean": 367.34222412109375, + "margin_dpo/margin_std": 601.9281005859375, + "step": 620 + }, + { + "KL/chosen_KL_mean": -640.847900390625, + "KL/mean": -920.02197265625, + "KL/rejected_KL_mean": -1199.1959228515625, + "KL/std": 635.8433227539062, + "epoch": 0.9118942731277533, + "fcm_dpo/beta": 0.0009489471558481455, + "fcm_dpo/delta": -0.13720259070396423, + "fcm_dpo/margin": 558.3480834960938, + "fcm_dpo/q_t": 0.3808121085166931, + "grad_norm": 41.10859298706055, + "learning_rate": 1.2156682070109086e-08, + "logits/chosen": -1.0572166442871094, + "logits/rejected": -1.106241226196289, + "logps/chosen": -694.7816772460938, + "logps/ref_chosen": -53.93375778198242, + "logps/ref_rejected": -88.36951446533203, + "logps/rejected": -1287.5654296875, + "loss": 1.0292, + "margin_dpo/margin_mean": 558.3480834960938, + "margin_dpo/margin_std": 704.4603271484375, + "step": 621 + }, + { + "KL/chosen_KL_mean": -638.4073486328125, + "KL/mean": -838.403076171875, + "KL/rejected_KL_mean": -1038.39892578125, + "KL/std": 490.0394592285156, + "epoch": 0.9133627019089574, + "fcm_dpo/beta": 0.0009346996666863561, + "fcm_dpo/delta": 0.026821225881576538, + "fcm_dpo/margin": 399.9915771484375, + "fcm_dpo/q_t": 0.41576099395751953, + "grad_norm": 29.741287231445312, + "learning_rate": 1.1764499893210878e-08, + "logits/chosen": -0.913569986820221, + "logits/rejected": -0.9021658897399902, + "logps/chosen": -698.693115234375, + "logps/ref_chosen": -60.28582000732422, + "logps/ref_rejected": -85.51873779296875, + "logps/rejected": -1123.9176025390625, + "loss": 1.1208, + "margin_dpo/margin_mean": 399.9915771484375, + "margin_dpo/margin_std": 609.2018432617188, + "step": 622 + }, + { + "KL/chosen_KL_mean": -704.8582763671875, + "KL/mean": -860.2454833984375, + "KL/rejected_KL_mean": -1015.6328125, + "KL/std": 504.6408386230469, + "epoch": 0.9148311306901615, + "fcm_dpo/beta": 0.0009554900461807847, + "fcm_dpo/delta": 0.10620071738958359, + "fcm_dpo/margin": 310.77447509765625, + "fcm_dpo/q_t": 0.43424922227859497, + "grad_norm": 35.98710250854492, + "learning_rate": 1.1378595443300998e-08, + "logits/chosen": -1.0679330825805664, + "logits/rejected": -1.069124460220337, + "logps/chosen": -769.0152587890625, + "logps/ref_chosen": -64.1569595336914, + "logps/ref_rejected": -85.08304595947266, + "logps/rejected": -1100.7158203125, + "loss": 1.188, + "margin_dpo/margin_mean": 310.77447509765625, + "margin_dpo/margin_std": 607.74267578125, + "step": 623 + }, + { + "KL/chosen_KL_mean": -696.2843627929688, + "KL/mean": -931.2808837890625, + "KL/rejected_KL_mean": -1166.27734375, + "KL/std": 522.650146484375, + "epoch": 0.9162995594713657, + "fcm_dpo/beta": 0.0009546733344905078, + "fcm_dpo/delta": -0.050946250557899475, + "fcm_dpo/margin": 469.9930419921875, + "fcm_dpo/q_t": 0.39522331953048706, + "grad_norm": 39.16311264038086, + "learning_rate": 1.0998978889320582e-08, + "logits/chosen": -1.0855488777160645, + "logits/rejected": -1.0924354791641235, + "logps/chosen": -768.2030029296875, + "logps/ref_chosen": -71.91862487792969, + "logps/ref_rejected": -97.13203430175781, + "logps/rejected": -1263.409423828125, + "loss": 1.0557, + "margin_dpo/margin_mean": 469.9930419921875, + "margin_dpo/margin_std": 563.95068359375, + "step": 624 + }, + { + "KL/chosen_KL_mean": -648.822509765625, + "KL/mean": -904.4124755859375, + "KL/rejected_KL_mean": -1160.00244140625, + "KL/std": 575.8782958984375, + "epoch": 0.9177679882525698, + "fcm_dpo/beta": 0.0009412041981704533, + "fcm_dpo/delta": -0.08514019101858139, + "fcm_dpo/margin": 511.17987060546875, + "fcm_dpo/q_t": 0.389559268951416, + "grad_norm": 60.529544830322266, + "learning_rate": 1.0625660234518913e-08, + "logits/chosen": -0.9935369491577148, + "logits/rejected": -1.0175690650939941, + "logps/chosen": -707.16455078125, + "logps/ref_chosen": -58.342071533203125, + "logps/ref_rejected": -86.09038543701172, + "logps/rejected": -1246.0927734375, + "loss": 1.0202, + "margin_dpo/margin_mean": 511.17987060546875, + "margin_dpo/margin_std": 540.048095703125, + "step": 625 + }, + { + "KL/chosen_KL_mean": -831.2083740234375, + "KL/mean": -975.5403442382812, + "KL/rejected_KL_mean": -1119.872314453125, + "KL/std": 650.2010498046875, + "epoch": 0.9192364170337739, + "fcm_dpo/beta": 0.0009531835094094276, + "fcm_dpo/delta": 0.1283356249332428, + "fcm_dpo/margin": 288.66387939453125, + "fcm_dpo/q_t": 0.4358038902282715, + "grad_norm": 34.610740661621094, + "learning_rate": 1.0258649316189721e-08, + "logits/chosen": -0.9936656951904297, + "logits/rejected": -0.986907422542572, + "logps/chosen": -906.321044921875, + "logps/ref_chosen": -75.11260986328125, + "logps/ref_rejected": -99.188720703125, + "logps/rejected": -1219.06103515625, + "loss": 1.2216, + "margin_dpo/margin_mean": 288.66387939453125, + "margin_dpo/margin_std": 637.5816650390625, + "step": 626 + }, + { + "KL/chosen_KL_mean": -562.23046875, + "KL/mean": -855.1490478515625, + "KL/rejected_KL_mean": -1148.0675048828125, + "KL/std": 695.8411254882812, + "epoch": 0.920704845814978, + "fcm_dpo/beta": 0.0009427897166460752, + "fcm_dpo/delta": -0.1610720157623291, + "fcm_dpo/margin": 585.8370361328125, + "fcm_dpo/q_t": 0.3851046562194824, + "grad_norm": 25.602148056030273, + "learning_rate": 9.897955805412e-09, + "logits/chosen": -0.9343521595001221, + "logits/rejected": -1.0077568292617798, + "logps/chosen": -609.9736328125, + "logps/ref_chosen": -47.74314880371094, + "logps/ref_rejected": -106.75448608398438, + "logps/rejected": -1254.822021484375, + "loss": 1.0308, + "margin_dpo/margin_mean": 585.8370361328125, + "margin_dpo/margin_std": 801.694580078125, + "step": 627 + }, + { + "KL/chosen_KL_mean": -718.6756591796875, + "KL/mean": -939.210205078125, + "KL/rejected_KL_mean": -1159.744873046875, + "KL/std": 562.9912109375, + "epoch": 0.922173274596182, + "fcm_dpo/beta": 0.0009263536194339395, + "fcm_dpo/delta": -0.008988456800580025, + "fcm_dpo/margin": 441.0692138671875, + "fcm_dpo/q_t": 0.40781164169311523, + "grad_norm": 31.12415313720703, + "learning_rate": 9.543589206795238e-09, + "logits/chosen": -1.0448391437530518, + "logits/rejected": -1.059401273727417, + "logps/chosen": -778.8585815429688, + "logps/ref_chosen": -60.182945251464844, + "logps/ref_rejected": -101.55467224121094, + "logps/rejected": -1261.299560546875, + "loss": 1.1016, + "margin_dpo/margin_mean": 441.0692138671875, + "margin_dpo/margin_std": 644.30810546875, + "step": 628 + }, + { + "KL/chosen_KL_mean": -718.2134399414062, + "KL/mean": -919.1171875, + "KL/rejected_KL_mean": -1120.0211181640625, + "KL/std": 549.9234619140625, + "epoch": 0.9236417033773862, + "fcm_dpo/beta": 0.0009307701839134097, + "fcm_dpo/delta": 0.027018554508686066, + "fcm_dpo/margin": 401.8076171875, + "fcm_dpo/q_t": 0.4127495288848877, + "grad_norm": 37.944095611572266, + "learning_rate": 9.19555885822887e-09, + "logits/chosen": -1.0313966274261475, + "logits/rejected": -1.0439316034317017, + "logps/chosen": -782.427001953125, + "logps/ref_chosen": -64.21354675292969, + "logps/ref_rejected": -91.65367126464844, + "logps/rejected": -1211.6748046875, + "loss": 1.1066, + "margin_dpo/margin_mean": 401.8076171875, + "margin_dpo/margin_std": 538.9395751953125, + "step": 629 + }, + { + "KL/chosen_KL_mean": -656.501953125, + "KL/mean": -770.98876953125, + "KL/rejected_KL_mean": -885.4755249023438, + "KL/std": 560.9046630859375, + "epoch": 0.9251101321585903, + "fcm_dpo/beta": 0.0009370100451633334, + "fcm_dpo/delta": 0.05341341719031334, + "fcm_dpo/margin": 228.9735565185547, + "fcm_dpo/q_t": 0.4552198052406311, + "grad_norm": 48.24060821533203, + "learning_rate": 8.85387393063622e-09, + "logits/chosen": -1.0444166660308838, + "logits/rejected": -1.0228123664855957, + "logps/chosen": -715.79296875, + "logps/ref_chosen": -59.29100036621094, + "logps/ref_rejected": -83.59829711914062, + "logps/rejected": -969.0738525390625, + "loss": 1.2752, + "margin_dpo/margin_mean": 228.9735565185547, + "margin_dpo/margin_std": 674.7388916015625, + "step": 630 + }, + { + "KL/chosen_KL_mean": -798.7627563476562, + "KL/mean": -982.8570556640625, + "KL/rejected_KL_mean": -1166.951416015625, + "KL/std": 575.3311767578125, + "epoch": 0.9265785609397944, + "fcm_dpo/beta": 0.00095040921587497, + "fcm_dpo/delta": 0.051799606531858444, + "fcm_dpo/margin": 368.1885986328125, + "fcm_dpo/q_t": 0.41983652114868164, + "grad_norm": 30.060415267944336, + "learning_rate": 8.518543427732949e-09, + "logits/chosen": -1.1124560832977295, + "logits/rejected": -1.1201171875, + "logps/chosen": -858.2163696289062, + "logps/ref_chosen": -59.45360565185547, + "logps/ref_rejected": -80.95156860351562, + "logps/rejected": -1247.9029541015625, + "loss": 1.1601, + "margin_dpo/margin_mean": 368.1886291503906, + "margin_dpo/margin_std": 662.2765502929688, + "step": 631 + }, + { + "KL/chosen_KL_mean": -693.1824951171875, + "KL/mean": -889.6626586914062, + "KL/rejected_KL_mean": -1086.142822265625, + "KL/std": 519.53759765625, + "epoch": 0.9280469897209985, + "fcm_dpo/beta": 0.0009511418174952269, + "fcm_dpo/delta": 0.026925835758447647, + "fcm_dpo/margin": 392.960205078125, + "fcm_dpo/q_t": 0.4149981141090393, + "grad_norm": 43.195838928222656, + "learning_rate": 8.189576185789637e-09, + "logits/chosen": -1.0623399019241333, + "logits/rejected": -1.0656976699829102, + "logps/chosen": -754.5340576171875, + "logps/ref_chosen": -61.35155487060547, + "logps/ref_rejected": -86.16017150878906, + "logps/rejected": -1172.302978515625, + "loss": 1.1353, + "margin_dpo/margin_mean": 392.960205078125, + "margin_dpo/margin_std": 638.5850830078125, + "step": 632 + }, + { + "KL/chosen_KL_mean": -757.2740478515625, + "KL/mean": -897.8375244140625, + "KL/rejected_KL_mean": -1038.4010009765625, + "KL/std": 511.4078369140625, + "epoch": 0.9295154185022027, + "fcm_dpo/beta": 0.0009779944084584713, + "fcm_dpo/delta": 0.12812459468841553, + "fcm_dpo/margin": 281.12689208984375, + "fcm_dpo/q_t": 0.43694406747817993, + "grad_norm": 51.9892692565918, + "learning_rate": 7.866980873399015e-09, + "logits/chosen": -1.1087684631347656, + "logits/rejected": -1.1197929382324219, + "logps/chosen": -814.55224609375, + "logps/ref_chosen": -57.27816390991211, + "logps/ref_rejected": -91.58395385742188, + "logps/rejected": -1129.98486328125, + "loss": 1.2081, + "margin_dpo/margin_mean": 281.1269226074219, + "margin_dpo/margin_std": 584.89990234375, + "step": 633 + }, + { + "KL/chosen_KL_mean": -870.9852905273438, + "KL/mean": -992.6185302734375, + "KL/rejected_KL_mean": -1114.251708984375, + "KL/std": 614.6510009765625, + "epoch": 0.9309838472834068, + "fcm_dpo/beta": 0.0009954730048775673, + "fcm_dpo/delta": 0.06913463771343231, + "fcm_dpo/margin": 243.26641845703125, + "fcm_dpo/q_t": 0.44643303751945496, + "grad_norm": 50.689117431640625, + "learning_rate": 7.550765991247654e-09, + "logits/chosen": -0.9997051358222961, + "logits/rejected": -0.9940841197967529, + "logps/chosen": -937.604248046875, + "logps/ref_chosen": -66.61896514892578, + "logps/ref_rejected": -107.12564849853516, + "logps/rejected": -1221.37744140625, + "loss": 1.2403, + "margin_dpo/margin_mean": 243.2664337158203, + "margin_dpo/margin_std": 593.8778076171875, + "step": 634 + }, + { + "KL/chosen_KL_mean": -731.5853271484375, + "KL/mean": -910.15234375, + "KL/rejected_KL_mean": -1088.71923828125, + "KL/std": 636.81201171875, + "epoch": 0.9324522760646109, + "fcm_dpo/beta": 0.0010012383572757244, + "fcm_dpo/delta": 0.04401912912726402, + "fcm_dpo/margin": 357.1339416503906, + "fcm_dpo/q_t": 0.42191681265830994, + "grad_norm": 40.568695068359375, + "learning_rate": 7.240939871891699e-09, + "logits/chosen": -1.059622049331665, + "logits/rejected": -1.0404071807861328, + "logps/chosen": -805.5408935546875, + "logps/ref_chosen": -73.95551300048828, + "logps/ref_rejected": -82.50045776367188, + "logps/rejected": -1171.2197265625, + "loss": 1.1529, + "margin_dpo/margin_mean": 357.1339416503906, + "margin_dpo/margin_std": 645.6236572265625, + "step": 635 + }, + { + "KL/chosen_KL_mean": -694.026123046875, + "KL/mean": -906.4205322265625, + "KL/rejected_KL_mean": -1118.81494140625, + "KL/std": 625.3888549804688, + "epoch": 0.933920704845815, + "fcm_dpo/beta": 0.0010070966091006994, + "fcm_dpo/delta": -0.029895581305027008, + "fcm_dpo/margin": 424.7888488769531, + "fcm_dpo/q_t": 0.40687400102615356, + "grad_norm": 29.367713928222656, + "learning_rate": 6.937510679537628e-09, + "logits/chosen": -0.9780547618865967, + "logits/rejected": -0.9799286127090454, + "logps/chosen": -753.655029296875, + "logps/ref_chosen": -59.628910064697266, + "logps/ref_rejected": -81.97883605957031, + "logps/rejected": -1200.793701171875, + "loss": 1.0945, + "margin_dpo/margin_mean": 424.78887939453125, + "margin_dpo/margin_std": 647.4033203125, + "step": 636 + }, + { + "KL/chosen_KL_mean": -701.5499267578125, + "KL/mean": -936.32275390625, + "KL/rejected_KL_mean": -1171.095703125, + "KL/std": 619.800537109375, + "epoch": 0.9353891336270191, + "fcm_dpo/beta": 0.0009838433470577002, + "fcm_dpo/delta": -0.06574591249227524, + "fcm_dpo/margin": 469.54571533203125, + "fcm_dpo/q_t": 0.3974034786224365, + "grad_norm": 28.718305587768555, + "learning_rate": 6.640486409826785e-09, + "logits/chosen": -1.07195246219635, + "logits/rejected": -1.1217677593231201, + "logps/chosen": -751.2025756835938, + "logps/ref_chosen": -49.652687072753906, + "logps/ref_rejected": -98.40513610839844, + "logps/rejected": -1269.500732421875, + "loss": 1.0659, + "margin_dpo/margin_mean": 469.54571533203125, + "margin_dpo/margin_std": 641.02294921875, + "step": 637 + }, + { + "KL/chosen_KL_mean": -683.7498779296875, + "KL/mean": -866.938720703125, + "KL/rejected_KL_mean": -1050.1275634765625, + "KL/std": 584.358154296875, + "epoch": 0.9368575624082232, + "fcm_dpo/beta": 0.000977477291598916, + "fcm_dpo/delta": -0.07950125634670258, + "fcm_dpo/margin": 366.377685546875, + "fcm_dpo/q_t": 0.41288208961486816, + "grad_norm": 35.89247131347656, + "learning_rate": 6.349874889624962e-09, + "logits/chosen": -0.9825940728187561, + "logits/rejected": -0.9679138660430908, + "logps/chosen": -741.9065551757812, + "logps/ref_chosen": -58.156639099121094, + "logps/ref_rejected": -79.3014907836914, + "logps/rejected": -1129.4290771484375, + "loss": 1.1675, + "margin_dpo/margin_mean": 366.377685546875, + "margin_dpo/margin_std": 677.9688720703125, + "step": 638 + }, + { + "KL/chosen_KL_mean": -931.100830078125, + "KL/mean": -1014.3240966796875, + "KL/rejected_KL_mean": -1097.54736328125, + "KL/std": 560.0640869140625, + "epoch": 0.9383259911894273, + "fcm_dpo/beta": 0.0009697063360363245, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 166.44647216796875, + "fcm_dpo/q_t": 0.4638892412185669, + "grad_norm": 106.00291442871094, + "learning_rate": 6.065683776815933e-09, + "logits/chosen": -0.96961510181427, + "logits/rejected": -0.9110531806945801, + "logps/chosen": -1003.424072265625, + "logps/ref_chosen": -72.32319641113281, + "logps/ref_rejected": -74.2749252319336, + "logps/rejected": -1171.822265625, + "loss": 1.3398, + "margin_dpo/margin_mean": 166.44647216796875, + "margin_dpo/margin_std": 696.6536865234375, + "step": 639 + }, + { + "KL/chosen_KL_mean": -727.3014526367188, + "KL/mean": -1004.7110595703125, + "KL/rejected_KL_mean": -1282.1207275390625, + "KL/std": 652.2802734375, + "epoch": 0.9397944199706314, + "fcm_dpo/beta": 0.0009544800268486142, + "fcm_dpo/delta": -0.13660603761672974, + "fcm_dpo/margin": 554.8192138671875, + "fcm_dpo/q_t": 0.3860216438770294, + "grad_norm": 35.883907318115234, + "learning_rate": 5.7879205600998296e-09, + "logits/chosen": -0.9869524240493774, + "logits/rejected": -1.01767098903656, + "logps/chosen": -783.435791015625, + "logps/ref_chosen": -56.13436508178711, + "logps/ref_rejected": -108.60014343261719, + "logps/rejected": -1390.7208251953125, + "loss": 1.042, + "margin_dpo/margin_mean": 554.8192138671875, + "margin_dpo/margin_std": 769.599853515625, + "step": 640 + }, + { + "KL/chosen_KL_mean": -833.5911865234375, + "KL/mean": -1017.5059814453125, + "KL/rejected_KL_mean": -1201.420654296875, + "KL/std": 561.353759765625, + "epoch": 0.9412628487518355, + "fcm_dpo/beta": 0.0009492564713582397, + "fcm_dpo/delta": 0.052702054381370544, + "fcm_dpo/margin": 367.82965087890625, + "fcm_dpo/q_t": 0.4235016107559204, + "grad_norm": 43.7893180847168, + "learning_rate": 5.516592558795746e-09, + "logits/chosen": -1.0607787370681763, + "logits/rejected": -1.0730290412902832, + "logps/chosen": -898.5880126953125, + "logps/ref_chosen": -64.99689483642578, + "logps/ref_rejected": -86.99232482910156, + "logps/rejected": -1288.4130859375, + "loss": 1.1796, + "margin_dpo/margin_mean": 367.82965087890625, + "margin_dpo/margin_std": 744.2984619140625, + "step": 641 + }, + { + "KL/chosen_KL_mean": -781.8062744140625, + "KL/mean": -1004.431396484375, + "KL/rejected_KL_mean": -1227.056640625, + "KL/std": 735.1798706054688, + "epoch": 0.9427312775330396, + "fcm_dpo/beta": 0.0009536816505715251, + "fcm_dpo/delta": -0.025937873870134354, + "fcm_dpo/margin": 445.2503662109375, + "fcm_dpo/q_t": 0.41450613737106323, + "grad_norm": 38.126136779785156, + "learning_rate": 5.251706922648868e-09, + "logits/chosen": -0.9798089861869812, + "logits/rejected": -1.0176451206207275, + "logps/chosen": -847.4955444335938, + "logps/ref_chosen": -65.68924713134766, + "logps/ref_rejected": -110.24205017089844, + "logps/rejected": -1337.2987060546875, + "loss": 1.1489, + "margin_dpo/margin_mean": 445.2503662109375, + "margin_dpo/margin_std": 889.4637451171875, + "step": 642 + }, + { + "KL/chosen_KL_mean": -700.666748046875, + "KL/mean": -871.712890625, + "KL/rejected_KL_mean": -1042.759033203125, + "KL/std": 530.568603515625, + "epoch": 0.9441997063142438, + "fcm_dpo/beta": 0.0009416728862561285, + "fcm_dpo/delta": -0.0368349552154541, + "fcm_dpo/margin": 342.09228515625, + "fcm_dpo/q_t": 0.42535167932510376, + "grad_norm": 40.722110748291016, + "learning_rate": 4.993270631642038e-09, + "logits/chosen": -1.0998975038528442, + "logits/rejected": -1.1016184091567993, + "logps/chosen": -752.61669921875, + "logps/ref_chosen": -51.94999694824219, + "logps/ref_rejected": -87.46833801269531, + "logps/rejected": -1130.227294921875, + "loss": 1.1546, + "margin_dpo/margin_mean": 342.09228515625, + "margin_dpo/margin_std": 534.0775146484375, + "step": 643 + }, + { + "KL/chosen_KL_mean": -692.7113037109375, + "KL/mean": -867.8548583984375, + "KL/rejected_KL_mean": -1042.9984130859375, + "KL/std": 628.4418334960938, + "epoch": 0.9456681350954479, + "fcm_dpo/beta": 0.0009508873336017132, + "fcm_dpo/delta": 0.06925636529922485, + "fcm_dpo/margin": 350.287109375, + "fcm_dpo/q_t": 0.42657724022865295, + "grad_norm": 48.49483871459961, + "learning_rate": 4.741290495811873e-09, + "logits/chosen": -1.0195714235305786, + "logits/rejected": -1.0287786722183228, + "logps/chosen": -751.72900390625, + "logps/ref_chosen": -59.017662048339844, + "logps/ref_rejected": -87.13668823242188, + "logps/rejected": -1130.135009765625, + "loss": 1.1883, + "margin_dpo/margin_mean": 350.287109375, + "margin_dpo/margin_std": 713.6632080078125, + "step": 644 + }, + { + "KL/chosen_KL_mean": -731.31298828125, + "KL/mean": -818.722412109375, + "KL/rejected_KL_mean": -906.1319580078125, + "KL/std": 500.92510986328125, + "epoch": 0.947136563876652, + "fcm_dpo/beta": 0.0009679758222773671, + "fcm_dpo/delta": 0.06848917156457901, + "fcm_dpo/margin": 174.8188934326172, + "fcm_dpo/q_t": 0.4633423388004303, + "grad_norm": 94.91219329833984, + "learning_rate": 4.495773155069299e-09, + "logits/chosen": -1.0337581634521484, + "logits/rejected": -1.0212106704711914, + "logps/chosen": -787.1890258789062, + "logps/ref_chosen": -55.87602233886719, + "logps/ref_rejected": -97.78080749511719, + "logps/rejected": -1003.9127197265625, + "loss": 1.3337, + "margin_dpo/margin_mean": 174.81890869140625, + "margin_dpo/margin_std": 677.0361328125, + "step": 645 + }, + { + "KL/chosen_KL_mean": -678.8671875, + "KL/mean": -832.9037475585938, + "KL/rejected_KL_mean": -986.9403686523438, + "KL/std": 467.6722412109375, + "epoch": 0.9486049926578561, + "fcm_dpo/beta": 0.000979509437456727, + "fcm_dpo/delta": 0.10140877962112427, + "fcm_dpo/margin": 308.0731201171875, + "fcm_dpo/q_t": 0.4318525791168213, + "grad_norm": 50.75778579711914, + "learning_rate": 4.256725079024553e-09, + "logits/chosen": -1.0464283227920532, + "logits/rejected": -1.0351706743240356, + "logps/chosen": -740.1429443359375, + "logps/ref_chosen": -61.275787353515625, + "logps/ref_rejected": -77.50580596923828, + "logps/rejected": -1064.4461669921875, + "loss": 1.1804, + "margin_dpo/margin_mean": 308.0731201171875, + "margin_dpo/margin_std": 558.4867553710938, + "step": 646 + }, + { + "KL/chosen_KL_mean": -601.7261962890625, + "KL/mean": -791.6389770507812, + "KL/rejected_KL_mean": -981.5518188476562, + "KL/std": 535.917724609375, + "epoch": 0.9500734214390602, + "fcm_dpo/beta": 0.0009915875270962715, + "fcm_dpo/delta": 0.024278640747070312, + "fcm_dpo/margin": 379.82562255859375, + "fcm_dpo/q_t": 0.41298890113830566, + "grad_norm": 31.015090942382812, + "learning_rate": 4.024152566816791e-09, + "logits/chosen": -0.9510085582733154, + "logits/rejected": -0.9799119830131531, + "logps/chosen": -656.57861328125, + "logps/ref_chosen": -54.8524169921875, + "logps/ref_rejected": -93.5194091796875, + "logps/rejected": -1075.0712890625, + "loss": 1.1086, + "margin_dpo/margin_mean": 379.82562255859375, + "margin_dpo/margin_std": 518.4892578125, + "step": 647 + }, + { + "KL/chosen_KL_mean": -650.6891479492188, + "KL/mean": -929.8692016601562, + "KL/rejected_KL_mean": -1209.0491943359375, + "KL/std": 650.9400024414062, + "epoch": 0.9515418502202643, + "fcm_dpo/beta": 0.0009713097242638469, + "fcm_dpo/delta": -0.15041759610176086, + "fcm_dpo/margin": 558.360107421875, + "fcm_dpo/q_t": 0.3838508427143097, + "grad_norm": 26.95428466796875, + "learning_rate": 3.798061746947995e-09, + "logits/chosen": -1.0590667724609375, + "logits/rejected": -1.1171326637268066, + "logps/chosen": -704.860595703125, + "logps/ref_chosen": -54.17146682739258, + "logps/ref_rejected": -98.7127914428711, + "logps/rejected": -1307.761962890625, + "loss": 1.0313, + "margin_dpo/margin_mean": 558.360107421875, + "margin_dpo/margin_std": 770.7064208984375, + "step": 648 + }, + { + "KL/chosen_KL_mean": -675.1800537109375, + "KL/mean": -811.64111328125, + "KL/rejected_KL_mean": -948.1021118164062, + "KL/std": 509.8891296386719, + "epoch": 0.9530102790014684, + "fcm_dpo/beta": 0.0009851048234850168, + "fcm_dpo/delta": 0.13432249426841736, + "fcm_dpo/margin": 272.9219970703125, + "fcm_dpo/q_t": 0.44306886196136475, + "grad_norm": 30.479537963867188, + "learning_rate": 3.5784585771215235e-09, + "logits/chosen": -1.0864759683609009, + "logits/rejected": -1.081239938735962, + "logps/chosen": -737.660400390625, + "logps/ref_chosen": -62.480350494384766, + "logps/ref_rejected": -80.07717895507812, + "logps/rejected": -1028.1793212890625, + "loss": 1.2299, + "margin_dpo/margin_mean": 272.9219665527344, + "margin_dpo/margin_std": 634.4632568359375, + "step": 649 + }, + { + "KL/chosen_KL_mean": -719.4322509765625, + "KL/mean": -930.966552734375, + "KL/rejected_KL_mean": -1142.5008544921875, + "KL/std": 623.9461059570312, + "epoch": 0.9544787077826725, + "fcm_dpo/beta": 0.000984450918622315, + "fcm_dpo/delta": -0.017372816801071167, + "fcm_dpo/margin": 423.06866455078125, + "fcm_dpo/q_t": 0.4085081219673157, + "grad_norm": 33.085140228271484, + "learning_rate": 3.3653488440851253e-09, + "logits/chosen": -1.0022144317626953, + "logits/rejected": -1.0228140354156494, + "logps/chosen": -775.5250244140625, + "logps/ref_chosen": -56.09281921386719, + "logps/ref_rejected": -98.26483917236328, + "logps/rejected": -1240.765625, + "loss": 1.125, + "margin_dpo/margin_mean": 423.06866455078125, + "margin_dpo/margin_std": 715.4996337890625, + "step": 650 + }, + { + "KL/chosen_KL_mean": -485.9007263183594, + "KL/mean": -757.1597900390625, + "KL/rejected_KL_mean": -1028.4189453125, + "KL/std": 586.595703125, + "epoch": 0.9559471365638766, + "fcm_dpo/beta": 0.0009662117809057236, + "fcm_dpo/delta": -0.13117295503616333, + "fcm_dpo/margin": 542.51806640625, + "fcm_dpo/q_t": 0.3815712332725525, + "grad_norm": 39.065223693847656, + "learning_rate": 3.158738163478475e-09, + "logits/chosen": -1.039862871170044, + "logits/rejected": -1.0981051921844482, + "logps/chosen": -529.326171875, + "logps/ref_chosen": -43.42544937133789, + "logps/ref_rejected": -99.95791625976562, + "logps/rejected": -1128.376708984375, + "loss": 1.0046, + "margin_dpo/margin_mean": 542.5181274414062, + "margin_dpo/margin_std": 605.7119140625, + "step": 651 + }, + { + "KL/chosen_KL_mean": -620.2460327148438, + "KL/mean": -826.7354736328125, + "KL/rejected_KL_mean": -1033.224853515625, + "KL/std": 587.8150024414062, + "epoch": 0.9574155653450808, + "fcm_dpo/beta": 0.0009623857913538814, + "fcm_dpo/delta": 0.0026037218049168587, + "fcm_dpo/margin": 412.978759765625, + "fcm_dpo/q_t": 0.41180309653282166, + "grad_norm": 32.109920501708984, + "learning_rate": 2.9586319796851555e-09, + "logits/chosen": -1.0359432697296143, + "logits/rejected": -1.061659812927246, + "logps/chosen": -682.8228759765625, + "logps/ref_chosen": -62.57680892944336, + "logps/ref_rejected": -111.76779174804688, + "logps/rejected": -1144.99267578125, + "loss": 1.1236, + "margin_dpo/margin_mean": 412.978759765625, + "margin_dpo/margin_std": 679.6004638671875, + "step": 652 + }, + { + "KL/chosen_KL_mean": -751.1220703125, + "KL/mean": -942.161376953125, + "KL/rejected_KL_mean": -1133.20068359375, + "KL/std": 632.44580078125, + "epoch": 0.9588839941262849, + "fcm_dpo/beta": 0.0009670084109529853, + "fcm_dpo/delta": 0.031598955392837524, + "fcm_dpo/margin": 382.0786437988281, + "fcm_dpo/q_t": 0.418673038482666, + "grad_norm": 33.81359100341797, + "learning_rate": 2.7650355656892166e-09, + "logits/chosen": -1.081420660018921, + "logits/rejected": -1.106847882270813, + "logps/chosen": -812.2350463867188, + "logps/ref_chosen": -61.11295700073242, + "logps/ref_rejected": -103.24960327148438, + "logps/rejected": -1236.4503173828125, + "loss": 1.1426, + "margin_dpo/margin_mean": 382.07867431640625, + "margin_dpo/margin_std": 657.617431640625, + "step": 653 + }, + { + "KL/chosen_KL_mean": -700.0304565429688, + "KL/mean": -884.8853759765625, + "KL/rejected_KL_mean": -1069.740234375, + "KL/std": 530.6134643554688, + "epoch": 0.960352422907489, + "fcm_dpo/beta": 0.0009726278949528933, + "fcm_dpo/delta": 0.041933320462703705, + "fcm_dpo/margin": 369.70977783203125, + "fcm_dpo/q_t": 0.42105910181999207, + "grad_norm": 36.13345718383789, + "learning_rate": 2.577954022936174e-09, + "logits/chosen": -1.0889091491699219, + "logits/rejected": -1.1061911582946777, + "logps/chosen": -761.7586059570312, + "logps/ref_chosen": -61.7281379699707, + "logps/ref_rejected": -98.7738037109375, + "logps/rejected": -1168.5140380859375, + "loss": 1.1425, + "margin_dpo/margin_mean": 369.70977783203125, + "margin_dpo/margin_std": 623.5421142578125, + "step": 654 + }, + { + "KL/chosen_KL_mean": -637.3580322265625, + "KL/mean": -832.098876953125, + "KL/rejected_KL_mean": -1026.83984375, + "KL/std": 523.131103515625, + "epoch": 0.9618208516886931, + "fcm_dpo/beta": 0.0009784356225281954, + "fcm_dpo/delta": 0.01965608447790146, + "fcm_dpo/margin": 389.4818115234375, + "fcm_dpo/q_t": 0.4150038957595825, + "grad_norm": 30.601289749145508, + "learning_rate": 2.397392281198729e-09, + "logits/chosen": -1.062340259552002, + "logits/rejected": -1.1041678190231323, + "logps/chosen": -686.934814453125, + "logps/ref_chosen": -49.576812744140625, + "logps/ref_rejected": -98.29183197021484, + "logps/rejected": -1125.131591796875, + "loss": 1.1251, + "margin_dpo/margin_mean": 389.4818115234375, + "margin_dpo/margin_std": 619.3175659179688, + "step": 655 + }, + { + "KL/chosen_KL_mean": -698.026123046875, + "KL/mean": -1021.94140625, + "KL/rejected_KL_mean": -1345.856689453125, + "KL/std": 696.60986328125, + "epoch": 0.9632892804698973, + "fcm_dpo/beta": 0.0009496092097833753, + "fcm_dpo/delta": -0.2287594974040985, + "fcm_dpo/margin": 647.8305053710938, + "fcm_dpo/q_t": 0.36486658453941345, + "grad_norm": 84.34307861328125, + "learning_rate": 2.223355098446622e-09, + "logits/chosen": -0.9495760202407837, + "logits/rejected": -1.0183899402618408, + "logps/chosen": -750.5755615234375, + "logps/ref_chosen": -52.54943084716797, + "logps/ref_rejected": -113.67464447021484, + "logps/rejected": -1459.5313720703125, + "loss": 0.9582, + "margin_dpo/margin_mean": 647.8305053710938, + "margin_dpo/margin_std": 702.5700073242188, + "step": 656 + }, + { + "KL/chosen_KL_mean": -649.0306396484375, + "KL/mean": -907.1767578125, + "KL/rejected_KL_mean": -1165.3228759765625, + "KL/std": 657.1854248046875, + "epoch": 0.9647577092511013, + "fcm_dpo/beta": 0.0009227419504895806, + "fcm_dpo/delta": -0.08039526641368866, + "fcm_dpo/margin": 516.2921142578125, + "fcm_dpo/q_t": 0.39258694648742676, + "grad_norm": 39.40578079223633, + "learning_rate": 2.055847060721566e-09, + "logits/chosen": -1.1030490398406982, + "logits/rejected": -1.1474685668945312, + "logps/chosen": -695.731201171875, + "logps/ref_chosen": -46.700538635253906, + "logps/ref_rejected": -97.91487121582031, + "logps/rejected": -1263.23779296875, + "loss": 1.0539, + "margin_dpo/margin_mean": 516.2921142578125, + "margin_dpo/margin_std": 684.8028564453125, + "step": 657 + }, + { + "KL/chosen_KL_mean": -707.4656982421875, + "KL/mean": -911.14208984375, + "KL/rejected_KL_mean": -1114.818603515625, + "KL/std": 517.7315063476562, + "epoch": 0.9662261380323054, + "fcm_dpo/beta": 0.0009188736439682543, + "fcm_dpo/delta": 0.026215653866529465, + "fcm_dpo/margin": 407.352783203125, + "fcm_dpo/q_t": 0.41408517956733704, + "grad_norm": 35.71732711791992, + "learning_rate": 1.8948725820160662e-09, + "logits/chosen": -1.0394493341445923, + "logits/rejected": -1.0688188076019287, + "logps/chosen": -768.4239501953125, + "logps/ref_chosen": -60.95820999145508, + "logps/ref_rejected": -95.93949127197266, + "logps/rejected": -1210.758056640625, + "loss": 1.1197, + "margin_dpo/margin_mean": 407.352783203125, + "margin_dpo/margin_std": 595.02197265625, + "step": 658 + }, + { + "KL/chosen_KL_mean": -623.0128784179688, + "KL/mean": -827.2291259765625, + "KL/rejected_KL_mean": -1031.4453125, + "KL/std": 528.8475341796875, + "epoch": 0.9676945668135095, + "fcm_dpo/beta": 0.000925220490898937, + "fcm_dpo/delta": 0.022823944687843323, + "fcm_dpo/margin": 408.4324645996094, + "fcm_dpo/q_t": 0.414547324180603, + "grad_norm": 32.48310852050781, + "learning_rate": 1.7404359041573723e-09, + "logits/chosen": -0.990066409111023, + "logits/rejected": -0.9657001495361328, + "logps/chosen": -699.755859375, + "logps/ref_chosen": -76.74298095703125, + "logps/ref_rejected": -87.4709701538086, + "logps/rejected": -1118.916259765625, + "loss": 1.1117, + "margin_dpo/margin_mean": 408.4324645996094, + "margin_dpo/margin_std": 585.33984375, + "step": 659 + }, + { + "KL/chosen_KL_mean": -673.4608154296875, + "KL/mean": -933.520263671875, + "KL/rejected_KL_mean": -1193.5797119140625, + "KL/std": 613.996826171875, + "epoch": 0.9691629955947136, + "fcm_dpo/beta": 0.0009196768514811993, + "fcm_dpo/delta": -0.08219671249389648, + "fcm_dpo/margin": 520.118896484375, + "fcm_dpo/q_t": 0.39171260595321655, + "grad_norm": 41.30915451049805, + "learning_rate": 1.592541096695571e-09, + "logits/chosen": -1.0613317489624023, + "logits/rejected": -1.0815818309783936, + "logps/chosen": -732.5086669921875, + "logps/ref_chosen": -59.04788589477539, + "logps/ref_rejected": -75.96005249023438, + "logps/rejected": -1269.539794921875, + "loss": 1.0459, + "margin_dpo/margin_mean": 520.118896484375, + "margin_dpo/margin_std": 653.3119506835938, + "step": 660 + }, + { + "KL/chosen_KL_mean": -606.4295654296875, + "KL/mean": -838.5125732421875, + "KL/rejected_KL_mean": -1070.595458984375, + "KL/std": 678.2152709960938, + "epoch": 0.9706314243759178, + "fcm_dpo/beta": 0.0009147179080173373, + "fcm_dpo/delta": -0.02594481222331524, + "fcm_dpo/margin": 464.16583251953125, + "fcm_dpo/q_t": 0.4064168334007263, + "grad_norm": 51.29008865356445, + "learning_rate": 1.4511920567963908e-09, + "logits/chosen": -1.0680885314941406, + "logits/rejected": -1.084218978881836, + "logps/chosen": -657.1035766601562, + "logps/ref_chosen": -50.673973083496094, + "logps/ref_rejected": -86.00569152832031, + "logps/rejected": -1156.6011962890625, + "loss": 1.0866, + "margin_dpo/margin_mean": 464.16583251953125, + "margin_dpo/margin_std": 674.4207153320312, + "step": 661 + }, + { + "KL/chosen_KL_mean": -692.4723510742188, + "KL/mean": -874.324462890625, + "KL/rejected_KL_mean": -1056.1767578125, + "KL/std": 554.696044921875, + "epoch": 0.9720998531571219, + "fcm_dpo/beta": 0.0009189635748043656, + "fcm_dpo/delta": 0.06803098320960999, + "fcm_dpo/margin": 363.70428466796875, + "fcm_dpo/q_t": 0.42484885454177856, + "grad_norm": 30.058595657348633, + "learning_rate": 1.3163925091384532e-09, + "logits/chosen": -0.986479640007019, + "logits/rejected": -0.9849323034286499, + "logps/chosen": -761.7333984375, + "logps/ref_chosen": -69.26106262207031, + "logps/ref_rejected": -89.05593872070312, + "logps/rejected": -1145.232666015625, + "loss": 1.1704, + "margin_dpo/margin_mean": 363.70428466796875, + "margin_dpo/margin_std": 682.1209106445312, + "step": 662 + }, + { + "KL/chosen_KL_mean": -647.2999267578125, + "KL/mean": -866.9969482421875, + "KL/rejected_KL_mean": -1086.6939697265625, + "KL/std": 632.6029052734375, + "epoch": 0.973568281938326, + "fcm_dpo/beta": 0.0009198928019031882, + "fcm_dpo/delta": -0.004432424902915955, + "fcm_dpo/margin": 439.39404296875, + "fcm_dpo/q_t": 0.4112858176231384, + "grad_norm": 27.409046173095703, + "learning_rate": 1.1881460058152382e-09, + "logits/chosen": -1.0472636222839355, + "logits/rejected": -1.0724174976348877, + "logps/chosen": -712.1788330078125, + "logps/ref_chosen": -64.87890625, + "logps/ref_rejected": -113.92536926269531, + "logps/rejected": -1200.619384765625, + "loss": 1.1207, + "margin_dpo/margin_mean": 439.39404296875, + "margin_dpo/margin_std": 730.9638671875, + "step": 663 + }, + { + "KL/chosen_KL_mean": -672.3688354492188, + "KL/mean": -912.7847900390625, + "KL/rejected_KL_mean": -1153.2008056640625, + "KL/std": 606.9482421875, + "epoch": 0.9750367107195301, + "fcm_dpo/beta": 0.0009121259208768606, + "fcm_dpo/delta": -0.04079785570502281, + "fcm_dpo/margin": 480.83197021484375, + "fcm_dpo/q_t": 0.401122510433197, + "grad_norm": 32.19367218017578, + "learning_rate": 1.066455926241383e-09, + "logits/chosen": -1.0128577947616577, + "logits/rejected": -1.0457968711853027, + "logps/chosen": -733.25732421875, + "logps/ref_chosen": -60.88847351074219, + "logps/ref_rejected": -105.521728515625, + "logps/rejected": -1258.7225341796875, + "loss": 1.0745, + "margin_dpo/margin_mean": 480.83197021484375, + "margin_dpo/margin_std": 644.64111328125, + "step": 664 + }, + { + "KL/chosen_KL_mean": -613.8486328125, + "KL/mean": -820.011474609375, + "KL/rejected_KL_mean": -1026.17431640625, + "KL/std": 517.1300048828125, + "epoch": 0.9765051395007343, + "fcm_dpo/beta": 0.0009164921357296407, + "fcm_dpo/delta": 0.022980544716119766, + "fcm_dpo/margin": 412.32568359375, + "fcm_dpo/q_t": 0.41263529658317566, + "grad_norm": 42.685340881347656, + "learning_rate": 9.513254770636137e-10, + "logits/chosen": -1.1305358409881592, + "logits/rejected": -1.1555566787719727, + "logps/chosen": -674.4127807617188, + "logps/ref_chosen": -60.56413269042969, + "logps/ref_rejected": -84.80882263183594, + "logps/rejected": -1110.983154296875, + "loss": 1.0985, + "margin_dpo/margin_mean": 412.32568359375, + "margin_dpo/margin_std": 522.885009765625, + "step": 665 + }, + { + "KL/chosen_KL_mean": -656.64306640625, + "KL/mean": -868.8929443359375, + "KL/rejected_KL_mean": -1081.1427001953125, + "KL/std": 534.3782348632812, + "epoch": 0.9779735682819384, + "fcm_dpo/beta": 0.0009179958724416792, + "fcm_dpo/delta": 0.010700155980885029, + "fcm_dpo/margin": 424.4996643066406, + "fcm_dpo/q_t": 0.4115890562534332, + "grad_norm": 31.538972854614258, + "learning_rate": 8.427576920763956e-10, + "logits/chosen": -0.9657202959060669, + "logits/rejected": -0.9787443280220032, + "logps/chosen": -721.06298828125, + "logps/ref_chosen": -64.41996002197266, + "logps/ref_rejected": -95.8916244506836, + "logps/rejected": -1177.034423828125, + "loss": 1.1055, + "margin_dpo/margin_mean": 424.4996643066406, + "margin_dpo/margin_std": 592.3191528320312, + "step": 666 + }, + { + "KL/chosen_KL_mean": -739.8983764648438, + "KL/mean": -986.7392578125, + "KL/rejected_KL_mean": -1233.580078125, + "KL/std": 586.67529296875, + "epoch": 0.9794419970631424, + "fcm_dpo/beta": 0.0009104580385610461, + "fcm_dpo/delta": -0.05206644535064697, + "fcm_dpo/margin": 493.6817321777344, + "fcm_dpo/q_t": 0.3979244828224182, + "grad_norm": 36.603797912597656, + "learning_rate": 7.407554321417764e-10, + "logits/chosen": -0.9688647389411926, + "logits/rejected": -0.9707045555114746, + "logps/chosen": -809.1754150390625, + "logps/ref_chosen": -69.27702331542969, + "logps/ref_rejected": -87.83549499511719, + "logps/rejected": -1321.41552734375, + "loss": 1.0643, + "margin_dpo/margin_mean": 493.68170166015625, + "margin_dpo/margin_std": 641.2989501953125, + "step": 667 + }, + { + "KL/chosen_KL_mean": -802.19189453125, + "KL/mean": -976.08984375, + "KL/rejected_KL_mean": -1149.98779296875, + "KL/std": 634.327392578125, + "epoch": 0.9809104258443465, + "fcm_dpo/beta": 0.0009259539656341076, + "fcm_dpo/delta": 0.07960406690835953, + "fcm_dpo/margin": 347.7958984375, + "fcm_dpo/q_t": 0.4307333827018738, + "grad_norm": 52.49308395385742, + "learning_rate": 6.453213851142225e-10, + "logits/chosen": -1.0459859371185303, + "logits/rejected": -1.0500774383544922, + "logps/chosen": -874.7958984375, + "logps/ref_chosen": -72.60400390625, + "logps/ref_rejected": -103.73905944824219, + "logps/rejected": -1253.726806640625, + "loss": 1.2043, + "margin_dpo/margin_mean": 347.7958984375, + "margin_dpo/margin_std": 759.7967529296875, + "step": 668 + }, + { + "KL/chosen_KL_mean": -591.334228515625, + "KL/mean": -840.5963134765625, + "KL/rejected_KL_mean": -1089.8583984375, + "KL/std": 579.8489990234375, + "epoch": 0.9823788546255506, + "fcm_dpo/beta": 0.0009176377207040787, + "fcm_dpo/delta": -0.06017923727631569, + "fcm_dpo/margin": 498.524169921875, + "fcm_dpo/q_t": 0.3957340717315674, + "grad_norm": 30.24985122680664, + "learning_rate": 5.564580657695939e-10, + "logits/chosen": -1.0195106267929077, + "logits/rejected": -1.0302537679672241, + "logps/chosen": -637.45068359375, + "logps/ref_chosen": -46.116416931152344, + "logps/ref_rejected": -77.92434692382812, + "logps/rejected": -1167.78271484375, + "loss": 1.0627, + "margin_dpo/margin_mean": 498.524169921875, + "margin_dpo/margin_std": 653.9466552734375, + "step": 669 + }, + { + "KL/chosen_KL_mean": -564.9832763671875, + "KL/mean": -813.625732421875, + "KL/rejected_KL_mean": -1062.26806640625, + "KL/std": 539.938232421875, + "epoch": 0.9838472834067548, + "fcm_dpo/beta": 0.0009101468604058027, + "fcm_dpo/delta": -0.055214740335941315, + "fcm_dpo/margin": 497.284912109375, + "fcm_dpo/q_t": 0.39692699909210205, + "grad_norm": 27.409191131591797, + "learning_rate": 4.741678157389739e-10, + "logits/chosen": -0.9492954015731812, + "logits/rejected": -0.9691870212554932, + "logps/chosen": -627.3289794921875, + "logps/ref_chosen": -62.34575271606445, + "logps/ref_rejected": -96.9405517578125, + "logps/rejected": -1159.208740234375, + "loss": 1.0676, + "margin_dpo/margin_mean": 497.284912109375, + "margin_dpo/margin_std": 651.83740234375, + "step": 670 + }, + { + "KL/chosen_KL_mean": -729.5746459960938, + "KL/mean": -929.4378662109375, + "KL/rejected_KL_mean": -1129.301025390625, + "KL/std": 544.60791015625, + "epoch": 0.9853157121879589, + "fcm_dpo/beta": 0.000911594950594008, + "fcm_dpo/delta": 0.03665146976709366, + "fcm_dpo/margin": 399.72637939453125, + "fcm_dpo/q_t": 0.4158746898174286, + "grad_norm": 29.13888168334961, + "learning_rate": 3.9845280344705245e-10, + "logits/chosen": -1.0544450283050537, + "logits/rejected": -1.084800362586975, + "logps/chosen": -777.5747680664062, + "logps/ref_chosen": -48.00010681152344, + "logps/ref_rejected": -83.81932067871094, + "logps/rejected": -1213.120361328125, + "loss": 1.1388, + "margin_dpo/margin_mean": 399.7263488769531, + "margin_dpo/margin_std": 654.765869140625, + "step": 671 + }, + { + "KL/chosen_KL_mean": -805.3826904296875, + "KL/mean": -1007.677001953125, + "KL/rejected_KL_mean": -1209.97119140625, + "KL/std": 662.1884155273438, + "epoch": 0.986784140969163, + "fcm_dpo/beta": 0.0009131274418905377, + "fcm_dpo/delta": 0.03172078728675842, + "fcm_dpo/margin": 404.588623046875, + "fcm_dpo/q_t": 0.4185020923614502, + "grad_norm": 53.180294036865234, + "learning_rate": 3.293150240547549e-10, + "logits/chosen": -1.1241331100463867, + "logits/rejected": -1.1317377090454102, + "logps/chosen": -863.9659423828125, + "logps/ref_chosen": -58.58328628540039, + "logps/ref_rejected": -93.14015197753906, + "logps/rejected": -1303.1114501953125, + "loss": 1.156, + "margin_dpo/margin_mean": 404.588623046875, + "margin_dpo/margin_std": 734.8128662109375, + "step": 672 + }, + { + "KL/chosen_KL_mean": -725.3074951171875, + "KL/mean": -918.955078125, + "KL/rejected_KL_mean": -1112.602783203125, + "KL/std": 561.255859375, + "epoch": 0.9882525697503671, + "fcm_dpo/beta": 0.000922200852073729, + "fcm_dpo/delta": 0.04443016275763512, + "fcm_dpo/margin": 387.2952880859375, + "fcm_dpo/q_t": 0.41957566142082214, + "grad_norm": 33.6239128112793, + "learning_rate": 2.6675629940689504e-10, + "logits/chosen": -1.0476765632629395, + "logits/rejected": -1.0524837970733643, + "logps/chosen": -772.0306396484375, + "logps/ref_chosen": -46.72320556640625, + "logps/ref_rejected": -85.29623413085938, + "logps/rejected": -1197.89892578125, + "loss": 1.1365, + "margin_dpo/margin_mean": 387.2952880859375, + "margin_dpo/margin_std": 632.4697875976562, + "step": 673 + }, + { + "KL/chosen_KL_mean": -585.1640625, + "KL/mean": -831.7803955078125, + "KL/rejected_KL_mean": -1078.396728515625, + "KL/std": 547.6962890625, + "epoch": 0.9897209985315712, + "fcm_dpo/beta": 0.0009187724208459258, + "fcm_dpo/delta": -0.055665817111730576, + "fcm_dpo/margin": 493.232666015625, + "fcm_dpo/q_t": 0.39939314126968384, + "grad_norm": 33.85618209838867, + "learning_rate": 2.1077827798404725e-10, + "logits/chosen": -0.9810643196105957, + "logits/rejected": -1.0035473108291626, + "logps/chosen": -630.609619140625, + "logps/ref_chosen": -45.445526123046875, + "logps/ref_rejected": -70.04593658447266, + "logps/rejected": -1148.442626953125, + "loss": 1.0637, + "margin_dpo/margin_mean": 493.232666015625, + "margin_dpo/margin_std": 653.6361083984375, + "step": 674 + }, + { + "KL/chosen_KL_mean": -663.6976318359375, + "KL/mean": -916.9816284179688, + "KL/rejected_KL_mean": -1170.265625, + "KL/std": 600.1426391601562, + "epoch": 0.9911894273127754, + "fcm_dpo/beta": 0.0008998748380690813, + "fcm_dpo/delta": -0.0599069781601429, + "fcm_dpo/margin": 506.56787109375, + "fcm_dpo/q_t": 0.39825230836868286, + "grad_norm": 27.58077049255371, + "learning_rate": 1.6138243485910863e-10, + "logits/chosen": -1.038741111755371, + "logits/rejected": -1.0553760528564453, + "logps/chosen": -707.8739624023438, + "logps/ref_chosen": -44.17628479003906, + "logps/ref_rejected": -74.09197998046875, + "logps/rejected": -1244.357666015625, + "loss": 1.0612, + "margin_dpo/margin_mean": 506.56787109375, + "margin_dpo/margin_std": 637.0610961914062, + "step": 675 + }, + { + "KL/chosen_KL_mean": -725.62841796875, + "KL/mean": -966.3801879882812, + "KL/rejected_KL_mean": -1207.1319580078125, + "KL/std": 584.3846435546875, + "epoch": 0.9926578560939795, + "fcm_dpo/beta": 0.0009004472522065043, + "fcm_dpo/delta": -0.03510238975286484, + "fcm_dpo/margin": 481.5036315917969, + "fcm_dpo/q_t": 0.4012463092803955, + "grad_norm": 27.7912654876709, + "learning_rate": 1.1857007165852472e-10, + "logits/chosen": -0.995841920375824, + "logits/rejected": -1.0157501697540283, + "logps/chosen": -797.0269165039062, + "logps/ref_chosen": -71.39852905273438, + "logps/ref_rejected": -88.3587646484375, + "logps/rejected": -1295.49072265625, + "loss": 1.0661, + "margin_dpo/margin_mean": 481.50360107421875, + "margin_dpo/margin_std": 591.6707763671875, + "step": 676 + }, + { + "KL/chosen_KL_mean": -729.6778564453125, + "KL/mean": -962.5052490234375, + "KL/rejected_KL_mean": -1195.332763671875, + "KL/std": 609.0902099609375, + "epoch": 0.9941262848751835, + "fcm_dpo/beta": 0.0008935732766985893, + "fcm_dpo/delta": -0.016854500398039818, + "fcm_dpo/margin": 465.65484619140625, + "fcm_dpo/q_t": 0.41068124771118164, + "grad_norm": 29.935705184936523, + "learning_rate": 8.23423165278725e-11, + "logits/chosen": -1.073415756225586, + "logits/rejected": -1.0710859298706055, + "logps/chosen": -786.2052612304688, + "logps/ref_chosen": -56.527435302734375, + "logps/ref_rejected": -78.22654724121094, + "logps/rejected": -1273.559326171875, + "loss": 1.1046, + "margin_dpo/margin_mean": 465.65484619140625, + "margin_dpo/margin_std": 735.0831298828125, + "step": 677 + }, + { + "KL/chosen_KL_mean": -608.3975830078125, + "KL/mean": -880.777587890625, + "KL/rejected_KL_mean": -1153.1575927734375, + "KL/std": 650.5914306640625, + "epoch": 0.9955947136563876, + "fcm_dpo/beta": 0.00088664231589064, + "fcm_dpo/delta": -0.08726058155298233, + "fcm_dpo/margin": 544.7600708007812, + "fcm_dpo/q_t": 0.39224404096603394, + "grad_norm": 33.50616455078125, + "learning_rate": 5.270012410216185e-11, + "logits/chosen": -1.0082026720046997, + "logits/rejected": -1.0482615232467651, + "logps/chosen": -654.531982421875, + "logps/ref_chosen": -46.13447570800781, + "logps/ref_rejected": -80.60462951660156, + "logps/rejected": -1233.76220703125, + "loss": 1.0566, + "margin_dpo/margin_mean": 544.7600708007812, + "margin_dpo/margin_std": 738.8284912109375, + "step": 678 + }, + { + "KL/chosen_KL_mean": -707.9012451171875, + "KL/mean": -900.2404174804688, + "KL/rejected_KL_mean": -1092.57958984375, + "KL/std": 537.857177734375, + "epoch": 0.9970631424375918, + "fcm_dpo/beta": 0.0008858998189680278, + "fcm_dpo/delta": 0.06133866682648659, + "fcm_dpo/margin": 384.67840576171875, + "fcm_dpo/q_t": 0.42340487241744995, + "grad_norm": 31.147796630859375, + "learning_rate": 2.9644275480772416e-11, + "logits/chosen": -1.03668212890625, + "logits/rejected": -1.0291433334350586, + "logps/chosen": -758.1961669921875, + "logps/ref_chosen": -50.294921875, + "logps/ref_rejected": -76.59813690185547, + "logps/rejected": -1169.177734375, + "loss": 1.1451, + "margin_dpo/margin_mean": 384.678466796875, + "margin_dpo/margin_std": 626.0743408203125, + "step": 679 + }, + { + "KL/chosen_KL_mean": -720.6077880859375, + "KL/mean": -977.27392578125, + "KL/rejected_KL_mean": -1233.93994140625, + "KL/std": 704.15478515625, + "epoch": 0.9985315712187959, + "fcm_dpo/beta": 0.0008777154725976288, + "fcm_dpo/delta": -0.05377676337957382, + "fcm_dpo/margin": 513.332275390625, + "fcm_dpo/q_t": 0.39790278673171997, + "grad_norm": 36.256160736083984, + "learning_rate": 1.31753782067201e-11, + "logits/chosen": -1.0335376262664795, + "logits/rejected": -1.0608773231506348, + "logps/chosen": -797.5235595703125, + "logps/ref_chosen": -76.91569519042969, + "logps/ref_rejected": -112.384765625, + "logps/rejected": -1346.32470703125, + "loss": 1.0875, + "margin_dpo/margin_mean": 513.332275390625, + "margin_dpo/margin_std": 758.932373046875, + "step": 680 + }, + { + "KL/chosen_KL_mean": -707.947998046875, + "KL/mean": -907.2955322265625, + "KL/rejected_KL_mean": -1106.64306640625, + "KL/std": 573.4292602539062, + "epoch": 1.0, + "fcm_dpo/beta": 0.0008919438696466386, + "fcm_dpo/delta": 0.04472469165921211, + "fcm_dpo/margin": 398.6950988769531, + "fcm_dpo/q_t": 0.41937246918678284, + "grad_norm": 42.854522705078125, + "learning_rate": 3.2938662507808745e-12, + "logits/chosen": -1.0845022201538086, + "logits/rejected": -1.1038618087768555, + "logps/chosen": -768.9052734375, + "logps/ref_chosen": -60.957279205322266, + "logps/ref_rejected": -88.55797576904297, + "logps/rejected": -1195.200927734375, + "loss": 1.1404, + "margin_dpo/margin_mean": 398.6950988769531, + "margin_dpo/margin_std": 622.65087890625, + "step": 681 + }, + { + "epoch": 1.0, + "step": 681, + "total_flos": 0.0, + "train_loss": 1.095842420442164, + "train_runtime": 1736.9553, + "train_samples_per_second": 25.1, + "train_steps_per_second": 0.392 + } + ], + "logging_steps": 1, + "max_steps": 681, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}