Files
llama-3-8b-base-new-dpo-hh-…/margin_logs/margins.jsonl
ModelHub XC b5cca84e50 初始化项目,由ModelHub XC社区提供模型
Model: jackf857/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4
Source: Original Platform
2026-05-09 11:52:35 +08:00

682 lines
1.1 MiB

{"epoch": 0.0, "step": 1, "batch_size": 64, "mean": -0.02287048101425171, "std": 0.42023447155952454, "min": -1.4034271240234375, "p10": -0.46674575805664065, "median": 0.04234886169433594, "p90": 0.4323463439941407, "max": 0.89263916015625, "pos_frac": 0.53125, "sample": [-0.06523895263671875, 0.436798095703125, 0.27811431884765625, -0.9194221496582031, 0.018890380859375, 0.20587158203125, 0.18878173828125, -0.3968696594238281, 0.26206207275390625, 0.2470550537109375, -0.040912628173828125, 0.4394989013671875, -0.44133758544921875, -0.39148712158203125, 0.2764854431152344, 0.89263916015625, -0.42584991455078125, -0.46125030517578125, -0.8638992309570312, -0.3508758544921875, 0.371368408203125, 0.887847900390625, -0.382904052734375, 0.36145782470703125, -0.4890003204345703, 0.052455902099609375, -0.036136627197265625, 0.23079299926757812, 0.2469482421875, 0.1643218994140625, -0.07129669189453125, 0.2790794372558594, 0.3637123107910156, -0.8916168212890625, 0.03298759460449219, -0.2790107727050781, -0.17860984802246094, 0.23892593383789062, 0.05171012878417969, -0.2564239501953125, -0.14655303955078125, 0.27777862548828125, 0.0810394287109375, -1.4034271240234375, -0.28739166259765625, -0.1489429473876953, 0.44918060302734375, 0.1693286895751953, 0.10933303833007812, -0.14766693115234375, -0.40944671630859375, -0.18532562255859375, 0.6261310577392578, -0.20856857299804688, 0.602569580078125, 0.05538177490234375, 0.1505279541015625, 0.1313800811767578, -0.006317138671875, 0.42195892333984375, -0.29936981201171875, -0.4691009521484375, 0.16705322265625, -0.5789260864257812], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000001.npy"}
{"epoch": 0.0014684287812041115, "step": 2, "batch_size": 64, "mean": -0.06572240591049194, "std": 0.3523969054222107, "min": -0.9291305541992188, "p10": -0.46334152221679686, "median": -0.05502510070800781, "p90": 0.3672500610351563, "max": 1.0444793701171875, "pos_frac": 0.4375, "sample": [-0.2829437255859375, 0.3027191162109375, -0.19867706298828125, -0.3062286376953125, 0.10318756103515625, 0.20131683349609375, -0.34906005859375, 0.2802886962890625, 0.1914520263671875, -0.31072998046875, 0.08922195434570312, 0.10284614562988281, -0.03655242919921875, -0.0604095458984375, -0.06208038330078125, 0.32562255859375, -0.37982177734375, 0.2746162414550781, -0.049640655517578125, 0.3752174377441406, -0.103973388671875, 0.0699462890625, 0.36417388916015625, -0.033428192138671875, 0.37265777587890625, -0.3787078857421875, -0.6610565185546875, 0.4720420837402344, 0.47701263427734375, -0.27928924560546875, -0.44719696044921875, -0.0965118408203125, -0.7628555297851562, 0.046764373779296875, 0.06670379638671875, -0.9291305541992188, -0.7122802734375, -0.16554832458496094, 0.1485595703125, -0.07539939880371094, 0.2588920593261719, 0.039890289306640625, 0.201690673828125, 0.0623016357421875, 1.0444793701171875, -0.37696075439453125, -0.02794647216796875, -0.223297119140625, -0.35730743408203125, -0.1309051513671875, -0.3106689453125, -0.11409187316894531, -0.1669769287109375, 0.131317138671875, -0.2361297607421875, 0.4093780517578125, -0.6485977172851562, 0.36856842041015625, -0.1951904296875, -0.4702606201171875, -0.7624168395996094, 0.008928298950195312, -0.31630706787109375, 0.022550582885742188], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000002.npy"}
{"epoch": 0.002936857562408223, "step": 3, "batch_size": 64, "mean": -0.004781663417816162, "std": 0.33962053060531616, "min": -0.7390060424804688, "p10": -0.41046276092529294, "median": 0.0014781951904296875, "p90": 0.40929069519042977, "max": 0.8466644287109375, "pos_frac": 0.5, "sample": [-0.35861778259277344, -0.484619140625, -0.109130859375, 0.8466644287109375, -0.2964191436767578, 0.008512496948242188, 0.4225940704345703, 0.10986328125, -0.11309814453125, 0.009889602661132812, -0.71026611328125, 0.0119781494140625, 0.15899658203125, -0.5642242431640625, -0.23193931579589844, 0.14957427978515625, 0.2592010498046875, -0.4326820373535156, 0.214202880859375, -0.031276702880859375, -0.7390060424804688, 0.1382598876953125, 0.7752456665039062, -0.291900634765625, 0.34023284912109375, -0.18585586547851562, 0.2587127685546875, 0.42607688903808594, 0.3839149475097656, -0.174774169921875, 0.7494125366210938, 0.420166015625, 0.055309295654296875, 0.0955810546875, 0.12899017333984375, 0.33301544189453125, -0.5578231811523438, -0.14075851440429688, 0.00815582275390625, 0.31534385681152344, -0.09235000610351562, -0.032958984375, -0.5434036254882812, -0.02978515625, -0.14044952392578125, -0.2809314727783203, 0.14592742919921875, 0.3749961853027344, -0.35533905029296875, -0.22169113159179688, 0.1640605926513672, -0.2105712890625, 0.26515960693359375, 0.0961761474609375, -0.12989234924316406, 0.12631988525390625, -0.22393417358398438, -0.33394622802734375, -0.2723388671875, 0.6070404052734375, 0.012342453002929688, -0.306640625, -0.116119384765625, -0.005199432373046875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000003.npy"}
{"epoch": 0.004405286343612335, "step": 4, "batch_size": 64, "mean": -0.06117379665374756, "std": 0.38408300280570984, "min": -0.963043212890625, "p10": -0.4441057205200195, "median": -0.11900043487548828, "p90": 0.36890869140625016, "max": 1.1872634887695312, "pos_frac": 0.40625, "sample": [-0.5238494873046875, 0.1072845458984375, -0.4910430908203125, 0.12352752685546875, -0.33001708984375, 0.40700531005859375, -0.81219482421875, -0.963043212890625, -0.44861412048339844, -0.14972686767578125, -0.1592559814453125, 0.7667732238769531, -0.3494415283203125, -0.12491607666015625, 0.065887451171875, -0.12044715881347656, -0.43358612060546875, -0.010242462158203125, -0.14511871337890625, 0.23877334594726562, -0.41473388671875, 0.23541259765625, -0.3685760498046875, 0.0089874267578125, -0.11644744873046875, -0.39349365234375, 0.3873291015625, 0.09566879272460938, -0.3124866485595703, -0.885101318359375, -0.15508079528808594, 0.2024364471435547, 0.0608062744140625, 0.04691314697265625, -0.3376426696777344, 0.11866188049316406, 0.240814208984375, 0.8545379638671875, 0.06399917602539062, 0.072509765625, 0.1160888671875, -0.12116050720214844, 0.200653076171875, -0.3048858642578125, -0.11373519897460938, 0.13373184204101562, -0.380126953125, 0.325927734375, -0.1589527130126953, -0.522918701171875, -0.019273757934570312, 0.7310600280761719, 0.28410911560058594, -0.2965087890625, -0.028455734252929688, 0.49243927001953125, 1.1872634887695312, -0.14824676513671875, -0.222930908203125, -0.23900604248046875, -0.1175537109375, -0.3475322723388672, -0.211212158203125, -0.20616531372070312], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000004.npy"}
{"epoch": 0.005873715124816446, "step": 5, "batch_size": 64, "mean": 0.031100064516067505, "std": 0.32399415969848633, "min": -0.6394500732421875, "p10": -0.4497062683105469, "median": 0.08696365356445312, "p90": 0.3837249755859376, "max": 0.93505859375, "pos_frac": 0.59375, "sample": [-0.29290771484375, 0.629791259765625, -0.19347381591796875, 0.3162841796875, -0.15375518798828125, 0.1590118408203125, 0.357086181640625, -0.020320892333984375, 0.0086212158203125, 0.2404937744140625, 0.08005523681640625, 0.93505859375, 0.23782730102539062, 0.0168609619140625, -0.2982292175292969, 0.004253387451171875, 0.3951416015625, -0.5490531921386719, 0.07781982421875, -0.4677276611328125, 0.09542465209960938, 0.19907379150390625, 0.13540267944335938, -0.1147308349609375, 0.11710548400878906, 0.178924560546875, 0.35501861572265625, -0.06450653076171875, -0.538299560546875, 0.093475341796875, 0.11151123046875, 0.49843597412109375, -0.194915771484375, -0.0046596527099609375, -0.1740894317626953, -0.5724029541015625, 0.1213836669921875, -0.378814697265625, 0.46755218505859375, 0.3212127685546875, 0.26789093017578125, 0.08045196533203125, 0.1363525390625, -0.5445709228515625, 0.4244232177734375, -0.45326995849609375, -0.2668304443359375, 0.13805007934570312, -0.4413909912109375, -0.2183685302734375, -0.003070831298828125, -0.052356719970703125, 0.6897354125976562, 0.16920089721679688, 0.2848701477050781, 0.1553192138671875, 0.156494140625, -0.251861572265625, -0.331024169921875, 0.27021026611328125, -0.6394500732421875, 0.19849395751953125, 0.240966796875, -0.1548004150390625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000005.npy"}
{"epoch": 0.007342143906020558, "step": 6, "batch_size": 64, "mean": -0.011603862047195435, "std": 0.39665350317955017, "min": -1.00823974609375, "p10": -0.4863319396972656, "median": -0.00965118408203125, "p90": 0.45274200439453127, "max": 0.7480010986328125, "pos_frac": 0.484375, "sample": [-0.0847930908203125, 0.3553333282470703, 0.00048065185546875, 0.1152496337890625, -0.021419525146484375, 0.7480010986328125, -0.7092456817626953, -0.48401641845703125, -0.04827880859375, -0.3328094482421875, -0.3145103454589844, 0.4116535186767578, -0.884765625, 0.4576759338378906, -0.08668136596679688, -1.00823974609375, 0.1655426025390625, -0.03701210021972656, 0.6396026611328125, 0.4570770263671875, -0.4522857666015625, -0.25310516357421875, 0.3551025390625, -0.00513458251953125, -0.01416778564453125, -0.318572998046875, -0.03023529052734375, 0.37058258056640625, 0.00018310546875, -0.3473548889160156, -0.06034088134765625, 0.05829811096191406, 0.11849212646484375, 0.20827865600585938, 0.03719329833984375, 0.3292999267578125, -0.6696929931640625, -1.0031814575195312, 0.1683502197265625, -0.0173797607421875, -0.52679443359375, -0.16058349609375, 0.03662872314453125, 0.22440719604492188, 0.4285259246826172, -0.09517288208007812, 0.19958877563476562, 0.18880844116210938, -0.484649658203125, -0.150115966796875, -0.053009033203125, 0.7093429565429688, 0.136749267578125, 0.6763420104980469, 0.64410400390625, -0.06633567810058594, 0.40706634521484375, -0.356231689453125, 0.001312255859375, -0.48705291748046875, 0.1780548095703125, -0.3585968017578125, 0.442626953125, -0.0908355712890625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000006.npy"}
{"epoch": 0.00881057268722467, "step": 7, "batch_size": 64, "mean": 0.008680880069732666, "std": 0.43821266293525696, "min": -1.28729248046875, "p10": -0.5190986633300781, "median": 0.021404266357421875, "p90": 0.5184444427490236, "max": 0.9324493408203125, "pos_frac": 0.515625, "sample": [-0.04156494140625, -0.9544448852539062, -0.037326812744140625, -0.08257675170898438, -0.7161865234375, 0.1106109619140625, -0.3739128112792969, 0.52947998046875, -0.2884483337402344, 0.784088134765625, -1.28729248046875, -0.18529510498046875, -0.11114501953125, 0.9324493408203125, -0.34698486328125, 0.1295928955078125, 0.29939842224121094, -0.28572654724121094, -0.1388530731201172, -0.8403701782226562, -0.189422607421875, 0.409942626953125, 0.5353736877441406, 0.4105682373046875, 0.321533203125, 0.08277511596679688, -0.019262313842773438, -0.14005470275878906, -0.3442955017089844, -0.07332229614257812, 0.12881851196289062, 0.48537445068359375, 0.3622093200683594, 0.0794219970703125, 0.4679718017578125, 0.027027130126953125, 0.0483551025390625, 0.44174957275390625, -0.0065155029296875, 0.2201995849609375, -0.777252197265625, -0.9019775390625, 0.015781402587890625, -0.49005126953125, 0.46417999267578125, 0.2122039794921875, 0.12742233276367188, 0.4926948547363281, 0.1551971435546875, -0.26667022705078125, 0.641204833984375, 0.11197662353515625, -0.14392852783203125, 0.43434906005859375, -0.15081024169921875, -0.26712799072265625, -0.0459747314453125, 0.5607452392578125, 0.5469226837158203, 0.31158447265625, 0.36714935302734375, -0.2549285888671875, -0.399505615234375, -0.5315475463867188], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000007.npy"}
{"epoch": 0.010279001468428781, "step": 8, "batch_size": 64, "mean": -0.028942614793777466, "std": 0.4162605106830597, "min": -1.120330810546875, "p10": -0.5512023925781249, "median": -0.003841400146484375, "p90": 0.5433097839355469, "max": 0.9019622802734375, "pos_frac": 0.484375, "sample": [0.1773681640625, 0.196868896484375, -0.3377838134765625, 0.0836639404296875, -0.9000091552734375, 0.07256698608398438, -0.045978546142578125, 0.103851318359375, 0.3939208984375, -0.46561431884765625, -0.3558921813964844, -0.00702667236328125, -0.10202789306640625, 0.0678558349609375, 0.32923126220703125, -0.35579681396484375, 0.047679901123046875, -0.0230712890625, 0.003818511962890625, -0.09091949462890625, 0.600128173828125, -0.8027191162109375, -0.042144775390625, 0.5493927001953125, -0.17336082458496094, -0.9244232177734375, -0.37493133544921875, 0.23165512084960938, 0.5512542724609375, -0.9154205322265625, -0.12860107421875, -0.45557403564453125, 0.3913993835449219, 0.4765167236328125, -0.16391754150390625, -0.21587371826171875, -0.19195556640625, 0.026647567749023438, -0.029413223266601562, 0.7576484680175781, 0.5450057983398438, -0.5878829956054688, -0.3054046630859375, 0.5393524169921875, 0.039722442626953125, -1.120330810546875, -0.74267578125, 0.003326416015625, 0.09845924377441406, 0.13323211669921875, -0.0006561279296875, 0.023712158203125, 0.2012939453125, -0.02359771728515625, 0.9019622802734375, -0.251983642578125, -0.30765533447265625, 0.2846221923828125, -0.209686279296875, 0.28052520751953125, -0.06876754760742188, 0.6702556610107422, -0.018280029296875, 0.1041107177734375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000008.npy"}
{"epoch": 0.011747430249632892, "step": 9, "batch_size": 64, "mean": 0.010411262512207031, "std": 0.42377907037734985, "min": -1.0178756713867188, "p10": -0.5291748046874999, "median": 0.022317886352539062, "p90": 0.5272277832031251, "max": 1.1681194305419922, "pos_frac": 0.53125, "sample": [-0.23404693603515625, -0.0629119873046875, 1.1681194305419922, 0.1597137451171875, 0.147796630859375, -0.3392181396484375, -0.0394439697265625, 0.27527618408203125, 0.2242584228515625, 0.38030242919921875, -0.40057373046875, 0.592437744140625, -0.22249603271484375, -1.017059326171875, -0.25323486328125, -0.20440673828125, -0.567718505859375, 0.06486129760742188, -0.3496246337890625, -0.439239501953125, -0.0656280517578125, 0.046756744384765625, -0.24242401123046875, 0.312469482421875, 0.201171875, 0.14923095703125, -0.184906005859375, 0.48992919921875, -0.002628326416015625, 0.27361297607421875, -0.15695953369140625, 0.0041751861572265625, 0.348388671875, 0.5072860717773438, 0.045284271240234375, -0.0339508056640625, -0.08627700805664062, -0.10452651977539062, -0.65863037109375, 0.8344955444335938, -0.4310760498046875, -1.0178756713867188, -0.8921661376953125, 0.020887374877929688, 0.1662464141845703, 0.18084716796875, -0.2431182861328125, 0.557098388671875, 0.6114578247070312, 0.1102752685546875, -0.27081298828125, -0.1501617431640625, -0.18697357177734375, 0.18709754943847656, 0.25421142578125, 0.648162841796875, 0.200347900390625, 0.4676971435546875, 0.023748397827148438, 0.461822509765625, -0.6551055908203125, 0.5357742309570312, -0.674591064453125, 0.2028656005859375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000009.npy"}
{"epoch": 0.013215859030837005, "step": 10, "batch_size": 64, "mean": 0.019427448511123657, "std": 0.39178046584129333, "min": -1.1516342163085938, "p10": -0.4321557998657226, "median": -0.004876136779785156, "p90": 0.3956089019775393, "max": 1.3614349365234375, "pos_frac": 0.5, "sample": [0.029388427734375, -0.29067230224609375, 0.22713470458984375, 0.09333610534667969, -0.4541301727294922, 0.7452850341796875, 0.04076576232910156, 0.01201629638671875, 0.01802825927734375, 0.04561614990234375, -0.04694366455078125, 0.21155166625976562, -0.38088226318359375, 0.0761566162109375, -0.4800567626953125, 0.337890625, -0.4916648864746094, -0.012065887451171875, 0.02439117431640625, -0.019962310791015625, 0.091827392578125, -0.2649345397949219, -0.011028289794921875, -0.026866912841796875, -0.1923675537109375, -0.062297821044921875, -1.1516342163085938, -0.0734405517578125, 0.4203453063964844, 0.4700145721435547, 0.2417144775390625, -0.1859893798828125, -0.0732421875, 0.02133941650390625, -0.5093307495117188, -0.262603759765625, -0.81640625, 0.2964820861816406, 0.2263336181640625, 0.3355140686035156, -0.22430801391601562, -0.36792755126953125, 0.19207000732421875, 0.501434326171875, 0.13129425048828125, 0.162109375, 0.3199005126953125, 0.5418777465820312, -0.0945587158203125, 0.232452392578125, -0.15096282958984375, -0.03404045104980469, -0.0756378173828125, -0.10376358032226562, -0.174102783203125, -0.21347427368164062, 0.328704833984375, 0.25630950927734375, -0.6125717163085938, 1.3614349365234375, 1.2062911987304688, 0.0012760162353515625, -0.06341743469238281, -0.03564453125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000010.npy"}
{"epoch": 0.014684287812041116, "step": 11, "batch_size": 64, "mean": 0.02777191996574402, "std": 0.36676040291786194, "min": -0.8054962158203125, "p10": -0.3768619537353516, "median": -0.031345367431640625, "p90": 0.5138616561889648, "max": 0.86468505859375, "pos_frac": 0.484375, "sample": [-0.3036003112792969, 0.517120361328125, 0.34377288818359375, 0.09087562561035156, -0.2591094970703125, 0.16621017456054688, 0.6322555541992188, -0.08704376220703125, -0.1255474090576172, 0.7477188110351562, 0.1522960662841797, -0.3341064453125, -0.3268623352050781, 0.2755584716796875, -0.8054962158203125, 0.3476295471191406, -0.09375, -0.19010543823242188, -0.2833251953125, -0.1539783477783203, 0.5062580108642578, 0.1825714111328125, -0.49452972412109375, 0.33838462829589844, -0.37599945068359375, 0.8247146606445312, 0.625396728515625, -0.07665252685546875, -0.2901153564453125, -0.23069381713867188, 0.147247314453125, -0.620635986328125, 0.18729782104492188, -0.0439910888671875, -0.07358551025390625, 0.245635986328125, 0.7569580078125, 0.26210784912109375, 0.2735137939453125, -0.28658294677734375, 0.2509193420410156, -0.050899505615234375, -0.48968505859375, -0.718658447265625, 0.031070709228515625, 0.11958694458007812, 0.207244873046875, 0.0024852752685546875, -0.32135009765625, 0.376251220703125, -0.48627662658691406, 0.180084228515625, 0.2828369140625, -0.3772315979003906, 0.3026847839355469, -0.05113792419433594, -0.06538009643554688, -0.207366943359375, -0.01869964599609375, 0.86468505859375, 0.07444000244140625, -0.04630470275878906, -0.1543445587158203, -0.09536361694335938], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000011.npy"}
{"epoch": 0.016152716593245228, "step": 12, "batch_size": 64, "mean": 0.01589377224445343, "std": 0.3374294340610504, "min": -1.0346527099609375, "p10": -0.45523834228515614, "median": 0.02376413345336914, "p90": 0.40777587890625017, "max": 0.78363037109375, "pos_frac": 0.546875, "sample": [0.48101806640625, 0.071868896484375, -0.12683868408203125, -0.509368896484375, -0.810546875, 0.283447265625, 0.13955307006835938, 0.2445831298828125, 0.34458160400390625, 0.4825096130371094, 0.26151275634765625, -0.5654754638671875, 0.04332733154296875, 0.16485595703125, -0.1933746337890625, 0.13349151611328125, 0.15013504028320312, -0.13250732421875, 0.118377685546875, -0.168731689453125, -1.0346527099609375, -0.289764404296875, -0.029388427734375, -0.00440216064453125, -0.20719528198242188, 0.4244384765625, -0.1584014892578125, -0.0047607421875, 0.01457977294921875, 0.5009613037109375, -0.642822265625, 0.4720458984375, -0.03054046630859375, 0.3321418762207031, 0.0344696044921875, -0.022796630859375, 0.03420257568359375, -0.6667327880859375, 0.00540924072265625, -0.12132835388183594, -0.3289337158203125, 0.3103141784667969, 0.20505332946777344, -0.14354324340820312, 0.1369781494140625, 0.32482147216796875, 0.78363037109375, 0.17428207397460938, 0.03294849395751953, 0.24374771118164062, -0.2621898651123047, 0.06993484497070312, -0.19013214111328125, 0.3372688293457031, 0.6312408447265625, 0.133026123046875, -0.5544586181640625, 0.0107269287109375, -0.06208610534667969, -0.00884246826171875, -0.02960205078125, -0.125762939453125, 0.368896484375, -0.0579986572265625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000012.npy"}
{"epoch": 0.01762114537444934, "step": 13, "batch_size": 64, "mean": 0.023847103118896484, "std": 0.3232966959476471, "min": -0.664154052734375, "p10": -0.3289693832397461, "median": 0.015125274658203125, "p90": 0.3691406250000001, "max": 1.1704254150390625, "pos_frac": 0.53125, "sample": [0.4838714599609375, -0.17052841186523438, 0.40778350830078125, -0.2514190673828125, -0.38330078125, -0.25344085693359375, 0.298828125, 0.09400367736816406, 0.381134033203125, -0.012989044189453125, 0.0477294921875, 0.9962310791015625, -0.40389251708984375, -0.2793235778808594, -0.21554946899414062, 1.1704254150390625, -0.2505950927734375, 0.33447265625, -0.18869590759277344, -0.23211669921875, 0.07733917236328125, 0.25122833251953125, -0.22312164306640625, -0.004390716552734375, 0.061885833740234375, -0.664154052734375, 0.027103424072265625, -0.21916580200195312, -0.0532379150390625, -0.6191558837890625, -0.40412139892578125, 0.12197113037109375, 0.06499862670898438, 0.019649505615234375, -0.324188232421875, 0.17093658447265625, -0.1060791015625, -0.33101844787597656, -0.22629356384277344, 0.113616943359375, -0.018123626708984375, 0.2723274230957031, 0.341156005859375, 0.19608306884765625, -0.05809783935546875, 0.010601043701171875, -0.12810134887695312, 0.22226905822753906, 0.024166107177734375, 0.25258636474609375, -0.07651519775390625, 0.5468292236328125, 0.1244964599609375, 0.04857444763183594, 0.16306304931640625, -0.00335693359375, 0.23590850830078125, 0.12216949462890625, 0.49468994140625, -0.43914031982421875, 0.0062408447265625, 0.24922943115234375, -0.257598876953125, -0.10967254638671875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000013.npy"}
{"epoch": 0.01908957415565345, "step": 14, "batch_size": 64, "mean": 5.313754081726074e-05, "std": 0.42153263092041016, "min": -1.2278289794921875, "p10": -0.4696866989135742, "median": 0.0053730010986328125, "p90": 0.4686550140380862, "max": 1.0506591796875, "pos_frac": 0.5, "sample": [-0.38890838623046875, -0.15871047973632812, 0.013706207275390625, 0.22587203979492188, 1.0506591796875, -0.24660110473632812, -0.8319091796875, 0.0253753662109375, -0.2462615966796875, 0.10363388061523438, -0.35900115966796875, 0.4111824035644531, 0.7440948486328125, 0.8179244995117188, 0.39899444580078125, -0.513824462890625, -0.4809551239013672, -0.115997314453125, -0.25749969482421875, -0.3498382568359375, 0.6946792602539062, -0.0846405029296875, -0.15162277221679688, 0.16077423095703125, 0.3125457763671875, -0.3008842468261719, -1.2278289794921875, -0.03664398193359375, 0.2661285400390625, 0.2024078369140625, -0.12103271484375, -0.025888442993164062, 0.3480224609375, 0.3351707458496094, 0.10076141357421875, 0.4990882873535156, -0.015361785888671875, 0.162017822265625, 0.1843242645263672, -0.029207229614257812, 0.238800048828125, 0.17496109008789062, -0.9061431884765625, -0.2472686767578125, -0.10059356689453125, 0.07071304321289062, 0.32942962646484375, -0.3740425109863281, 0.4932861328125, -0.4433937072753906, -0.5706024169921875, 0.10198211669921875, 0.14581298828125, -0.12301254272460938, -0.02263641357421875, 0.7050933837890625, -0.002960205078125, 0.407623291015625, -0.9601287841796875, 0.07229232788085938, 0.3695945739746094, -0.14415740966796875, 0.029876708984375, -0.35587120056152344], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000014.npy"}
{"epoch": 0.020558002936857563, "step": 15, "batch_size": 64, "mean": -0.01620301604270935, "std": 0.38533642888069153, "min": -1.3293609619140625, "p10": -0.40866012573242183, "median": -0.025936126708984375, "p90": 0.4247604370117189, "max": 0.9265823364257812, "pos_frac": 0.453125, "sample": [0.018789291381835938, 0.439605712890625, 0.3124523162841797, -0.1499919891357422, 0.570953369140625, 0.3516578674316406, 0.04871368408203125, 0.2871971130371094, -0.0204925537109375, -0.16710662841796875, -0.059234619140625, -0.12998580932617188, 0.3395538330078125, -0.41858673095703125, 0.5000133514404297, 0.3901214599609375, -0.147552490234375, -0.10114669799804688, 0.20953369140625, -0.35733795166015625, -0.0965118408203125, -1.3293609619140625, -0.5463027954101562, 0.01107025146484375, -0.34454917907714844, 0.00152587890625, -0.03137969970703125, 0.1158447265625, -0.385498046875, -0.1139373779296875, -0.13155364990234375, -0.1631317138671875, -0.49607086181640625, -0.1487712860107422, 0.2046661376953125, 0.6213531494140625, -0.21678924560546875, 0.1186065673828125, -0.21652984619140625, -0.6952953338623047, -0.0692138671875, 0.12445831298828125, -0.0047359466552734375, 0.2136707305908203, 0.574188232421875, -0.1755847930908203, 0.2775287628173828, -0.11319541931152344, 0.022552490234375, -0.3387794494628906, 0.17215728759765625, 0.36698150634765625, -0.0406951904296875, -0.10869598388671875, 0.30873870849609375, 0.5358505249023438, -1.180145263671875, -0.2887725830078125, 0.25698089599609375, -0.01393890380859375, -0.16178321838378906, -0.6028022766113281, 0.20711898803710938, 0.9265823364257812], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000015.npy"}
{"epoch": 0.022026431718061675, "step": 16, "batch_size": 64, "mean": 0.09651932120323181, "std": 0.35328513383865356, "min": -1.0391464233398438, "p10": -0.29063205718994134, "median": 0.12042999267578125, "p90": 0.5285240173339845, "max": 0.713043212890625, "pos_frac": 0.640625, "sample": [0.03875732421875, 0.6842803955078125, -0.1946563720703125, 0.2767333984375, 0.46930694580078125, 0.42128753662109375, 0.4911956787109375, 0.05400848388671875, -0.31878662109375, 0.713043212890625, -0.1993865966796875, -0.20703125, 0.39260101318359375, 0.6732254028320312, -0.04976844787597656, 0.3732147216796875, -0.31829833984375, -0.32842254638671875, 0.00433349609375, 0.2450714111328125, 0.14054107666015625, 0.2940521240234375, 0.437042236328125, 0.20618438720703125, -4.76837158203125e-05, -0.19748878479003906, -0.8096542358398438, 0.005702972412109375, -0.09321212768554688, -0.18494415283203125, 0.23851394653320312, 0.43303680419921875, -0.19303131103515625, 0.32137298583984375, 0.198638916015625, 0.5136184692382812, 0.3076629638671875, 0.57147216796875, 0.3716888427734375, -0.09397697448730469, 0.1399078369140625, 0.05519866943359375, 0.2440471649169922, -0.229339599609375, 0.6157760620117188, 0.23199081420898438, -0.24254417419433594, -0.8019332885742188, -0.31124114990234375, 0.132476806640625, -0.10812187194824219, -0.11568450927734375, 0.274658203125, -0.009115219116210938, 0.0440826416015625, -0.1640472412109375, 0.16747093200683594, 0.1083831787109375, 0.5634002685546875, 0.534912109375, 0.1001129150390625, 0.25406646728515625, 0.04404449462890625, -1.0391464233398438], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000016.npy"}
{"epoch": 0.023494860499265784, "step": 17, "batch_size": 64, "mean": 0.011324524879455566, "std": 0.3592832386493683, "min": -0.7841033935546875, "p10": -0.352098274230957, "median": -0.0064544677734375, "p90": 0.5316787719726563, "max": 0.9109344482421875, "pos_frac": 0.484375, "sample": [-0.2689552307128906, 0.16047286987304688, -0.22268295288085938, 0.370758056640625, -0.21273040771484375, -0.2957344055175781, -0.113922119140625, -0.7841033935546875, 0.18634796142578125, 0.036670684814453125, -0.105438232421875, -0.13458251953125, -0.2284698486328125, -0.413238525390625, -0.2962779998779297, 0.349517822265625, -0.3522911071777344, -0.07095146179199219, 0.035778045654296875, 0.517547607421875, -0.2306365966796875, -0.29184722900390625, 0.3343772888183594, 0.2238006591796875, -0.009124755859375, 0.38527679443359375, 0.43340492248535156, -0.2623634338378906, 0.597412109375, 0.051025390625, -0.6759796142578125, 0.9109344482421875, 0.5551376342773438, -0.1659698486328125, 0.06867790222167969, 0.6396484375, 0.02002716064453125, 0.2182941436767578, -0.35164833068847656, -0.32464599609375, 0.5709228515625, 0.0346832275390625, -0.10540771484375, 0.20757675170898438, -0.559051513671875, -0.1550750732421875, -0.06069183349609375, -0.07542991638183594, 0.001251220703125, 0.0997161865234375, -0.210235595703125, -0.3398094177246094, -0.6446685791015625, -0.5462799072265625, 0.31539154052734375, 0.375823974609375, -0.0037841796875, 0.7371406555175781, 0.5377349853515625, -0.086029052734375, 0.09129524230957031, -0.29663848876953125, 0.21190643310546875, 0.340911865234375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000017.npy"}
{"epoch": 0.024963289280469897, "step": 18, "batch_size": 64, "mean": 0.026239246129989624, "std": 0.3262401521205902, "min": -1.0489654541015625, "p10": -0.3014060974121094, "median": 0.07665443420410156, "p90": 0.4014099121093751, "max": 0.6437606811523438, "pos_frac": 0.578125, "sample": [-0.143707275390625, -0.2994537353515625, 0.45306396484375, -0.116668701171875, -0.052188873291015625, 0.4986839294433594, -0.27852821350097656, -0.925323486328125, 0.141082763671875, 0.6437606811523438, -0.2005157470703125, -0.12743377685546875, 0.0760040283203125, 0.3552131652832031, 0.1116485595703125, 0.3673095703125, -0.047458648681640625, -0.1387176513671875, 0.3765869140625, -0.1982879638671875, 0.15912628173828125, 0.15107345581054688, -0.40301513671875, 0.18439292907714844, 0.10036849975585938, -0.03921318054199219, -0.151458740234375, 0.41204833984375, -0.2591114044189453, 0.30695533752441406, 0.3194999694824219, 0.2786102294921875, -0.036376953125, -0.300567626953125, 0.0565948486328125, 0.27399444580078125, 0.13321685791015625, 0.3084068298339844, -1.0489654541015625, 0.28310394287109375, 0.22904205322265625, 0.0177154541015625, 0.0687408447265625, 0.0882720947265625, 0.4608612060546875, 0.47048187255859375, 0.3071136474609375, -0.11688613891601562, -0.4455108642578125, 0.19963455200195312, 0.18786239624023438, -0.3758659362792969, -0.7141647338867188, -0.07872772216796875, 0.0279693603515625, 0.5001296997070312, -0.2191619873046875, 0.10823631286621094, -0.2808380126953125, 0.26095008850097656, 0.07730484008789062, -0.30176544189453125, 0.0979461669921875, -0.11378097534179688], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000018.npy"}
{"epoch": 0.02643171806167401, "step": 19, "batch_size": 64, "mean": 0.05988246202468872, "std": 0.2861297130584717, "min": -0.5090599060058594, "p10": -0.30813941955566404, "median": 0.02366924285888672, "p90": 0.3493053436279297, "max": 0.9522552490234375, "pos_frac": 0.546875, "sample": [0.3526802062988281, 0.0002422332763671875, -0.20765304565429688, 0.13329315185546875, 0.1882171630859375, 0.259490966796875, -0.0780487060546875, -0.3190422058105469, -0.06884574890136719, -0.08531951904296875, 0.1526947021484375, -0.14263916015625, -0.23073959350585938, 0.34035491943359375, -0.32952880859375, -0.41705322265625, -0.23223876953125, 0.2639579772949219, -0.05556488037109375, 0.037750244140625, -0.0066070556640625, 0.24417495727539062, 0.009588241577148438, -0.12017822265625, -0.4752311706542969, 0.24803543090820312, 0.5418548583984375, 0.16313934326171875, -0.05698394775390625, 0.420501708984375, 0.5820083618164062, 0.14160919189453125, -0.08091354370117188, -0.009204864501953125, 0.9522552490234375, -0.10615348815917969, -0.1949615478515625, -0.07738304138183594, 0.05267333984375, 0.1097412109375, -0.011199951171875, -0.200775146484375, 0.33935546875, 0.606292724609375, 0.04769325256347656, 0.25232696533203125, 0.301483154296875, 0.3414306640625, 0.6977996826171875, 0.19278907775878906, 0.165130615234375, 0.28127288818359375, -0.2826995849609375, 0.0895843505859375, 0.00396728515625, 0.09012794494628906, -0.323333740234375, 0.17811203002929688, -0.0312347412109375, -0.421905517578125, 0.32196044921875, -0.17951583862304688, -0.5090599060058594, -0.01709747314453125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000019.npy"}
{"epoch": 0.027900146842878122, "step": 20, "batch_size": 64, "mean": 0.15211281180381775, "std": 0.38557636737823486, "min": -0.6586990356445312, "p10": -0.26917228698730467, "median": 0.11543750762939453, "p90": 0.5921005249023441, "max": 1.5515594482421875, "pos_frac": 0.609375, "sample": [-0.3869895935058594, 0.310882568359375, 0.22196388244628906, -0.022857666015625, 0.0057373046875, 0.1298694610595703, 0.5091400146484375, 0.17955780029296875, 0.48859405517578125, 0.279052734375, -0.6586990356445312, 0.42848968505859375, 0.7163734436035156, -0.06264877319335938, -0.09497451782226562, 0.052883148193359375, 0.033050537109375, 0.17551040649414062, -0.3597545623779297, 0.1738128662109375, 0.2433013916015625, -0.22124481201171875, -0.044219970703125, -0.03753852844238281, 0.43622589111328125, -0.10971832275390625, -0.0943756103515625, 0.39504432678222656, 0.3502006530761719, -0.025960922241210938, -0.15492820739746094, -0.30217742919921875, -0.5554885864257812, 0.18106460571289062, 0.024065017700195312, -0.20550155639648438, 0.779083251953125, -0.05194091796875, -0.24661636352539062, 0.10100555419921875, 0.380584716796875, 0.15667724609375, 0.1892242431640625, 0.8878860473632812, 0.2451934814453125, 0.29149436950683594, 0.4705963134765625, 0.955413818359375, 0.627655029296875, -0.160552978515625, -0.28441619873046875, -0.278839111328125, 0.3108692169189453, 0.362823486328125, 1.5515594482421875, -0.188018798828125, 1.10516357421875, 0.3124351501464844, -0.11514854431152344, 0.02915191650390625, 0.3036003112792969, -0.02939605712890625, -0.0358428955078125, 0.06783294677734375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000020.npy"}
{"epoch": 0.02936857562408223, "step": 21, "batch_size": 64, "mean": 0.11505846679210663, "std": 0.39660173654556274, "min": -0.9960556030273438, "p10": -0.3818216323852539, "median": 0.07438468933105469, "p90": 0.7297256469726563, "max": 1.0662422180175781, "pos_frac": 0.625, "sample": [0.2033233642578125, -0.03424835205078125, 0.7805023193359375, 0.742462158203125, 0.7706527709960938, 0.22596073150634766, 0.009855270385742188, 0.016506195068359375, 0.3812713623046875, -0.0557403564453125, 0.3228759765625, -0.40094757080078125, 0.3468494415283203, 0.06679153442382812, 0.21908950805664062, 0.48541259765625, -0.04139137268066406, -0.16807937622070312, 0.037109375, 0.05762481689453125, -0.548828125, 0.731048583984375, 0.108001708984375, -0.21483230590820312, 0.05200767517089844, 0.6520767211914062, -0.046295166015625, -0.06239509582519531, 0.5764389038085938, 0.16167449951171875, -0.9960556030273438, 0.5279998779296875, 0.052852630615234375, 0.26369476318359375, -0.36943817138671875, -0.18432044982910156, -0.19237136840820312, 0.1498546600341797, 0.7266387939453125, -0.216033935546875, 0.08197784423828125, 0.12769699096679688, 0.29697418212890625, 0.022769927978515625, -0.21583175659179688, -0.3871288299560547, 0.8474845886230469, -0.18141937255859375, 0.10936737060546875, -0.40911865234375, 0.24527549743652344, 0.5511360168457031, 1.0662422180175781, 0.0913848876953125, -0.0264129638671875, -0.11176300048828125, 0.0945892333984375, -0.5001449584960938, 0.9810256958007812, 0.24691390991210938, -0.5160598754882812, -0.11981582641601562, 0.122039794921875, -0.1910400390625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000021.npy"}
{"epoch": 0.030837004405286344, "step": 22, "batch_size": 64, "mean": 0.247263103723526, "std": 0.46102046966552734, "min": -0.8531951904296875, "p10": -0.26502151489257814, "median": 0.2120819091796875, "p90": 0.86051025390625, "max": 1.3591690063476562, "pos_frac": 0.71875, "sample": [0.08600997924804688, 1.100860595703125, -0.26677703857421875, 0.10260772705078125, 0.5731964111328125, -0.5881881713867188, 0.320892333984375, 0.5386581420898438, 0.09287643432617188, 0.863128662109375, 1.1410713195800781, -0.26092529296875, -0.23209762573242188, 1.3591690063476562, -0.0782928466796875, 0.16916656494140625, 0.4105377197265625, 0.3733692169189453, -0.1097412109375, -0.16729736328125, 0.010669708251953125, 1.2263031005859375, 0.01593780517578125, -0.1320648193359375, 0.2211589813232422, 0.38123130798339844, 0.8949127197265625, 0.4445991516113281, 0.24617767333984375, 0.854400634765625, -0.18163681030273438, 0.4697265625, 0.0368194580078125, -0.36419677734375, 0.45761680603027344, 0.6244125366210938, 0.6593093872070312, -0.18848419189453125, -0.15864944458007812, 0.308807373046875, 0.652374267578125, 0.19247817993164062, 0.14832305908203125, 0.3493804931640625, -0.338348388671875, 0.781707763671875, -0.2955513000488281, -0.8531951904296875, 0.5826416015625, 0.22174072265625, 0.2603569030761719, 0.11363983154296875, 0.03827857971191406, 0.2030048370361328, -0.1978759765625, -0.6077880859375, 0.435211181640625, 0.3764915466308594, 0.8141860961914062, 0.400146484375, -0.046478271484375, 1.1876068115234375, 0.146820068359375, 0.0044116973876953125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000022.npy"}
{"epoch": 0.032305433186490456, "step": 23, "batch_size": 64, "mean": 0.18317526578903198, "std": 0.37593337893486023, "min": -0.70562744140625, "p10": -0.19160728454589843, "median": 0.12962818145751953, "p90": 0.5946414947509766, "max": 1.3980865478515625, "pos_frac": 0.6875, "sample": [-0.13486671447753906, -0.16989517211914062, 0.33312225341796875, 0.0310516357421875, 0.1945648193359375, 0.5998954772949219, 0.2298126220703125, -0.3118743896484375, -0.11407661437988281, -0.212921142578125, 0.4204559326171875, 1.0560989379882812, 0.58013916015625, 0.1353759765625, 0.043315887451171875, -0.31177520751953125, 0.4287586212158203, 0.028308868408203125, -0.109222412109375, 0.2364826202392578, 0.7245407104492188, 0.15151214599609375, 1.349884033203125, 0.1103363037109375, 0.12388038635253906, -0.2009124755859375, 0.106109619140625, -0.33502960205078125, 0.5823822021484375, 0.062652587890625, -0.027767181396484375, 0.6607208251953125, 0.0302581787109375, -0.0028839111328125, 0.3137054443359375, -0.04100799560546875, 0.44399261474609375, 0.0217742919921875, 0.17315101623535156, 0.46805763244628906, 0.013336181640625, 0.2303466796875, -0.07287979125976562, 0.3421516418457031, -0.08550262451171875, 1.3980865478515625, 0.2630805969238281, -0.70562744140625, 0.28009033203125, 0.10760498046875, 0.5370731353759766, 0.7358207702636719, -0.09748458862304688, -0.5030250549316406, 0.2153453826904297, 0.4411773681640625, -0.11603546142578125, -0.06634521484375, -0.08829307556152344, 0.302886962890625, 0.22344970703125, 0.0080718994140625, 0.3005523681640625, 0.39122772216796875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000023.npy"}
{"epoch": 0.033773861967694566, "step": 24, "batch_size": 64, "mean": 0.21142247319221497, "std": 0.3573267161846161, "min": -1.080596923828125, "p10": -0.18994979858398436, "median": 0.2298431396484375, "p90": 0.6399824142456058, "max": 0.9803466796875, "pos_frac": 0.71875, "sample": [0.687591552734375, 0.4399604797363281, 0.21106529235839844, -0.0088043212890625, 0.5631809234619141, 0.1609821319580078, 0.809356689453125, -0.16931915283203125, 0.5280914306640625, -0.1084136962890625, 0.4434070587158203, 0.008701324462890625, 0.06855010986328125, 0.34490203857421875, 0.32170867919921875, -0.13644981384277344, -0.3753395080566406, 0.07886505126953125, 0.12879180908203125, 0.27022552490234375, 0.4540252685546875, -0.312835693359375, -0.27517127990722656, 0.5384597778320312, -0.112701416015625, 0.4332084655761719, 0.44516754150390625, -1.080596923828125, -0.0339202880859375, 0.41693878173828125, 0.9803466796875, 0.6728973388671875, 0.1990966796875, -0.11780548095703125, 0.05754852294921875, 0.2415771484375, 0.5253753662109375, 0.1407623291015625, -0.1099090576171875, 0.44022369384765625, 0.8675537109375, 0.4788341522216797, -0.3144645690917969, 0.382110595703125, 0.32271575927734375, 0.8817672729492188, 0.218109130859375, 0.13895606994628906, 0.48358154296875, 0.09185218811035156, 0.3953666687011719, 0.06707191467285156, 0.35755157470703125, -0.04183387756347656, 0.3849029541015625, 0.333740234375, 0.018329620361328125, -0.073577880859375, 0.27281951904296875, -0.36466217041015625, 0.39409637451171875, -0.0457305908203125, -0.19879150390625, 0.71099853515625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000024.npy"}
{"epoch": 0.03524229074889868, "step": 25, "batch_size": 64, "mean": 0.2554857134819031, "std": 0.39530834555625916, "min": -0.6059188842773438, "p10": -0.15473327636718748, "median": 0.2021942138671875, "p90": 0.8444335937500002, "max": 1.078125, "pos_frac": 0.765625, "sample": [0.215911865234375, 0.12840843200683594, 0.704620361328125, 0.6676406860351562, 0.054698944091796875, 0.12912940979003906, -0.47432708740234375, 0.9505996704101562, 0.308685302734375, 0.019374847412109375, -0.1597137451171875, 0.951690673828125, 0.030918121337890625, 0.8912506103515625, 1.078125, -0.4395923614501953, -0.5319881439208984, 0.8894882202148438, -0.06626129150390625, 0.8563308715820312, -0.6059188842773438, -0.06479644775390625, 0.5118217468261719, 0.8166732788085938, 0.49845123291015625, 0.0694427490234375, 0.5749740600585938, 0.46379852294921875, -0.107666015625, -0.10626411437988281, 0.2961883544921875, 0.1280517578125, 0.128448486328125, 0.373931884765625, 0.47066497802734375, 0.4407157897949219, 0.17067527770996094, 0.87860107421875, 0.4818115234375, 0.1773529052734375, 0.064727783203125, 0.78167724609375, 0.1884765625, 0.04351043701171875, 0.01758575439453125, -0.10710906982421875, -0.2348480224609375, -0.10003662109375, 0.6283798217773438, 0.02173614501953125, 0.4077606201171875, 0.506378173828125, 0.5014114379882812, -0.1431121826171875, 0.3129425048828125, 0.03159332275390625, -0.5113067626953125, 0.5395698547363281, 0.398406982421875, 0.35053253173828125, 0.09154510498046875, 0.401824951171875, -0.12625885009765625, 0.4837493896484375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000025.npy"}
{"epoch": 0.03671071953010279, "step": 26, "batch_size": 64, "mean": 0.4299335479736328, "std": 0.5889620184898376, "min": -0.6904296875, "p10": -0.1683668136596679, "median": 0.29610729217529297, "p90": 1.120114135742188, "max": 2.3831939697265625, "pos_frac": 0.84375, "sample": [0.2835235595703125, 0.10151100158691406, 0.34417724609375, 0.30180931091308594, 1.00567626953125, 0.60552978515625, 0.47306060791015625, 0.0957794189453125, 0.2787322998046875, -0.10517311096191406, 2.3831939697265625, 0.190399169921875, 2.319610595703125, 0.14935302734375, 0.37468719482421875, 1.169158935546875, -0.1954498291015625, 0.38100433349609375, 0.7093505859375, 0.09361648559570312, 0.24876022338867188, 0.10219383239746094, 0.3724822998046875, 0.2904052734375, 0.711944580078125, 1.54315185546875, 0.05483245849609375, -0.6904296875, 0.321685791015625, 0.957122802734375, -0.5167083740234375, 0.218017578125, 0.3178539276123047, -0.19799232482910156, -0.001491546630859375, 0.1211700439453125, 1.79541015625, -0.34415435791015625, 0.18628692626953125, 0.8398208618164062, 0.11956787109375, 0.5751419067382812, 1.738037109375, 0.0747222900390625, 0.5174407958984375, 0.5989532470703125, -0.3347282409667969, 0.6658782958984375, 0.2219066619873047, 0.3508720397949219, -0.0382843017578125, 0.2509765625, -0.3710975646972656, 0.26959991455078125, 0.10436248779296875, 0.568084716796875, 0.8727264404296875, 0.763885498046875, 1.2238082885742188, 0.007232666015625, 0.6449851989746094, 0.15916824340820312, 0.826141357421875, 0.41645240783691406], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000026.npy"}
{"epoch": 0.0381791483113069, "step": 27, "batch_size": 64, "mean": 0.46608299016952515, "std": 0.5408991575241089, "min": -0.5565872192382812, "p10": -0.09498920440673826, "median": 0.30670738220214844, "p90": 1.133184051513672, "max": 2.2235488891601562, "pos_frac": 0.828125, "sample": [-0.035350799560546875, 0.9052505493164062, 0.6871261596679688, 0.7528076171875, 0.5954399108886719, 1.1438522338867188, 0.108612060546875, 0.0456390380859375, 1.52545166015625, -0.3098011016845703, 0.7328929901123047, -0.221435546875, 1.179473876953125, 0.3321533203125, 0.876800537109375, 0.13103485107421875, 2.2235488891601562, -0.10427093505859375, 0.5575485229492188, 0.060516357421875, -0.026149749755859375, 0.009033203125, 0.1873016357421875, 0.22472572326660156, 0.13863372802734375, 0.5499153137207031, 0.18090248107910156, 0.11798667907714844, 0.4883995056152344, 1.83978271484375, 0.13874053955078125, -0.0693359375, -0.07333183288574219, 0.875396728515625, 0.9284744262695312, 0.3025360107421875, 0.4327392578125, 0.5574798583984375, 0.6433944702148438, 0.14331817626953125, -0.5565872192382812, 0.72467041015625, -0.123687744140625, 0.8512954711914062, 0.5016937255859375, 0.76824951171875, -0.21129608154296875, 0.27396202087402344, 0.872161865234375, 0.8989105224609375, 1.5537872314453125, 0.05841064453125, 0.24551010131835938, 0.09275054931640625, 0.7711029052734375, 0.33538818359375, 1.6903305053710938, -0.13792991638183594, 0.3108787536621094, 0.25600433349609375, 0.213836669921875, 1.1082916259765625, 0.28549957275390625, 0.2688446044921875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000027.npy"}
{"epoch": 0.039647577092511016, "step": 28, "batch_size": 64, "mean": 0.3189205825328827, "std": 0.6242707967758179, "min": -1.63067626953125, "p10": -0.39947433471679683, "median": 0.2976245880126953, "p90": 0.9932178497314454, "max": 1.9360809326171875, "pos_frac": 0.703125, "sample": [0.9654655456542969, 0.7162322998046875, -0.13414764404296875, 0.6114540100097656, 0.5776214599609375, -0.23902130126953125, 0.059307098388671875, 0.03575897216796875, -0.04664421081542969, -0.04770469665527344, 0.09030914306640625, 0.441925048828125, 1.3653564453125, 0.25151824951171875, 0.92498779296875, -0.16562843322753906, 0.48978424072265625, 1.0051116943359375, 0.27591705322265625, 0.543853759765625, 0.6002693176269531, -0.4259033203125, -0.4999237060546875, 1.5609130859375, 0.0639495849609375, 0.8161392211914062, 0.5628814697265625, 0.4145355224609375, 1.56781005859375, 0.3931159973144531, 0.7959976196289062, 0.4145965576171875, 0.138519287109375, -0.45587921142578125, 0.7448158264160156, 0.471343994140625, 0.4978599548339844, 0.141448974609375, 0.2868232727050781, 0.14005279541015625, 0.3084259033203125, 0.5817604064941406, -0.10134315490722656, 1.080535888671875, 0.8365478515625, 0.933074951171875, 0.5412445068359375, 0.25595664978027344, 0.7060127258300781, -0.11983489990234375, 0.883941650390625, -1.63067626953125, -0.6787261962890625, -0.0234222412109375, -0.5060195922851562, 0.14152908325195312, -0.0298004150390625, 1.0771484375, 1.9360809326171875, -0.12415695190429688, -0.33780670166015625, -1.4290771484375, -0.113861083984375, 0.27256011962890625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000028.npy"}
{"epoch": 0.041116005873715125, "step": 29, "batch_size": 64, "mean": 0.5686285495758057, "std": 0.5457553863525391, "min": -0.3347129821777344, "p10": -0.1352821350097656, "median": 0.5650901794433594, "p90": 1.2697410583496096, "max": 2.08251953125, "pos_frac": 0.828125, "sample": [0.741180419921875, -0.036815643310546875, 0.4150390625, 0.6915626525878906, 0.31160736083984375, 0.7808742523193359, 0.07991790771484375, 1.4778366088867188, 0.5585670471191406, -0.14162445068359375, 0.20035552978515625, 0.06468772888183594, 1.1066360473632812, 0.8407516479492188, -0.26105499267578125, 1.3080291748046875, 0.9222259521484375, 0.09912490844726562, 0.020494461059570312, 1.2858428955078125, -0.02942657470703125, 0.6258468627929688, 1.0051651000976562, 0.6928043365478516, 1.2321701049804688, 0.29042816162109375, -0.3347129821777344, 1.0624771118164062, 0.524749755859375, 0.633636474609375, 0.27453041076660156, 0.8551025390625, 1.7552490234375, 0.05702972412109375, 0.5461387634277344, 0.3337993621826172, -0.30336761474609375, -0.011327743530273438, 1.3654556274414062, 2.08251953125, 0.822174072265625, 1.1230316162109375, 0.6301994323730469, 0.5375137329101562, 0.5716133117675781, 0.08289527893066406, 0.9600067138671875, -0.1204833984375, 0.7048301696777344, -0.17876625061035156, 0.2756977081298828, 0.13750648498535156, 1.0698776245117188, 0.098388671875, 1.1164894104003906, 0.8090667724609375, -0.1999969482421875, 0.7437973022460938, 0.2559547424316406, 0.5501670837402344, 1.5468673706054688, 1.0006484985351562, -0.291748046875, 1.0229911804199219], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000029.npy"}
{"epoch": 0.042584434654919234, "step": 30, "batch_size": 64, "mean": 0.6996806859970093, "std": 0.6066210865974426, "min": -0.610565185546875, "p10": 0.04527950286865236, "median": 0.731471061706543, "p90": 1.368218994140625, "max": 2.55706787109375, "pos_frac": 0.90625, "sample": [0.7409782409667969, 0.28350067138671875, 0.09732246398925781, 0.703094482421875, 1.2862396240234375, -0.14585113525390625, 0.07787322998046875, 0.164581298828125, -0.28499412536621094, 0.8425445556640625, 0.89739990234375, 1.0957489013671875, 1.0861434936523438, 1.0431709289550781, 1.336517333984375, 1.0981903076171875, 0.8512420654296875, 1.0841522216796875, -0.11201095581054688, 1.0211334228515625, 1.211273193359375, 1.168701171875, 2.55706787109375, 0.844512939453125, 1.0430984497070312, 0.30181312561035156, 0.9044342041015625, 0.7686080932617188, -0.3128852844238281, 0.3213062286376953, 0.28688812255859375, 0.2049388885498047, 0.66009521484375, 0.07658576965332031, 0.2772712707519531, 0.5197982788085938, 1.711944580078125, 1.92510986328125, 1.381805419921875, 0.6271934509277344, 0.05568695068359375, 1.2842254638671875, 0.4682960510253906, 0.42113494873046875, -0.610565185546875, 1.5158920288085938, 1.75543212890625, 0.7219638824462891, 1.0166549682617188, 1.0006561279296875, 1.2056045532226562, 0.43581390380859375, 0.06388473510742188, -0.19434547424316406, 0.04081916809082031, 1.0245895385742188, 0.8879776000976562, 0.1152191162109375, 0.45757293701171875, 0.05593681335449219, 0.1729888916015625, 1.6869125366210938, 1.0155181884765625, 0.53515625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000030.npy"}
{"epoch": 0.04405286343612335, "step": 31, "batch_size": 64, "mean": 0.4221669137477875, "std": 0.6169999241828918, "min": -0.665679931640625, "p10": -0.30914611816406246, "median": 0.36957359313964844, "p90": 1.1499885559082035, "max": 2.9095535278320312, "pos_frac": 0.78125, "sample": [0.2941913604736328, 1.0554389953613281, -0.6326141357421875, 0.4036598205566406, 1.4891433715820312, 0.5424957275390625, 0.06452560424804688, 0.16419601440429688, 0.20407485961914062, 0.0762176513671875, 2.9095535278320312, 0.158966064453125, 0.44274139404296875, 0.365142822265625, 0.3388404846191406, 0.3004474639892578, 0.49002838134765625, 0.3045005798339844, 1.0207748413085938, 0.6282806396484375, 0.22385406494140625, 0.11023330688476562, 0.266448974609375, -0.5681076049804688, 0.7430992126464844, 0.42023658752441406, 1.08355712890625, 0.2973747253417969, 0.8204326629638672, -0.3178253173828125, 0.819122314453125, 0.43975830078125, 1.5006561279296875, -0.5476226806640625, -0.20386505126953125, -0.665679931640625, -0.07118988037109375, 0.6570892333984375, -0.07804107666015625, 0.8713226318359375, 0.688079833984375, 0.8953628540039062, -0.459381103515625, 0.4508686065673828, -0.03519439697265625, -0.2677764892578125, 0.00708770751953125, 1.0014495849609375, -0.2888946533203125, 0.007923126220703125, 0.15137100219726562, 1.2916717529296875, 0.7843475341796875, -0.4615325927734375, -0.1338958740234375, 0.59619140625, 0.3740043640136719, 0.8997955322265625, 0.07194137573242188, 0.6503067016601562, 1.3508224487304688, 1.2852554321289062, 1.1784591674804688, 0.5589599609375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000031.npy"}
{"epoch": 0.04552129221732746, "step": 32, "batch_size": 64, "mean": 0.7318712472915649, "std": 0.8465555906295776, "min": -0.7093505859375, "p10": -0.24541416168212887, "median": 0.5618495941162109, "p90": 1.985997009277344, "max": 2.71038818359375, "pos_frac": 0.828125, "sample": [0.23009490966796875, 0.10318756103515625, 0.7088165283203125, 1.2976303100585938, 0.8858833312988281, 1.6995086669921875, -0.7093505859375, 0.250030517578125, 0.2775764465332031, 0.8775215148925781, 1.0787277221679688, -0.2560577392578125, -0.4845695495605469, 0.12293624877929688, -0.340179443359375, 1.3706207275390625, -0.3935661315917969, 1.2418556213378906, -0.3910408020019531, 2.581756591796875, 0.4161949157714844, 1.2459030151367188, 1.925445556640625, 0.44429779052734375, 1.1422195434570312, 0.6570205688476562, -0.04138946533203125, -0.1061248779296875, -0.5935783386230469, 0.9128036499023438, 0.6270217895507812, 0.25560569763183594, 0.4671173095703125, 2.288604736328125, 1.9234294891357422, 0.6898059844970703, 0.8352813720703125, 0.7910003662109375, -0.014484405517578125, 0.49378204345703125, 2.3154373168945312, 0.851287841796875, 0.03224945068359375, 0.3795585632324219, 2.71038818359375, 0.38059234619140625, 0.2056427001953125, 2.59722900390625, 2.0119476318359375, -0.2205791473388672, 0.4155540466308594, 0.5466499328613281, 2.4913406372070312, 0.9705429077148438, 0.01207733154296875, 0.19971656799316406, 0.6823959350585938, 0.87237548828125, 0.2564868927001953, 1.763763427734375, 0.2623615264892578, 0.5770492553710938, 1.87286376953125, 0.14149093627929688], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000032.npy"}
{"epoch": 0.04698972099853157, "step": 33, "batch_size": 64, "mean": 0.5908748507499695, "std": 0.6606138348579407, "min": -1.0391387939453125, "p10": -0.12511863708496093, "median": 0.5877876281738281, "p90": 1.410595703125, "max": 2.302490234375, "pos_frac": 0.84375, "sample": [-0.13026046752929688, 0.4276084899902344, 0.4140892028808594, 0.7552871704101562, 2.302490234375, 0.2228851318359375, 0.6017379760742188, 0.958709716796875, 0.27658843994140625, 0.7666397094726562, 0.7430648803710938, 1.0872344970703125, 1.73583984375, 0.34767913818359375, 0.97894287109375, 0.6629791259765625, 0.7540283203125, -1.0032463073730469, 0.5738372802734375, -1.0391387939453125, 0.04793548583984375, 1.133270263671875, 0.94354248046875, 0.9137725830078125, 1.4022750854492188, 0.42824554443359375, 1.4530487060546875, 0.091400146484375, 0.21152496337890625, 2.1522216796875, -0.08012008666992188, -0.1810455322265625, 2.2757415771484375, -0.11312103271484375, -0.060375213623046875, 0.7838096618652344, 0.1813507080078125, 0.31467437744140625, 1.0837974548339844, 0.39481353759765625, 0.2931632995605469, 0.6946258544921875, 0.5003585815429688, 0.16172409057617188, 0.5575714111328125, 1.4141616821289062, 0.5436325073242188, 1.4144287109375, 0.7289009094238281, -0.22916030883789062, 0.46044158935546875, -0.23496627807617188, 0.6218643188476562, 0.08536911010742188, 0.187103271484375, 0.8430404663085938, 1.3472900390625, -0.8517379760742188, 0.9647293090820312, 0.5318450927734375, 0.628692626953125, 0.6247329711914062, 0.6773681640625, 1.0370521545410156], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000033.npy"}
{"epoch": 0.048458149779735685, "step": 34, "batch_size": 64, "mean": 0.7414963841438293, "std": 0.849351704120636, "min": -0.7437744140625, "p10": -0.1380664825439453, "median": 0.6068229675292969, "p90": 1.9038276672363283, "max": 3.45965576171875, "pos_frac": 0.78125, "sample": [2.192291259765625, -0.5267448425292969, 0.6811752319335938, 0.1443328857421875, 1.8619842529296875, 0.6015777587890625, 1.256500244140625, 1.1399383544921875, -0.2608642578125, -0.5101089477539062, 0.1122283935546875, -0.008771896362304688, 1.0056228637695312, 0.4791107177734375, 1.061614990234375, 2.28778076171875, 0.3751373291015625, -0.1415557861328125, 2.1474761962890625, 0.27935791015625, 1.0798492431640625, -0.40943145751953125, 0.8063163757324219, 1.6694984436035156, 1.54595947265625, 1.7777862548828125, 1.425384521484375, -0.24279022216796875, 0.3285980224609375, 1.108306884765625, -0.10948944091796875, 0.6882553100585938, 1.2830657958984375, 1.0886650085449219, 2.0906982421875, 0.8274726867675781, 0.6099090576171875, 2.764373779296875, 0.24811744689941406, -0.11396408081054688, 0.3494682312011719, 0.7358589172363281, 0.9547138214111328, 1.3290176391601562, 0.7212448120117188, 0.760162353515625, 0.10742378234863281, 0.17151260375976562, 1.2428436279296875, -0.02661895751953125, 0.3629875183105469, -0.7437744140625, 0.33602142333984375, 0.5193023681640625, 0.29425048828125, -0.08425712585449219, 1.9217605590820312, 0.046627044677734375, -0.04630279541015625, 0.6037368774414062, 1.6664886474609375, 3.45965576171875, 0.25890541076660156, -0.12992477416992188], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000034.npy"}
{"epoch": 0.049926578560939794, "step": 35, "batch_size": 64, "mean": 1.1745426654815674, "std": 1.1822210550308228, "min": -0.2684345245361328, "p10": 0.061649322509765625, "median": 0.8926811218261719, "p90": 2.315393829345703, "max": 7.363677978515625, "pos_frac": 0.953125, "sample": [1.5247001647949219, 1.1354827880859375, 2.8997802734375, 1.0654411315917969, 0.04741668701171875, 0.7577457427978516, 0.915496826171875, 0.06058502197265625, -0.069366455078125, 1.637176513671875, 1.726776123046875, 0.0960845947265625, 0.6523056030273438, 0.5105476379394531, 0.4268989562988281, 0.9550704956054688, 2.022777557373047, 3.497222900390625, 1.0981216430664062, 0.2814598083496094, 0.9639110565185547, 1.5709915161132812, 3.2926483154296875, 0.5340499877929688, 0.44646453857421875, 0.5501861572265625, 0.23268508911132812, 0.6949844360351562, 0.0641326904296875, 0.8834457397460938, 7.363677978515625, 1.48809814453125, 0.09918212890625, 1.4836959838867188, 0.5339527130126953, 3.5717926025390625, 0.4996299743652344, 1.664703369140625, 2.5302734375, 2.2084426879882812, 0.70343017578125, 1.8743667602539062, 0.90191650390625, 1.2433319091796875, 0.3858757019042969, 0.05805015563964844, 1.73419189453125, 2.3169708251953125, 1.172821044921875, 0.1425323486328125, 0.8449935913085938, -0.0073089599609375, 0.6373977661132812, 1.4675750732421875, 1.567535400390625, 0.8492679595947266, 0.8448486328125, -0.2684345245361328, 1.1583786010742188, 0.28160858154296875, 0.8027591705322266, 2.3117141723632812, 2.1954421997070312, 0.03279876708984375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000035.npy"}
{"epoch": 0.0513950073421439, "step": 36, "batch_size": 64, "mean": 1.1011340618133545, "std": 1.2764753103256226, "min": -2.450225830078125, "p10": -0.1820869445800781, "median": 0.8459091186523438, "p90": 3.167160797119141, "max": 4.2804412841796875, "pos_frac": 0.8125, "sample": [1.3394088745117188, -0.24377059936523438, 1.0455474853515625, 0.4978790283203125, 1.8155326843261719, 0.849761962890625, 1.1838912963867188, -0.40039825439453125, -0.06611824035644531, 2.72021484375, 2.0224380493164062, -0.20326614379882812, -0.18990325927734375, 0.186553955078125, -0.09863090515136719, 0.8420562744140625, -0.1630535125732422, 1.725555419921875, 1.4929046630859375, 0.3912506103515625, 0.05120849609375, -0.2270355224609375, 0.2670135498046875, 0.13482666015625, 2.7209320068359375, 3.5640792846679688, 1.6102523803710938, -0.982330322265625, 3.29107666015625, 0.5204849243164062, 0.9694728851318359, 1.6516380310058594, -0.07991790771484375, 3.2120285034179688, 0.2315216064453125, 0.10038375854492188, 2.4114990234375, 0.15665435791015625, 1.452301025390625, 0.6560516357421875, 2.1680755615234375, 0.9544296264648438, 3.062469482421875, 0.8369350433349609, 3.570587158203125, 0.9042625427246094, 0.12331771850585938, 4.2804412841796875, 0.8084716796875, -2.450225830078125, 2.325817108154297, 0.81256103515625, 2.2373123168945312, 1.110382080078125, 2.2548828125, 1.090658187866211, -0.163848876953125, 0.6367588043212891, 3.437255859375, 0.5859413146972656, 1.12841796875, 3.27685546875, 0.727081298828125, 0.2937469482421875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000036.npy"}
{"epoch": 0.05286343612334802, "step": 37, "batch_size": 64, "mean": 1.0015329122543335, "std": 1.4108997583389282, "min": -2.07977294921875, "p10": -0.1700254440307617, "median": 0.6198768615722656, "p90": 2.974864196777344, "max": 6.2856597900390625, "pos_frac": 0.796875, "sample": [1.5132522583007812, 0.04381370544433594, 0.8367767333984375, 0.7354736328125, 0.9010543823242188, -0.039089202880859375, -0.2788238525390625, 0.5090789794921875, 1.0849227905273438, -0.18346595764160156, 0.30391693115234375, 3.2512359619140625, 0.39681243896484375, -1.348541259765625, 0.151885986328125, 0.32877349853515625, 0.6834259033203125, 0.08148193359375, 1.745779037475586, 6.2856597900390625, 1.4625396728515625, 4.1486053466796875, 1.2221145629882812, 1.0434150695800781, 0.9316940307617188, 1.759979248046875, 0.20124053955078125, 0.41481781005859375, 0.8441009521484375, 1.0918598175048828, 0.5173816680908203, -0.1256866455078125, 0.3112068176269531, -1.1337966918945312, 0.5563278198242188, -0.08317184448242188, 0.34050941467285156, 2.89154052734375, 1.9038314819335938, 2.8191070556640625, -0.0599365234375, 3.0105743408203125, -0.13866424560546875, 1.5442352294921875, -0.6446113586425781, 0.8588027954101562, -0.01639556884765625, -0.1960296630859375, 0.6966705322265625, 2.2077255249023438, 2.5371246337890625, 0.20570755004882812, 3.0464630126953125, 3.1958084106445312, 0.35974693298339844, 2.7490310668945312, 1.6562652587890625, -2.07977294921875, 0.16106796264648438, 0.12070465087890625, 2.020641326904297, 4.089691162109375, 0.17902374267578125, 0.4731903076171875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000037.npy"}
{"epoch": 0.05433186490455213, "step": 38, "batch_size": 64, "mean": 1.3221120834350586, "std": 1.6496503353118896, "min": -1.5298805236816406, "p10": 0.056237220764160205, "median": 0.7806434631347656, "p90": 3.259176635742188, "max": 8.046539306640625, "pos_frac": 0.90625, "sample": [0.49150848388671875, 8.046539306640625, 4.84375, 1.8047046661376953, 0.70172119140625, 0.13076400756835938, 1.8595199584960938, 0.10254669189453125, 1.1414718627929688, 1.26983642578125, 1.5853958129882812, 0.7401275634765625, 0.3029937744140625, 2.9864349365234375, 1.1160354614257812, -0.2713356018066406, 0.521453857421875, 0.3951377868652344, 0.03639030456542969, 1.6227035522460938, 0.6442184448242188, 1.5543670654296875, 2.508991241455078, 0.12146568298339844, 3.170074462890625, 0.1464557647705078, 2.0118179321289062, 2.1234893798828125, 1.7757453918457031, 0.3395423889160156, 0.4053325653076172, 3.3361129760742188, -1.5298805236816406, 0.8211593627929688, 0.6828746795654297, 0.2522735595703125, 0.7276992797851562, 5.5003509521484375, 0.5950851440429688, 0.31827545166015625, 3.29736328125, 4.75946044921875, 1.5559158325195312, 0.4913444519042969, 0.49973487854003906, -0.17047882080078125, 0.11911392211914062, 0.17317962646484375, 0.6582183837890625, 1.72637939453125, -1.1938705444335938, 0.4619598388671875, -0.3098716735839844, 1.3735771179199219, 1.3467044830322266, 1.53619384765625, 1.1224212646484375, 5.5736541748046875, 1.0226211547851562, 0.16361236572265625, 1.6567649841308594, -0.4024925231933594, 1.4253463745117188, 2.795166015625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000038.npy"}
{"epoch": 0.055800293685756244, "step": 39, "batch_size": 64, "mean": 1.6867289543151855, "std": 1.4485944509506226, "min": -2.518402099609375, "p10": 0.29853286743164065, "median": 1.4697065353393555, "p90": 4.027692413330079, "max": 5.210113525390625, "pos_frac": 0.953125, "sample": [2.3977279663085938, 1.701080322265625, 4.6212921142578125, 1.8519515991210938, 1.513641357421875, -0.54827880859375, 2.5278778076171875, 2.3859596252441406, 1.4688777923583984, 2.5176029205322266, 0.48162078857421875, 3.4533233642578125, 1.657440185546875, 1.1806221008300781, 4.08935546875, 1.243438720703125, 0.6772956848144531, 0.7644119262695312, 0.119476318359375, 1.082061767578125, 0.894500732421875, 1.822418212890625, 2.2624053955078125, 0.7334918975830078, 1.2111225128173828, 2.2667236328125, 0.115966796875, 1.4705352783203125, 4.613372802734375, 0.23348236083984375, 0.8574485778808594, 0.5054721832275391, 3.2686004638671875, 1.9872970581054688, -2.518402099609375, 1.9522514343261719, 0.36997222900390625, 1.675210952758789, 4.104496002197266, 5.210113525390625, 1.295034408569336, 2.0040130615234375, 4.487415313720703, 0.6518096923828125, 1.2563056945800781, 0.5271759033203125, 0.2945709228515625, 0.8779754638671875, 2.199371337890625, 1.2822952270507812, 0.8295669555664062, 3.8352928161621094, 3.8838119506835938, 2.5375823974609375, 0.3169879913330078, 4.0953369140625, 3.742095947265625, 0.7698001861572266, 1.7720699310302734, 0.30777740478515625, 0.4971599578857422, -0.0031185150146484375, 0.6399269104003906, 1.6271324157714844], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000039.npy"}
{"epoch": 0.05726872246696035, "step": 40, "batch_size": 64, "mean": 1.512451171875, "std": 1.7438976764678955, "min": -0.7629852294921875, "p10": -0.2824790954589843, "median": 1.0711898803710938, "p90": 3.7645721435546875, "max": 7.7231903076171875, "pos_frac": 0.828125, "sample": [-0.4135284423828125, 0.1411895751953125, 1.0603790283203125, 1.028860092163086, 1.7771949768066406, 0.5134735107421875, 0.186859130859375, 0.29610443115234375, 0.15835189819335938, 2.137859344482422, 7.2023468017578125, 3.488372802734375, 0.9541225433349609, 3.7558670043945312, 1.2944068908691406, -0.5801925659179688, 2.3892269134521484, 2.08880615234375, 0.29456138610839844, -0.7629852294921875, 3.1932830810546875, -0.314117431640625, 0.39190673828125, -0.34088134765625, 2.2018585205078125, 1.3703994750976562, 0.5303916931152344, 0.9565296173095703, 0.12031173706054688, 0.2124786376953125, 3.7683029174804688, -0.11656570434570312, 3.8477630615234375, -0.1717681884765625, 0.4417304992675781, 4.5487823486328125, 0.056915283203125, 3.5277786254882812, 0.9465293884277344, 1.5561447143554688, 5.031097412109375, 0.6611042022705078, 0.7218856811523438, 7.7231903076171875, -0.4302940368652344, 1.5947799682617188, -0.04363250732421875, 1.983062744140625, 2.226715087890625, 2.1733779907226562, 1.262237548828125, 2.046642303466797, 3.2586288452148438, 1.6889820098876953, 1.082000732421875, 1.4279708862304688, 4.0256500244140625, -0.3003387451171875, 2.2362136840820312, 0.5744590759277344, -0.24080657958984375, 1.8663253784179688, 2.1019935607910156, 0.38657379150390625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000040.npy"}
{"epoch": 0.05873715124816446, "step": 41, "batch_size": 64, "mean": 1.8094090223312378, "std": 1.9684220552444458, "min": -3.2890777587890625, "p10": -0.021218872070312485, "median": 1.354264259338379, "p90": 4.181517791748048, "max": 9.144859313964844, "pos_frac": 0.875, "sample": [0.8503684997558594, 1.5068817138671875, 3.5736541748046875, 1.4082870483398438, 1.1696090698242188, 2.6007308959960938, 3.9390716552734375, 2.509082794189453, 0.0080718994140625, 3.0373001098632812, 5.44384765625, 0.5498580932617188, 1.8812255859375, 9.144859313964844, 0.05277061462402344, 0.3111724853515625, 0.776763916015625, 0.850128173828125, 0.14526748657226562, -0.48980712890625, 1.4314994812011719, 3.0837440490722656, 4.382057189941406, 1.3333415985107422, 6.1369781494140625, 1.3751869201660156, 3.362884521484375, 3.9105224609375, 0.35706329345703125, 0.2812538146972656, 2.507640838623047, 0.892364501953125, 6.809600830078125, 0.85107421875, 1.2175846099853516, 3.043182373046875, 1.6057205200195312, 3.122406005859375, -0.6009368896484375, 2.4995803833007812, 4.285423278808594, 1.6477527618408203, -3.2890777587890625, 3.4684600830078125, -0.02695465087890625, 0.786865234375, 1.6989898681640625, 1.5643196105957031, 2.1507110595703125, 0.4725608825683594, 1.3150787353515625, 0.8275680541992188, 0.5711669921875, 0.93804931640625, 0.7194919586181641, 1.1679134368896484, -0.00783538818359375, 3.3533477783203125, 1.2237396240234375, -0.09099197387695312, -0.3142223358154297, -0.2472686767578125, 2.0626220703125, 4.6525726318359375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000041.npy"}
{"epoch": 0.06020558002936858, "step": 42, "batch_size": 64, "mean": 2.4904069900512695, "std": 2.376018524169922, "min": -1.5524139404296875, "p10": 0.015890121459960983, "median": 1.9221668243408203, "p90": 5.569944763183594, "max": 10.22906494140625, "pos_frac": 0.890625, "sample": [1.8225936889648438, 0.28348541259765625, 2.114421844482422, 10.22906494140625, 9.8988037109375, 3.1091842651367188, 0.9190139770507812, 5.921234130859375, 2.610809326171875, 3.1779327392578125, 2.0554122924804688, 5.959320068359375, 2.7102508544921875, 8.51904296875, 0.6374073028564453, 2.0691299438476562, 2.8605728149414062, -0.26638031005859375, 2.453033447265625, 3.4556045532226562, 1.6228446960449219, 1.4062366485595703, 3.9113311767578125, 0.6681976318359375, 1.256866455078125, 2.9759445190429688, -0.5502471923828125, 2.3019866943359375, 2.0627975463867188, -0.5938644409179688, 1.8509712219238281, 5.54925537109375, 0.3610954284667969, 5.760772705078125, 2.3836212158203125, 1.2591629028320312, 1.0391101837158203, 1.4346923828125, 1.525909423828125, -0.4199638366699219, 4.0102081298828125, -0.0034332275390625, 1.5808029174804688, 5.5788116455078125, 1.6645927429199219, 3.7703323364257812, 0.7997589111328125, 0.060977935791015625, 1.274404525756836, 1.03704833984375, 0.5937652587890625, 5.408416748046875, -1.5524139404296875, 0.5328826904296875, 1.8146209716796875, 3.520763397216797, -0.16696548461914062, 1.446258544921875, 4.7826995849609375, 1.9933624267578125, 4.3177947998046875, 5.5362396240234375, 1.055511474609375, 3.9829559326171875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000042.npy"}
{"epoch": 0.06167400881057269, "step": 43, "batch_size": 64, "mean": 2.438110113143921, "std": 1.9205657243728638, "min": -0.8292999267578125, "p10": 0.27041721343994146, "median": 2.307130813598633, "p90": 4.892967987060548, "max": 9.786392211914062, "pos_frac": 0.96875, "sample": [0.6307830810546875, 0.11276626586914062, 0.03696441650390625, 3.6216964721679688, 3.7323837280273438, 0.80743408203125, 3.643535614013672, 2.38360595703125, 2.790231704711914, 6.552345275878906, 0.71234130859375, 2.2186317443847656, 0.7021102905273438, 2.287567138671875, 2.997386932373047, 1.1162109375, 3.9545440673828125, 4.961616516113281, 2.4712066650390625, 0.3151397705078125, 6.37152099609375, 2.7640533447265625, 5.453033447265625, 1.0482749938964844, 3.1356887817382812, 2.4462203979492188, 0.3929176330566406, 2.3143653869628906, -0.10248374938964844, 4.7327880859375, 1.3359146118164062, 2.9021034240722656, 3.6921310424804688, 3.001556396484375, 2.299896240234375, 4.132228851318359, 0.7166671752929688, 1.8565292358398438, 1.6702861785888672, 0.24535369873046875, 1.5655059814453125, 1.6988067626953125, 6.328216552734375, 3.6141281127929688, 1.0537872314453125, -0.8292999267578125, 1.1056575775146484, 2.7051849365234375, 0.966522216796875, 3.77734375, 2.810718536376953, 3.5439910888671875, 2.0813980102539062, 0.2995433807373047, 2.1298599243164062, 9.786392211914062, 3.433685302734375, 1.8174972534179688, 0.2579345703125, 0.48082542419433594, 2.54034423828125, 4.982257843017578, 3.1837844848632812, 0.24741363525390625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000043.npy"}
{"epoch": 0.0631424375917768, "step": 44, "batch_size": 64, "mean": 2.7193822860717773, "std": 2.210284948348999, "min": -0.7133941650390625, "p10": 0.5116966247558594, "median": 2.1595420837402344, "p90": 5.692515563964844, "max": 9.66619873046875, "pos_frac": 0.9375, "sample": [4.2884063720703125, 2.2503204345703125, 2.1909103393554688, 0.5285186767578125, 6.7618865966796875, 1.8396453857421875, 3.0453720092773438, 1.0868911743164062, 2.21197509765625, 5.727630615234375, 6.3319549560546875, 0.630126953125, 4.802528381347656, 5.4669189453125, 4.447052001953125, 3.1650390625, 0.6431846618652344, 2.940277099609375, 2.03759765625, 5.6105804443359375, 0.47342491149902344, 2.128173828125, 2.893400192260742, 8.237930297851562, 1.9041595458984375, 3.3544082641601562, 0.29508209228515625, 1.3259963989257812, 5.446693420410156, 1.7427101135253906, 3.0171737670898438, -0.23778533935546875, 2.0258560180664062, 0.7037506103515625, 1.5153236389160156, 3.0524444580078125, 0.5181427001953125, 1.7979049682617188, 4.32904052734375, 2.01556396484375, 1.3422698974609375, 2.1922149658203125, 3.560272216796875, 3.732391357421875, 1.2430534362792969, 2.5252609252929688, 2.0899200439453125, 2.951446533203125, 0.5089340209960938, 0.5596466064453125, 6.1598968505859375, 4.26171875, 2.05133056640625, 3.6667022705078125, 1.0147666931152344, 9.66619873046875, 0.5761756896972656, -0.6185302734375, 3.2357101440429688, 1.6168861389160156, -0.7133941650390625, -0.0726776123046875, 1.5539588928222656, 8.390098571777344], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000044.npy"}
{"epoch": 0.06461086637298091, "step": 45, "batch_size": 64, "mean": 2.1919541358947754, "std": 2.5209994316101074, "min": -1.1938552856445312, "p10": -0.18083419799804687, "median": 1.4141130447387695, "p90": 5.486183929443359, "max": 12.3502197265625, "pos_frac": 0.859375, "sample": [4.184967041015625, 1.7249755859375, 0.16058349609375, 2.3697509765625, 0.07473373413085938, 1.6686382293701172, 3.2391815185546875, 6.4402008056640625, 4.95867919921875, 2.5828704833984375, 1.0183792114257812, 7.827247619628906, 3.4709606170654297, 0.439910888671875, 0.11692237854003906, 1.3400306701660156, -0.18268585205078125, 1.7269973754882812, 0.010499954223632812, 5.123210906982422, 0.2278289794921875, 0.2434234619140625, 2.1636734008789062, 0.9654884338378906, 0.6147613525390625, 4.976295471191406, 6.1761932373046875, 4.044536590576172, 0.26746559143066406, 0.5243015289306641, 5.372711181640625, 2.3870162963867188, 5.504180908203125, -0.4322471618652344, -0.176513671875, 1.2682266235351562, 6.013275146484375, 3.6536407470703125, -0.2721061706542969, 1.6036834716796875, 1.381601333618164, 0.27744293212890625, -1.1938552856445312, -0.6870880126953125, 1.209014892578125, 12.3502197265625, 0.7865219116210938, 6.877342224121094, 3.3195343017578125, -1.0041313171386719, 2.4529590606689453, 1.446624755859375, 1.3780555725097656, 0.2794628143310547, 1.2307472229003906, 5.444190979003906, 1.215606689453125, 2.1721649169921875, 2.1688289642333984, 2.5218582153320312, -0.12314224243164062, 3.758270263671875, 0.6332283020019531, -1.03228759765625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000045.npy"}
{"epoch": 0.06607929515418502, "step": 46, "batch_size": 64, "mean": 2.768871307373047, "std": 3.0934934616088867, "min": -2.8267822265625, "p10": -0.21270217895507806, "median": 2.123495101928711, "p90": 6.630067825317385, "max": 14.551422119140625, "pos_frac": 0.8125, "sample": [0.42546653747558594, 0.05562591552734375, 5.3836669921875, 7.1064300537109375, 3.678253173828125, -2.5135345458984375, 1.7576179504394531, 1.4741554260253906, 6.9354095458984375, 3.523897171020508, 6.244358062744141, 1.2767448425292969, 0.05759429931640625, 2.2836761474609375, 4.31268310546875, -0.2690277099609375, -0.14310455322265625, 0.6915206909179688, 3.8340301513671875, 11.056915283203125, -0.36144065856933594, 0.8853759765625, -0.11211013793945312, 4.0997161865234375, 0.4093437194824219, 4.704925537109375, 4.1768035888671875, 1.2693405151367188, 3.6676788330078125, 2.12115478515625, 4.455314636230469, 0.6159496307373047, -2.8267822265625, 0.6973495483398438, 2.7740936279296875, 5.840877532958984, 5.024662017822266, -0.23860931396484375, 7.852012634277344, -0.152252197265625, 0.4070472717285156, 2.8838119506835938, 4.832328796386719, -0.30450439453125, 14.551422119140625, -0.00421905517578125, 6.135833740234375, 5.474540710449219, 0.739166259765625, 2.2559738159179688, 1.8156890869140625, 8.391525268554688, 3.0540523529052734, -0.03755950927734375, 2.125835418701172, 6.795372009277344, 1.3182029724121094, 2.306455612182617, 3.2516708374023438, 5.173271179199219, 1.2641487121582031, 1.2380123138427734, 1.7462615966796875, -0.28235626220703125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000046.npy"}
{"epoch": 0.06754772393538913, "step": 47, "batch_size": 64, "mean": 2.80666446685791, "std": 2.570779800415039, "min": -2.6370620727539062, "p10": 0.013619995117187558, "median": 2.2681427001953125, "p90": 6.531130599975587, "max": 10.270263671875, "pos_frac": 0.890625, "sample": [1.7166748046875, 3.1649856567382812, -0.0104217529296875, 1.6095256805419922, 4.418743133544922, 1.5114994049072266, 0.487335205078125, 1.8858604431152344, 6.6333160400390625, 2.7983951568603516, 0.0697174072265625, 7.084327697753906, 6.751579284667969, 1.1412582397460938, 3.4716758728027344, 2.3758544921875, -0.10263824462890625, 1.6051521301269531, 1.9674758911132812, 5.137596130371094, 6.027107238769531, 2.3312759399414062, 0.6519412994384766, 3.145557403564453, 3.517791748046875, 1.3310413360595703, -0.6318206787109375, 5.80615234375, 6.6680908203125, 2.830272674560547, -0.43604087829589844, 3.3060359954833984, 6.292697906494141, 4.317657470703125, -2.6370620727539062, 0.16302490234375, 0.18575286865234375, 1.09039306640625, 0.8786163330078125, 10.270263671875, 5.0053863525390625, 2.0808544158935547, 1.4943180084228516, 2.27862548828125, 8.858901977539062, 5.746124267578125, -0.4416046142578125, 3.187196731567383, 9.2816162109375, -0.9581832885742188, 1.7883987426757812, 1.9799957275390625, 0.1484527587890625, 1.8051605224609375, 2.2466506958007812, 1.0516014099121094, 4.60528564453125, 2.257659912109375, 3.7881927490234375, 1.6958141326904297, 3.8943939208984375, 4.1175384521484375, 2.4302520751953125, 2.4572296142578125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000047.npy"}
{"epoch": 0.06901615271659324, "step": 48, "batch_size": 64, "mean": 2.7389891147613525, "std": 2.785430908203125, "min": -6.441497802734375, "p10": 0.10363197326660181, "median": 2.2003002166748047, "p90": 6.426161193847657, "max": 11.09417724609375, "pos_frac": 0.90625, "sample": [3.262237548828125, 0.7569656372070312, 6.4419403076171875, 7.319664001464844, 6.311225891113281, 3.3706512451171875, -1.800384521484375, 2.5313339233398438, 4.6318511962890625, 4.088783264160156, 1.5163040161132812, 5.768959045410156, 2.2342987060546875, 0.597564697265625, 0.34503936767578125, 1.1121253967285156, 6.38934326171875, 0.000171661376953125, 1.0927886962890625, -0.4174003601074219, 1.1147270202636719, 8.147727966308594, -0.6469841003417969, 2.1121292114257812, 5.327095031738281, 1.2557754516601562, 2.7974090576171875, 3.456024169921875, -0.11067962646484375, 3.0705909729003906, 1.9925003051757812, 1.6940059661865234, 3.16705322265625, 1.8202533721923828, 7.9635009765625, 3.596281051635742, 4.458259582519531, 8.583152770996094, 2.919708251953125, 0.8973960876464844, 0.7755279541015625, 1.3269462585449219, 1.1573486328125, 2.886890411376953, 1.02081298828125, 3.6013221740722656, 3.1360931396484375, 1.8395843505859375, -1.2303276062011719, 7.210014343261719, 2.7303524017333984, 3.020761489868164, 1.9195556640625, 11.09417724609375, 4.921875, -6.441497802734375, 4.698909759521484, 1.7966670989990234, 0.5087795257568359, 0.484527587890625, 4.267730712890625, 1.4516410827636719, 2.166301727294922, 1.78192138671875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000048.npy"}
{"epoch": 0.07048458149779736, "step": 49, "batch_size": 64, "mean": 3.356088638305664, "std": 3.471686840057373, "min": -3.07098388671875, "p10": -0.04294433593749983, "median": 2.96917724609375, "p90": 7.326277923583985, "max": 14.704437255859375, "pos_frac": 0.890625, "sample": [3.3063735961914062, 0.8133888244628906, 0.2943115234375, 2.9630050659179688, 2.714303970336914, 5.563076019287109, 0.7373123168945312, 2.312307357788086, -1.405914306640625, 0.42775726318359375, 0.1259307861328125, 1.9645843505859375, 2.2429580688476562, 7.6678619384765625, 6.675849914550781, 2.974822998046875, 3.7390480041503906, 2.091815948486328, 2.3448638916015625, 1.39984130859375, 1.7123260498046875, -2.7589454650878906, 3.2172698974609375, -0.1954631805419922, 11.700286865234375, 9.394668579101562, 1.536712646484375, 5.811916351318359, 4.068267822265625, 0.6442012786865234, 7.4405517578125, 14.704437255859375, 3.8772201538085938, 0.6820068359375, -1.6328125, -3.07098388671875, 3.1338119506835938, 3.3371829986572266, 4.366077423095703, 5.29595947265625, 3.133157730102539, -0.1126708984375, 0.1197509765625, 4.209861755371094, 4.84661865234375, 13.002655029296875, 2.4122161865234375, 1.8561325073242188, 7.059638977050781, 5.548187255859375, 6.110694885253906, 3.1447715759277344, 4.4546966552734375, 4.606513977050781, 11.996078491210938, 0.4495105743408203, -0.41143798828125, 2.963531494140625, 1.7446765899658203, 3.80718994140625, 4.0265655517578125, 1.6498184204101562, 4.774322509765625, 1.1790008544921875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000049.npy"}
{"epoch": 0.07195301027900147, "step": 50, "batch_size": 64, "mean": 3.707047939300537, "std": 3.535125494003296, "min": -2.4096546173095703, "p10": -0.7096378326416015, "median": 3.0117454528808594, "p90": 8.530538940429688, "max": 13.529571533203125, "pos_frac": 0.84375, "sample": [6.052764892578125, 0.20886993408203125, -1.68536376953125, 9.328163146972656, 1.5072288513183594, 2.391815185546875, 7.474273681640625, 1.8031425476074219, 3.6125106811523438, 1.8436098098754883, -0.6416816711425781, 6.675872802734375, 3.0979843139648438, 12.6561279296875, 0.6504364013671875, 3.08111572265625, 6.869926452636719, 2.4662418365478516, 8.83380126953125, 3.775543212890625, 0.7377510070800781, 4.9993743896484375, 8.568084716796875, 2.9423751831054688, 1.987640380859375, 4.716407775878906, -0.29620361328125, -0.7387619018554688, 2.7004642486572266, 8.44293212890625, 6.4210662841796875, 5.036582946777344, 8.70870590209961, 13.529571533203125, -2.4096546173095703, 0.4076271057128906, 1.2796707153320312, 2.0442657470703125, 0.17140960693359375, 2.8022918701171875, 2.2296600341796875, 1.7300701141357422, 6.239997863769531, 4.0077362060546875, 7.000492095947266, 6.0527496337890625, -0.94757080078125, 1.8314704895019531, 5.2057952880859375, -0.8681640625, 6.634765625, 3.3581466674804688, 4.430915832519531, 6.372245788574219, 2.0916519165039062, 5.4960479736328125, 6.5753173828125, 2.7881851196289062, 10.908699035644531, -0.7829055786132812, 5.662284851074219, 1.4751510620117188, -2.2632904052734375, -0.03238105773925781], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000050.npy"}
{"epoch": 0.07342143906020558, "step": 51, "batch_size": 64, "mean": 3.674722671508789, "std": 4.60264778137207, "min": -4.665679931640625, "p10": -0.5637231826782226, "median": 2.511260986328125, "p90": 9.433250427246094, "max": 18.908843994140625, "pos_frac": 0.828125, "sample": [9.369430541992188, 3.2958450317382812, 0.7715473175048828, -0.5682029724121094, 0.940704345703125, 0.30029296875, 7.78826904296875, -0.8928680419921875, 2.339447021484375, -0.24468994140625, 9.4923095703125, 7.517925262451172, 1.8078746795654297, 4.087165832519531, 1.8833293914794922, 4.432098388671875, 3.410633087158203, -0.5879287719726562, 9.460601806640625, 5.549266815185547, 4.059900283813477, -0.2360382080078125, 0.009836196899414062, 3.5873565673828125, 1.1284713745117188, 7.254108428955078, 0.4557685852050781, 5.653663635253906, 0.5715599060058594, 4.6332550048828125, 18.908843994140625, 2.9868412017822266, 0.6857528686523438, 4.959739685058594, 3.182445526123047, 13.711196899414062, 1.4334831237792969, 14.173309326171875, 5.189064025878906, 2.146627426147461, 2.15631103515625, 0.8975486755371094, 4.0021514892578125, 8.757118225097656, 1.2587928771972656, 0.2835273742675781, 5.3196868896484375, -3.4984207153320312, -0.5532703399658203, 16.808609008789062, 13.2764892578125, 6.245750427246094, 7.034580230712891, -0.7974319458007812, 1.478017807006836, 5.722709655761719, 0.1902179718017578, 0.17104721069335938, 2.04241943359375, -4.665679931640625, 3.2163162231445312, -1.3966064453125, 2.683074951171875, -0.09894561767578125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000051.npy"}
{"epoch": 0.07488986784140969, "step": 52, "batch_size": 64, "mean": 5.584807872772217, "std": 4.617341041564941, "min": -2.514984130859375, "p10": 0.9172161102294923, "median": 4.505901336669922, "p90": 12.001013946533204, "max": 19.157882690429688, "pos_frac": 0.921875, "sample": [1.4613513946533203, 11.582115173339844, 4.073604583740234, 12.1805419921875, 7.095832824707031, 9.246658325195312, 2.7816238403320312, 7.1138763427734375, 8.3155517578125, 13.14288330078125, -0.09313201904296875, 12.188629150390625, 4.700592041015625, 4.796772003173828, 8.908346176147461, 7.576971054077148, 2.6485977172851562, 5.069698333740234, 16.73967742919922, 8.994606018066406, 3.444925308227539, 1.0340652465820312, 11.138816833496094, 2.7595558166503906, 0.06550407409667969, 6.819000244140625, 2.3781356811523438, 2.08056640625, 8.27287483215332, 19.157882690429688, 9.52191162109375, 1.7492294311523438, 9.251472473144531, 4.444816589355469, 3.2259140014648438, 12.556900024414062, 3.59027099609375, -0.6325531005859375, 4.47064208984375, -2.514984130859375, -2.12591552734375, 2.116790771484375, 6.572425842285156, 5.6193695068359375, 1.3365364074707031, 9.226997375488281, 3.9145240783691406, 11.524871826171875, 6.339351654052734, 1.864145278930664, 0.8671379089355469, 3.0423126220703125, 3.2114334106445312, 4.541160583496094, 3.2468032836914062, 9.314468383789062, 5.821754455566406, 1.5338058471679688, 16.173049926757812, -1.3502197265625, 5.741355895996094, 3.852283477783203, 2.421741485595703, 1.2817726135253906], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000052.npy"}
{"epoch": 0.0763582966226138, "step": 53, "batch_size": 64, "mean": 5.6284685134887695, "std": 6.115919589996338, "min": -3.5033416748046875, "p10": 0.454752349853516, "median": 3.720170021057129, "p90": 14.304370880126955, "max": 26.946990966796875, "pos_frac": 0.921875, "sample": [-1.6468849182128906, 3.1121368408203125, 3.2486724853515625, 3.079559326171875, 0.8256607055664062, 3.6119518280029297, 12.173988342285156, 0.2957916259765625, 7.150302886962891, -1.0926361083984375, 1.1051483154296875, 26.588653564453125, -2.315044403076172, 0.9571685791015625, 6.4677276611328125, 6.875347137451172, 0.9030914306640625, 11.021438598632812, 2.4495162963867188, 12.32977294921875, 4.129570007324219, 5.061119079589844, 12.153411865234375, 6.998779296875, 14.516670227050781, 1.57379150390625, 5.130546569824219, 0.18701934814453125, 5.447273254394531, 1.784494400024414, 16.64581298828125, 8.335380554199219, 6.544960021972656, 7.872673034667969, 3.4118385314941406, 0.9072685241699219, 0.9394569396972656, 3.828388214111328, 1.1417522430419922, 4.0282440185546875, 2.9267654418945312, 6.646055221557617, 1.690155029296875, 13.809005737304688, -3.5033416748046875, 16.399810791015625, 16.718032836914062, 2.9174652099609375, 4.171661376953125, 3.4608917236328125, 1.7888355255126953, 3.911832809448242, 2.5474395751953125, 1.3019943237304688, 3.4117660522460938, 4.1843414306640625, 3.0661792755126953, 26.946990966796875, 3.119190216064453, 13.577728271484375, 4.507038116455078, 4.3586273193359375, 14.573066711425781, -0.08936119079589844], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000053.npy"}
{"epoch": 0.07782672540381791, "step": 54, "batch_size": 64, "mean": 4.917665004730225, "std": 4.651973247528076, "min": -0.9445838928222656, "p10": 0.20426082611084015, "median": 3.989757537841797, "p90": 10.909226989746095, "max": 21.959060668945312, "pos_frac": 0.9375, "sample": [7.484344482421875, 4.5594329833984375, 8.179859161376953, 0.6656970977783203, 0.5784454345703125, 2.8033065795898438, 0.7964706420898438, 2.5809974670410156, 5.9441680908203125, 7.610881805419922, 1.2611732482910156, 5.615997314453125, 11.082801818847656, 1.8974132537841797, 3.9271926879882812, 10.4903564453125, -0.092132568359375, 4.885395050048828, 5.0247039794921875, 5.3606719970703125, -0.11218452453613281, 1.585601806640625, 15.202957153320312, 0.7863616943359375, 6.926628112792969, 0.07666778564453125, 4.244731903076172, 5.106964111328125, 5.970848083496094, 5.824462890625, 1.7216911315917969, 0.04293060302734375, 7.579254150390625, -0.09099960327148438, 0.5019779205322266, 3.1254539489746094, 11.821464538574219, 2.8763809204101562, 5.877449035644531, 12.108894348144531, 2.315267562866211, 2.1679916381835938, 21.959060668945312, 10.427200317382812, -0.9445838928222656, 2.7220497131347656, 0.0350189208984375, 12.329437255859375, 0.5612602233886719, 4.0523223876953125, 0.9848480224609375, 4.240068435668945, 9.748329162597656, 1.6981048583984375, 2.2455978393554688, 17.227813720703125, 1.2354850769042969, 4.567493438720703, 2.4092636108398438, 9.73388671875, 2.2274646759033203, 9.21337890625, 1.2348670959472656, 10.504219055175781], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000054.npy"}
{"epoch": 0.07929515418502203, "step": 55, "batch_size": 64, "mean": 5.890453338623047, "std": 6.6879658699035645, "min": -9.9039306640625, "p10": -1.1690765380859374, "median": 5.053969383239746, "p90": 14.747193145751956, "max": 25.826416015625, "pos_frac": 0.828125, "sample": [7.704078674316406, 9.235893249511719, 4.8361358642578125, -2.67852783203125, 22.779052734375, -1.61639404296875, 6.379417419433594, 4.313755035400391, 16.013160705566406, -0.1492919921875, 6.207618713378906, 5.307132720947266, 2.19879150390625, 10.247611999511719, 17.734649658203125, 8.801956176757812, 4.545860290527344, 9.231185913085938, 25.826416015625, 3.2046241760253906, 10.455085754394531, -1.7291259765625, 3.8533096313476562, 8.112388610839844, 22.794403076171875, 0.21263694763183594, 14.174453735351562, 0.293426513671875, 0.3868541717529297, 8.34701156616211, 4.113624572753906, -2.7258453369140625, 5.908672332763672, 0.05822181701660156, 11.207000732421875, 4.2083892822265625, -1.0599632263183594, 0.2676963806152344, 0.47400474548339844, -4.8915863037109375, 9.203372955322266, 2.976736068725586, 1.469970703125, 13.782928466796875, 6.8437652587890625, -1.2158393859863281, 1.0143394470214844, 10.461410522460938, 4.939249038696289, -9.9039306640625, 2.748655319213867, 14.992652893066406, 6.094451904296875, 3.7067909240722656, 7.216365814208984, 5.012113571166992, -1.044586181640625, 8.198631286621094, 7.430747985839844, 12.827644348144531, 5.1239166259765625, -0.06570053100585938, 5.0958251953125, 15.495689392089844], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000055.npy"}
{"epoch": 0.08076358296622614, "step": 56, "batch_size": 64, "mean": 5.606152534484863, "std": 6.679159164428711, "min": -8.89626693725586, "p10": -1.2879428863525386, "median": 5.088295936584473, "p90": 14.364923095703132, "max": 24.968490600585938, "pos_frac": 0.8125, "sample": [-1.45343017578125, 7.3796234130859375, -2.975433349609375, 7.99053955078125, -0.7013816833496094, -8.89626693725586, 10.085395812988281, -0.9018058776855469, 3.5935935974121094, -3.821136474609375, 3.3800888061523438, 1.4940223693847656, 5.735008239746094, 15.14727783203125, 1.953500747680664, -0.03772735595703125, -0.2957744598388672, 10.530662536621094, 9.516468048095703, 7.668754577636719, -1.8321304321289062, 10.362930297851562, 2.1685714721679688, 20.5347900390625, 7.5924835205078125, 16.5201416015625, 1.4016685485839844, 0.2507476806640625, 8.885461807250977, 2.5212364196777344, 7.080543518066406, 7.7426910400390625, 9.127731323242188, 1.4452896118164062, 9.657417297363281, 11.46234130859375, 0.23669052124023438, 18.209869384765625, 7.0271759033203125, 5.89508056640625, 5.517721176147461, 4.658870697021484, 2.9949951171875, 19.273651123046875, 6.6587982177734375, 12.320281982421875, 5.836681365966797, 10.15692138671875, 0.637603759765625, 24.968490600585938, 1.1920452117919922, 0.06783866882324219, 3.9619293212890625, 12.5394287109375, -6.7321624755859375, 2.663911819458008, -0.8473052978515625, 20.475006103515625, 4.577106475830078, -3.3767013549804688, 3.1741790771484375, 1.5511970520019531, 5.8325042724609375, 9.008049011230469], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000056.npy"}
{"epoch": 0.08223201174743025, "step": 57, "batch_size": 64, "mean": 6.242837905883789, "std": 5.817812442779541, "min": -9.21466064453125, "p10": -0.08787536621093739, "median": 5.806038856506348, "p90": 14.137241744995118, "max": 21.3720703125, "pos_frac": 0.890625, "sample": [7.739589691162109, 3.141002655029297, 0.01802825927734375, -2.0393829345703125, 9.755950927734375, -9.21466064453125, 10.7418212890625, 0.34499168395996094, 12.63153076171875, 6.336540222167969, 14.961944580078125, 3.7504100799560547, 21.3720703125, 3.138214111328125, 6.810203552246094, 5.350433349609375, -0.7367095947265625, 3.5967636108398438, 6.027191162109375, 19.239059448242188, 10.341136932373047, 15.785354614257812, 10.892608642578125, 3.8889312744140625, 6.658721923828125, 1.1349411010742188, 14.126605987548828, 5.58488655090332, 4.395282745361328, 2.9352798461914062, 0.4316864013671875, 4.365291595458984, 3.4724960327148438, -0.13326263427734375, 6.7720947265625, 7.2381744384765625, -0.5350723266601562, 1.5567207336425781, 2.5216445922851562, 8.782455444335938, 10.67938232421875, 2.1870269775390625, 9.702308654785156, 5.10400390625, 8.716079711914062, -2.302928924560547, 9.023334503173828, 6.825462341308594, 8.743377685546875, 3.5399169921875, 2.391834259033203, 16.7386474609375, 5.091148376464844, 2.9042434692382812, 6.51123046875, 3.6269607543945312, 0.8405399322509766, 12.251899719238281, 11.733467102050781, -5.41064453125, 14.141799926757812, 18.444931030273438, 7.250310897827148, 7.626304626464844], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000057.npy"}
{"epoch": 0.08370044052863436, "step": 58, "batch_size": 64, "mean": 7.108301639556885, "std": 7.405789375305176, "min": -8.922439575195312, "p10": -0.44911937713623, "median": 5.067347526550293, "p90": 17.167570495605474, "max": 26.414154052734375, "pos_frac": 0.890625, "sample": [15.09967041015625, -2.0553646087646484, 0.6031379699707031, 4.482082366943359, 2.7222900390625, 4.330524444580078, 19.537105560302734, 1.825531005859375, 13.963020324707031, 2.6263599395751953, 5.6298675537109375, 7.291168212890625, 7.8578643798828125, 4.176769256591797, 2.461986541748047, 12.675247192382812, 10.793609619140625, 8.736209869384766, 4.504827499389648, 3.4539623260498047, 8.04571533203125, 13.908554077148438, 14.378227233886719, -1.092376708984375, 11.804878234863281, 1.6382827758789062, 9.102996826171875, 6.812479019165039, 21.790138244628906, 4.126674652099609, 26.414154052734375, 8.279041290283203, 6.4851226806640625, 0.0041980743408203125, 15.096321105957031, 1.0117988586425781, 13.48809814453125, 3.7053260803222656, 3.0067520141601562, -2.0948944091796875, 1.8677997589111328, 2.817171096801758, 23.483551025390625, -1.1703033447265625, 10.296249389648438, -0.6433982849121094, 0.6537227630615234, 7.4013214111328125, -4.57366943359375, 23.418243408203125, 1.8280410766601562, 5.991077423095703, 0.7687416076660156, 14.337276458740234, 10.715370178222656, -8.922439575195312, 22.029754638671875, 2.323699951171875, 2.5715713500976562, 6.801490783691406, 15.718223571777344, 2.001951217651367, 0.7997627258300781, 17.788719177246094], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000058.npy"}
{"epoch": 0.08516886930983847, "step": 59, "batch_size": 64, "mean": 7.614224433898926, "std": 7.869380474090576, "min": -5.707275390625, "p10": -0.9213077545166015, "median": 6.595268249511719, "p90": 19.740702056884764, "max": 32.718414306640625, "pos_frac": 0.875, "sample": [8.401130676269531, 8.336845397949219, 15.478973388671875, 20.161651611328125, 15.815399169921875, 3.9130401611328125, 32.718414306640625, 20.290390014648438, 7.060905456542969, 15.61553955078125, 0.5831356048583984, 7.928577423095703, -2.964305877685547, 2.3291873931884766, 21.9324951171875, -0.9211235046386719, 2.3601999282836914, 13.863059997558594, 6.6968536376953125, 19.74939727783203, 1.2188606262207031, 2.209400177001953, 1.3269195556640625, 0.9954414367675781, 3.8698577880859375, 25.933822631835938, 2.6757450103759766, 2.105894088745117, 7.473659515380859, -5.6544647216796875, 9.304878234863281, -2.048419952392578, 7.751091003417969, 8.196342468261719, 9.011390686035156, 18.52422332763672, 3.299053192138672, 11.976570129394531, 7.96502685546875, 3.9223709106445312, -5.707275390625, -3.5951614379882812, 5.997241973876953, -0.92138671875, 4.3136138916015625, 9.981185913085938, 7.781352996826172, 2.274505615234375, 8.202400207519531, 2.4385204315185547, 2.3682003021240234, 5.765342712402344, 2.848491668701172, 8.919933319091797, 5.043300628662109, 14.067703247070312, 9.794384002685547, 3.9266891479492188, 2.785125732421875, -2.8341751098632812, 17.687145233154297, 19.720413208007812, 6.493682861328125, 20.55169677734375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000059.npy"}
{"epoch": 0.08663729809104258, "step": 60, "batch_size": 64, "mean": 6.171331405639648, "std": 6.999053955078125, "min": -13.973968505859375, "p10": -1.842310333251953, "median": 6.885014533996582, "p90": 13.115478515625002, "max": 26.70343780517578, "pos_frac": 0.828125, "sample": [7.9696044921875, -4.310211181640625, 1.825021743774414, 12.562946319580078, 0.4719085693359375, 3.2956409454345703, 11.365989685058594, 7.9048004150390625, 0.7384414672851562, -13.973968505859375, 7.54144287109375, -1.8775100708007812, 1.9453887939453125, 4.083112716674805, 11.558792114257812, 5.9930419921875, 4.74908447265625, 11.092117309570312, 3.7798690795898438, 10.318168640136719, 13.74530029296875, 12.695816040039062, 19.049102783203125, 9.305282592773438, 9.354454040527344, 7.324256896972656, -0.8108329772949219, 7.135101318359375, 10.438827514648438, 0.04601860046386719, 9.398059844970703, 18.82347869873047, 3.880603790283203, 6.585380554199219, 7.801906585693359, -1.963979721069336, 10.690872192382812, 9.902755737304688, 2.4729232788085938, -5.4626922607421875, 11.820350646972656, 3.099323272705078, 5.4911651611328125, 8.465118408203125, 4.160667419433594, -1.04949951171875, -10.53610610961914, 16.434555053710938, 19.177734375, 11.987533569335938, 13.295333862304688, 2.8346633911132812, -2.414600372314453, -1.7601776123046875, 10.897405624389648, 6.634927749633789, 26.70343780517578, 9.260147094726562, 0.537994384765625, 9.245710372924805, -1.062957763671875, 8.536163330078125, 4.439918518066406, 1.3200931549072266], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000060.npy"}
{"epoch": 0.0881057268722467, "step": 61, "batch_size": 64, "mean": 7.387622833251953, "std": 10.134418487548828, "min": -10.452583312988281, "p10": -1.7310302734374998, "median": 5.251413345336914, "p90": 18.065901947021487, "max": 56.281768798828125, "pos_frac": 0.8125, "sample": [2.7038040161132812, 1.3917293548583984, -1.8621101379394531, 1.397674560546875, 14.35064697265625, 0.03362464904785156, 0.1748180389404297, 8.00619125366211, -1.3187713623046875, 27.730636596679688, 20.742828369140625, 19.275741577148438, 27.608154296875, 13.252265930175781, 7.5188751220703125, 9.125160217285156, -2.0070533752441406, 1.8825550079345703, 12.00332260131836, 12.185928344726562, 3.68927001953125, 5.782417297363281, 16.150497436523438, 8.170429229736328, 5.61993408203125, 6.792558670043945, 5.243198394775391, 5.2596282958984375, 8.971206665039062, 8.633232116699219, -0.7147293090820312, -0.6837158203125, 12.229957580566406, 16.52313232421875, 16.324615478515625, 22.929443359375, 17.10504150390625, 1.2331466674804688, -1.82452392578125, 13.123977661132812, 3.1807022094726562, 0.3311786651611328, 12.636276245117188, 2.933399200439453, 1.1102752685546875, 18.477699279785156, 1.4281692504882812, 3.3201675415039062, 56.281768798828125, 13.373756408691406, 12.12591552734375, -1.51287841796875, -10.452583312988281, 11.72574234008789, 2.176898956298828, -5.270515441894531, 4.8570556640625, 3.931133270263672, -6.670402526855469, 2.738555908203125, 6.8531341552734375, -5.7715911865234375, -0.96710205078125, 1.2163772583007812], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000061.npy"}
{"epoch": 0.08957415565345081, "step": 62, "batch_size": 64, "mean": 5.732213973999023, "std": 7.339388847351074, "min": -9.227935791015625, "p10": -3.0558391571044914, "median": 5.65854549407959, "p90": 14.934059143066406, "max": 30.115447998046875, "pos_frac": 0.859375, "sample": [9.250194549560547, 1.2684860229492188, 0.5452852249145508, 10.746597290039062, 5.664556503295898, 11.2215576171875, 7.339536666870117, 1.0196361541748047, 9.56414794921875, -4.9615478515625, 15.003433227539062, 6.4400787353515625, 1.9186248779296875, 0.058910369873046875, 10.207420349121094, 3.4705467224121094, 10.4508056640625, 5.906982421875, 0.14415740966796875, 5.652534484863281, 6.795661926269531, 7.425457000732422, 1.754119873046875, 5.681879043579102, -4.535121917724609, 8.646286010742188, 4.661643981933594, 8.377883911132812, -0.37162208557128906, 30.115447998046875, 17.871536254882812, 8.574737548828125, 5.703758239746094, 2.5755271911621094, 1.5228652954101562, 5.723518371582031, 0.9726715087890625, 18.37469482421875, -9.227935791015625, -4.2703857421875, -5.8056488037109375, 0.9264392852783203, 1.7256584167480469, 1.08123779296875, 1.7931404113769531, 8.683706283569336, 16.397197723388672, 9.299747467041016, -3.3610267639160156, 14.772186279296875, 8.770771026611328, 5.356487274169922, -6.87994384765625, 3.782163619995117, 13.380683898925781, 3.4814834594726562, 3.0605239868164062, 3.7090682983398438, 27.040802001953125, 20.025466918945312, 7.146707534790039, 1.0474319458007812, -2.3437347412109375, 6.4865570068359375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000062.npy"}
{"epoch": 0.09104258443465492, "step": 63, "batch_size": 64, "mean": 7.671149253845215, "std": 8.070914268493652, "min": -13.110954284667969, "p10": -0.5719120025634759, "median": 7.278438568115234, "p90": 15.728804016113282, "max": 32.68048095703125, "pos_frac": 0.890625, "sample": [25.27782440185547, 7.694664001464844, 4.844324111938477, 9.609535217285156, 3.41888427734375, 1.31036376953125, 5.1015625, -7.4449615478515625, 6.619377136230469, 6.5064697265625, -13.110954284667969, 15.685256958007812, 9.029518127441406, 7.354957580566406, 2.8468990325927734, -1.0427532196044922, 15.747467041015625, 4.3444366455078125, 12.490310668945312, 21.582687377929688, 12.297584533691406, 13.801956176757812, 6.777793884277344, 0.2376861572265625, -0.9028854370117188, 12.406755447387695, -4.000633239746094, 0.20035934448242188, 4.909461975097656, 3.3052215576171875, 12.770858764648438, 8.270759582519531, 7.468109130859375, 9.62314224243164, 10.733489990234375, 12.215263366699219, 13.889884948730469, 13.637657165527344, 5.390972137451172, 2.2433700561523438, 0.50933837890625, 14.434036254882812, 2.8404884338378906, 5.4380645751953125, -10.927345275878906, 13.591772079467773, 7.392982482910156, 1.7838134765625, 1.88140869140625, 7.2019195556640625, 3.544647216796875, 14.421485900878906, 17.782363891601562, 8.330421447753906, 17.890975952148438, 0.8605270385742188, 8.928853988647461, 32.68048095703125, -3.4320144653320312, 14.713863372802734, 4.9174041748046875, 26.30780029296875, 5.601020812988281, 11.1165771484375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000063.npy"}
{"epoch": 0.09251101321585903, "step": 64, "batch_size": 64, "mean": 8.976408958435059, "std": 10.173526763916016, "min": -17.38787841796875, "p10": -0.932947540283203, "median": 7.634852409362793, "p90": 21.660649108886723, "max": 47.59423828125, "pos_frac": 0.8125, "sample": [1.187896728515625, 18.13738250732422, 26.823257446289062, 8.311416625976562, 5.077220916748047, 12.300071716308594, 3.0350589752197266, 16.043563842773438, -0.8210010528564453, 6.800350189208984, 2.243499755859375, 10.541831970214844, 1.0373382568359375, 4.693611145019531, 12.095733642578125, 30.961227416992188, 4.768363952636719, -1.9070682525634766, 17.415008544921875, 7.591150283813477, 11.636650085449219, -2.8182601928710938, 29.48077392578125, 8.015087127685547, -0.5021724700927734, 5.851165771484375, -0.9809246063232422, 6.764848709106445, -17.38787841796875, -0.5024261474609375, 7.678554534912109, -0.6576347351074219, 47.59423828125, 5.366950988769531, 2.0636634826660156, 10.82904052734375, 16.969825744628906, 20.475051879882812, 11.088882446289062, 5.886241912841797, 6.008916854858398, 23.379074096679688, 12.0545654296875, 9.276115417480469, -1.1945343017578125, -9.472747802734375, 25.280853271484375, 3.1798477172851562, 22.16876220703125, 0.6676254272460938, 15.008773803710938, 13.561553955078125, 12.501304626464844, -1.4011459350585938, 13.294677734375, -0.34276580810546875, 5.297496795654297, 7.766902923583984, 13.589736938476562, 12.837081909179688, 2.139965057373047, 11.601577758789062, 18.436744689941406, 5.662220001220703], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000064.npy"}
{"epoch": 0.09397944199706314, "step": 65, "batch_size": 64, "mean": 9.088768005371094, "std": 10.020284652709961, "min": -23.389312744140625, "p10": -0.8437259674072259, "median": 7.694366455078125, "p90": 23.021383666992193, "max": 30.32421875, "pos_frac": 0.84375, "sample": [30.32421875, 25.66619873046875, 16.803359985351562, 12.686195373535156, 5.86448860168457, 9.451431274414062, 0.8022994995117188, 14.015739440917969, 3.570859909057617, 21.74602508544922, 17.12281036376953, 1.7490425109863281, -0.23954391479492188, 23.447296142578125, 27.99755859375, 0.8954887390136719, 9.791755676269531, 2.490100860595703, -4.183998107910156, 5.6226654052734375, 1.0704383850097656, 18.790817260742188, 6.4942474365234375, -23.389312744140625, 16.991744995117188, 8.47882080078125, -4.09735107421875, 7.718170166015625, 13.990364074707031, 5.166023254394531, 5.812309265136719, 5.631690979003906, -2.1444969177246094, 5.197118759155273, 29.265487670898438, 7.223960876464844, 22.027587890625, 15.744697570800781, 26.432899475097656, 7.703571319580078, -0.1559600830078125, -12.066574096679688, 9.834396362304688, 1.848855972290039, 7.222381591796875, 18.269363403320312, 23.797706604003906, -1.1026611328125, 6.483154296875, 20.167327880859375, 11.949920654296875, 4.0893707275390625, 20.301422119140625, -1.131540298461914, 12.580718994140625, 7.907257080078125, 6.059120178222656, 2.4541015625, 16.44475555419922, 11.162254333496094, -0.16973495483398438, 8.018409729003906, 0.2991180419921875, 7.685161590576172], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000065.npy"}
{"epoch": 0.09544787077826726, "step": 66, "batch_size": 64, "mean": 8.45463752746582, "std": 10.17934799194336, "min": -9.409027099609375, "p10": -0.5172908782958983, "median": 6.912614822387695, "p90": 19.805652999877932, "max": 48.24018859863281, "pos_frac": 0.828125, "sample": [7.248039245605469, 48.24018859863281, 0.6773910522460938, 15.564697265625, 13.73345947265625, 12.893539428710938, 2.6164493560791016, -0.22998809814453125, 10.434179306030273, 7.376197814941406, -8.13934326171875, 2.3169097900390625, 6.433443069458008, -0.27066802978515625, 12.424934387207031, 3.1626815795898438, 4.806581497192383, 0.15822982788085938, 2.274435043334961, 20.066879272460938, 8.259284973144531, 10.763538360595703, -0.047321319580078125, 10.85589599609375, 18.138145446777344, 2.235260009765625, 4.685813903808594, -3.2585830688476562, -3.8247928619384766, -9.409027099609375, 10.6434326171875, 17.794532775878906, 19.196125030517578, 7.319427490234375, 3.151334762573242, 11.779541015625, 6.691272735595703, -0.5820770263671875, 2.180604934692383, 14.351638793945312, 10.98797607421875, 3.9046707153320312, -0.3661231994628906, 7.1339569091796875, 5.998907089233398, -7.613807678222656, 10.012115478515625, 1.5431785583496094, -3.6217117309570312, 14.21002197265625, 4.3175811767578125, 21.409561157226562, 27.05096435546875, 21.734817504882812, 4.926906585693359, 16.887252807617188, 2.4298477172851562, 8.025569915771484, 14.94091796875, 2.5601539611816406, 34.89381408691406, 1.2935752868652344, 32.039154052734375, 13.68524169921875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000066.npy"}
{"epoch": 0.09691629955947137, "step": 67, "batch_size": 64, "mean": 8.551324844360352, "std": 10.06843090057373, "min": -13.342056274414062, "p10": -1.1327018737792962, "median": 6.409984588623047, "p90": 20.542781829833988, "max": 46.60491943359375, "pos_frac": 0.828125, "sample": [22.363693237304688, 4.088966369628906, 21.90605926513672, 1.0293121337890625, 2.2737979888916016, -0.46974945068359375, 16.753311157226562, 5.12571907043457, -6.137542724609375, 1.9307098388671875, 17.690139770507812, 9.819732666015625, 4.2093353271484375, 16.292633056640625, -0.448455810546875, -4.756628036499023, 8.97119140625, 10.04132080078125, 1.3506622314453125, -13.342056274414062, 19.94434356689453, 8.3671875, -0.438995361328125, -0.137847900390625, 5.301643371582031, 2.360107421875, 20.79925537109375, 6.434835433959961, -1.4168243408203125, 6.9480743408203125, 2.5765533447265625, 14.867176055908203, 7.0198974609375, 2.998384475708008, 2.0813751220703125, 9.935897827148438, 15.483060836791992, 5.979520797729492, 0.8878631591796875, -5.3887176513671875, 5.4168243408203125, 18.715255737304688, 4.772972106933594, 3.665447235107422, 26.473907470703125, 15.265251159667969, 6.385133743286133, 17.928176879882812, 14.0684814453125, 14.467811584472656, 12.501472473144531, 5.08160400390625, 12.162908554077148, 33.3724365234375, 6.733955383300781, 1.2607097625732422, 46.60491943359375, 3.344757080078125, -2.7255783081054688, 25.9844970703125, 19.848182678222656, 7.683979034423828, -3.7782440185546875, 8.75494384765625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000067.npy"}
{"epoch": 0.09838472834067548, "step": 68, "batch_size": 64, "mean": 8.31753158569336, "std": 8.447430610656738, "min": -7.1655426025390625, "p10": -0.08700313568115164, "median": 6.382474899291992, "p90": 20.828923034667973, "max": 37.77430725097656, "pos_frac": 0.890625, "sample": [-4.8774566650390625, 3.3068466186523438, 24.925567626953125, 8.712837219238281, 5.872358322143555, -5.5897369384765625, 7.964324951171875, 23.81207275390625, 1.7555160522460938, 0.62310791015625, 13.93206787109375, 15.666122436523438, 18.574440002441406, 12.110328674316406, 10.683242797851562, -0.52850341796875, 6.257907867431641, 12.94144058227539, 3.1739730834960938, 7.884101867675781, -1.0382461547851562, 4.387273788452148, 4.6673431396484375, 3.17791748046875, 7.675111770629883, 12.232353210449219, 20.049530029296875, 8.202140808105469, 9.139106750488281, 19.956520080566406, 21.162948608398438, 22.47921371459961, 1.6714935302734375, 6.58306884765625, 9.631256103515625, 1.3010177612304688, -0.39133644104003906, 5.835859298706055, 2.6433029174804688, 5.065238952636719, 16.87606430053711, 2.7414627075195312, 10.553102493286133, -7.1655426025390625, 6.827873229980469, 6.507041931152344, 3.95086669921875, 22.0491943359375, 2.4287338256835938, 1.5510292053222656, 2.549978256225586, 37.77430725097656, 24.982635498046875, 16.856109619140625, 12.982795715332031, 1.041778564453125, 1.9285736083984375, 5.316963195800781, 11.772048950195312, -1.9289093017578125, 4.940399169921875, 2.4940872192382812, 5.70489501953125, 7.954826354980469], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000068.npy"}
{"epoch": 0.09985315712187959, "step": 69, "batch_size": 64, "mean": 9.93748664855957, "std": 9.83029556274414, "min": -5.0157928466796875, "p10": -0.16551551818847648, "median": 6.811107635498047, "p90": 20.821404266357426, "max": 45.27044677734375, "pos_frac": 0.875, "sample": [4.66412353515625, 6.587757110595703, 32.16473388671875, 9.149002075195312, 12.562515258789062, 16.221237182617188, -5.0157928466796875, -1.3226852416992188, -0.20058822631835938, 3.8400650024414062, 5.021942138671875, 8.918712615966797, -0.88165283203125, 9.212493896484375, 6.687751770019531, 15.498241424560547, 26.542572021484375, 7.295021057128906, 17.680801391601562, 16.371986389160156, -0.3516082763671875, 24.986038208007812, 13.303398132324219, 10.326911926269531, 18.647926330566406, 14.770950317382812, 4.6484375, 15.795036315917969, 13.144203186035156, 5.385004043579102, 1.0001087188720703, 0.3779182434082031, 19.182571411132812, 45.27044677734375, 4.9377288818359375, 20.154457092285156, 6.284660339355469, 16.123321533203125, 17.287399291992188, 1.0742149353027344, 1.8640213012695312, 16.070556640625, 11.358379364013672, 1.8560981750488281, -0.5877914428710938, 15.264564514160156, 6.4925384521484375, 37.288818359375, 4.32196044921875, 7.862287521362305, 6.178483963012695, -4.7171173095703125, 6.131004333496094, 2.8793182373046875, 2.0725631713867188, 6.9344635009765625, 21.10723876953125, 22.528289794921875, 0.745819091796875, 19.21612548828125, 1.8776130676269531, 1.2636184692382812, 4.726648330688477, -0.08367919921875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000069.npy"}
{"epoch": 0.1013215859030837, "step": 70, "batch_size": 64, "mean": 10.791955947875977, "std": 11.445080757141113, "min": -9.811004638671875, "p10": -1.5642583847045897, "median": 10.46110725402832, "p90": 26.107794189453134, "max": 42.391815185546875, "pos_frac": 0.84375, "sample": [6.343294143676758, 0.180206298828125, 11.456134796142578, 19.51507568359375, 23.638259887695312, 13.030191421508789, -8.120452880859375, -1.7739715576171875, 13.882148742675781, 3.8586082458496094, -2.2632522583007812, 14.0068359375, -2.4215240478515625, 9.224151611328125, 39.27296447753906, 1.8119888305664062, 11.958053588867188, 27.01861572265625, 11.487489700317383, 42.391815185546875, 12.213790893554688, 10.274906158447266, 1.9436798095703125, 29.561843872070312, 0.77630615234375, 17.326980590820312, 19.713287353515625, 13.731294631958008, -1.6665325164794922, 7.582584381103516, 6.1070404052734375, 27.70221710205078, 2.0178909301757812, 0.6578788757324219, 1.6908721923828125, -0.5386428833007812, 14.163951873779297, 3.1229934692382812, 12.667503356933594, -2.3021068572998047, 3.1960182189941406, 3.8225955963134766, -0.237823486328125, 1.91827392578125, 11.258583068847656, 1.99951171875, 20.169570922851562, 23.9825439453125, -9.811004638671875, 5.1452178955078125, 18.297470092773438, 2.761627197265625, 1.5174331665039062, 34.33259582519531, 13.95077133178711, -1.3256187438964844, 35.7313232421875, 10.647308349609375, 21.397789001464844, 17.3144474029541, 4.04673957824707, 23.827194213867188, 13.584278106689453, 21.913970947265625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000070.npy"}
{"epoch": 0.1027900146842878, "step": 71, "batch_size": 64, "mean": 12.418464660644531, "std": 12.390776634216309, "min": -9.510610580444336, "p10": 0.4899227142334003, "median": 10.377716064453125, "p90": 23.54490203857422, "max": 50.80706787109375, "pos_frac": 0.890625, "sample": [-6.572792053222656, 42.203216552734375, 6.307575225830078, 2.2566146850585938, 5.822834014892578, 13.164447784423828, 6.503231048583984, 12.796646118164062, 18.364971160888672, 5.611124038696289, 22.185653686523438, 6.907657623291016, 17.64764404296875, 4.638498306274414, 3.358428955078125, 7.183677673339844, 11.875286102294922, 15.600189208984375, -0.49676513671875, 2.3810958862304688, 4.354026794433594, 46.20509338378906, 7.640941619873047, 7.0852813720703125, 16.91126251220703, 17.387474060058594, 5.592258453369141, 13.224929809570312, 13.322713851928711, -8.575828552246094, 4.248634338378906, 14.138824462890625, 11.070709228515625, -0.2672309875488281, 5.188037872314453, 18.97130584716797, 19.524703979492188, -0.46978759765625, 22.501449584960938, 17.27729034423828, 5.130435943603516, 13.129127502441406, 6.4650115966796875, 17.80224609375, 15.427108764648438, 18.50092315673828, 4.570106506347656, 37.36859130859375, 49.8258056640625, 50.80706787109375, 2.4104080200195312, 23.992095947265625, -9.510610580444336, -1.1466102600097656, 13.990478515625, 6.873653411865234, 8.06783676147461, 11.330528259277344, 19.909278869628906, 15.184837341308594, 31.486907958984375, 4.1837921142578125, 6.226648330688477, 9.684722900390625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000071.npy"}
{"epoch": 0.10425844346549193, "step": 72, "batch_size": 64, "mean": 13.379773139953613, "std": 13.739155769348145, "min": -14.553009033203125, "p10": -2.4219781875610327, "median": 11.217761993408203, "p90": 33.76233367919924, "max": 58.5970458984375, "pos_frac": 0.890625, "sample": [18.81884002685547, 16.372236251831055, 6.152107238769531, 13.888710021972656, 6.266963958740234, 3.8085765838623047, 9.432525634765625, 28.13178253173828, -14.553009033203125, 26.5430908203125, 15.820598602294922, 14.466011047363281, 7.353849411010742, 0.32965087890625, 17.080821990966797, 9.435386657714844, 13.434455871582031, 26.091049194335938, 15.403327941894531, 36.395233154296875, 41.034515380859375, -3.601247787475586, 7.702476501464844, 10.569002151489258, 18.354515075683594, 4.109367370605469, 19.28973388671875, 10.694778442382812, 11.740745544433594, 18.03278350830078, 4.104461669921875, 5.476264953613281, 12.591842651367188, -14.03338623046875, 37.314300537109375, 29.420242309570312, 20.36444854736328, 7.347866058349609, 23.587677001953125, -3.8832168579101562, 9.420999526977539, 42.40684509277344, 58.5970458984375, -7.146026611328125, 10.098615646362305, 26.197921752929688, 6.9914093017578125, 8.313423156738281, 1.1180553436279297, 35.62322998046875, -10.983154296875, 4.735080718994141, 2.252826690673828, 18.797988891601562, 14.318679809570312, 20.221511840820312, 10.450878143310547, 16.41173553466797, 36.993621826171875, 12.280710220336914, 7.224910736083984, 3.2545242309570312, -4.142784118652344, 1.9780426025390625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000072.npy"}
{"epoch": 0.10572687224669604, "step": 73, "batch_size": 64, "mean": 13.37204360961914, "std": 19.64159393310547, "min": -28.021514892578125, "p10": -9.579868125915526, "median": 10.235583305358887, "p90": 34.883740997314455, "max": 69.15608215332031, "pos_frac": 0.78125, "sample": [33.08259582519531, -14.309700012207031, 7.850765228271484, 15.265052795410156, 7.196393966674805, 37.369651794433594, -7.1258087158203125, 26.275787353515625, 7.219596862792969, -1.1134834289550781, 29.757232666015625, 34.16526794433594, 25.585403442382812, -1.4184188842773438, 0.6644744873046875, -16.427108764648438, 58.584625244140625, -2.2365264892578125, 10.786909103393555, 20.597732543945312, 18.110450744628906, 8.178474426269531, 11.227806091308594, 1.4556732177734375, 2.1983089447021484, 9.684257507324219, -20.142818450927734, 11.341972351074219, 32.13673400878906, 15.545616149902344, 13.870288848876953, 3.0922889709472656, 60.221405029296875, -12.439254760742188, 5.263881683349609, 30.954750061035156, 32.75370788574219, -10.106546401977539, 4.7527008056640625, 2.5990428924560547, -2.68902587890625, -6.383094787597656, -10.301429748535156, 7.710975646972656, 17.130332946777344, 17.33106231689453, 2.8672351837158203, 5.805580139160156, 22.65430450439453, 46.47227478027344, 15.259994506835938, 6.397983551025391, 24.656005859375, 5.495979309082031, 15.370872497558594, -8.3509521484375, 14.10455322265625, 60.78221130371094, 30.348434448242188, 69.15608215332031, 3.2418899536132812, 35.19165802001953, 19.11016082763672, -28.021514892578125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000073.npy"}
{"epoch": 0.10719530102790015, "step": 74, "batch_size": 64, "mean": 14.660597801208496, "std": 21.833105087280273, "min": -41.16600799560547, "p10": -3.3655128479003897, "median": 11.006040573120117, "p90": 34.30225219726563, "max": 128.5081787109375, "pos_frac": 0.796875, "sample": [29.065101623535156, 9.587478637695312, 13.532241821289062, -0.5012226104736328, 26.044021606445312, 7.457206726074219, 10.724445343017578, 58.54651641845703, 2.7162227630615234, 17.37842559814453, 16.186416625976562, 10.481996536254883, 20.772624969482422, 1.4079971313476562, 17.53276824951172, 128.5081787109375, 8.270538330078125, 32.77519989013672, 15.956352233886719, 3.1897125244140625, 1.8276100158691406, 53.77348327636719, 5.068092346191406, 34.956703186035156, 27.191123962402344, 11.056747436523438, -3.974365234375, 0.1475067138671875, -0.7222690582275391, -19.469215393066406, 15.65167236328125, -1.3030319213867188, 0.1075439453125, 9.775825500488281, -41.16600799560547, 22.06005859375, -2.5692596435546875, 27.95626449584961, 4.954050064086914, -4.639560699462891, -10.806583404541016, 11.63720703125, 19.585800170898438, 25.884262084960938, -0.7496337890625, 47.840179443359375, 13.973419189453125, 3.29913330078125, 9.465744018554688, 8.488407135009766, 10.955333709716797, 38.60466003417969, 19.857070922851562, 24.546661376953125, 40.277748107910156, 17.63463592529297, 5.237672805786133, 17.767562866210938, 29.609848022460938, 28.390525817871094, -0.27339935302734375, -3.9454803466796875, -3.7067642211914062, 14.389053344726562], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000074.npy"}
{"epoch": 0.10866372980910426, "step": 75, "batch_size": 64, "mean": 20.787948608398438, "std": 20.932628631591797, "min": -13.215667724609375, "p10": -3.324973678588864, "median": 16.652326583862305, "p90": 50.60549697875977, "max": 76.41387939453125, "pos_frac": 0.859375, "sample": [-0.4115104675292969, 25.999771118164062, 17.97930908203125, 12.857124328613281, 19.495384216308594, 12.533645629882812, 11.853424072265625, 7.237815856933594, 48.057403564453125, 69.02960205078125, 32.603309631347656, 15.56419563293457, -7.852149963378906, 29.15594482421875, 76.41387939453125, 15.684864044189453, 43.620452880859375, 64.76445007324219, 9.614585876464844, -0.23632240295410156, 46.480010986328125, 25.805923461914062, 16.493289947509766, 10.597160339355469, 16.84869384765625, 49.77727508544922, 50.96044921875, 3.498769760131836, 9.01919174194336, 32.71341323852539, 19.489608764648438, 17.637493133544922, 4.28997802734375, 68.68070983886719, -5.5286407470703125, -4.573600769042969, 26.73199462890625, -13.215667724609375, 21.453582763671875, 21.16565704345703, 6.720729827880859, 13.551998138427734, -8.908592224121094, 8.344741821289062, -7.6166534423828125, 29.789718627929688, 3.359668731689453, 13.021408081054688, 39.8707275390625, 27.9613037109375, 4.178365707397461, -7.622161865234375, 8.763715744018555, 6.724525451660156, 16.390533447265625, 5.619834899902344, 8.055450439453125, 24.448211669921875, 17.127262115478516, 17.758316040039062, 39.22954559326172, 60.420501708984375, 64.13778686523438, 16.811363220214844], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000075.npy"}
{"epoch": 0.11013215859030837, "step": 76, "batch_size": 64, "mean": 13.58674430847168, "std": 16.404356002807617, "min": -44.9515380859375, "p10": -2.9950830459594715, "median": 13.054203033447266, "p90": 32.43123245239258, "max": 68.86264038085938, "pos_frac": 0.859375, "sample": [14.602792739868164, 6.106052398681641, 2.5854568481445312, 26.39923095703125, 3.95648193359375, 19.159332275390625, 10.814155578613281, 28.815139770507812, 32.83317184448242, 15.07818603515625, 16.591705322265625, 21.520713806152344, 9.911500930786133, 32.61249542236328, 4.005439758300781, 5.426816940307617, 8.659879684448242, 68.86264038085938, 34.777687072753906, 6.201194763183594, -6.48974609375, 12.880172729492188, 0.07292556762695312, 13.233039855957031, -15.8878173828125, 6.476354598999023, 5.831846237182617, 16.584060668945312, 6.558784484863281, 7.507778167724609, 31.931854248046875, 17.07209014892578, 23.170082092285156, 9.738121032714844, 30.117172241210938, 4.326265335083008, -3.4901046752929688, 27.53350830078125, 2.6166114807128906, -10.342903137207031, 7.550773620605469, 2.4628639221191406, -6.654840469360352, -7.4836578369140625, 17.50591278076172, 32.00828552246094, 51.94598388671875, 29.0888671875, 9.26511001586914, 6.1472625732421875, 13.549514770507812, -1.8400325775146484, 16.737747192382812, 18.069297790527344, 16.226806640625, 13.131118774414062, -0.5865840911865234, 15.65777587890625, 37.31074523925781, 24.979721069335938, 23.205642700195312, 34.88737487792969, 12.977287292480469, -44.9515380859375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000076.npy"}
{"epoch": 0.11160058737151249, "step": 77, "batch_size": 64, "mean": 21.639652252197266, "std": 22.75507164001465, "min": -40.983154296875, "p10": 0.12116661071777418, "median": 22.11380958557129, "p90": 56.663919067382814, "max": 78.500732421875, "pos_frac": 0.890625, "sample": [17.353130340576172, 44.718544006347656, 32.12386703491211, 43.300899505615234, 43.95689392089844, -0.1857147216796875, 62.117156982421875, 22.637584686279297, 9.514898300170898, 10.156696319580078, 6.798200607299805, 35.47210693359375, 40.252479553222656, 5.908992767333984, 4.0486297607421875, 26.640708923339844, -40.983154296875, 22.541732788085938, 36.940330505371094, 32.112876892089844, 14.224617004394531, -2.5943031311035156, 10.356361389160156, 0.8372230529785156, 3.4451828002929688, 56.781005859375, 3.1422576904296875, 27.260108947753906, 11.829437255859375, -5.559848785400391, 33.37666702270508, 29.32904052734375, 13.446395874023438, 65.76319885253906, 35.13606262207031, 9.921310424804688, 59.50677490234375, 78.500732421875, 21.640464782714844, 13.163482666015625, -34.88685607910156, 8.324440002441406, -16.801910400390625, 4.034915924072266, 12.298185348510742, 26.949012756347656, 32.51886749267578, 25.450674057006836, 27.176254272460938, 26.077613830566406, 7.709590911865234, -7.05029296875, 5.026885986328125, 27.177581787109375, 27.945350646972656, 23.529342651367188, 4.442474365234375, 58.096771240234375, 69.68217468261719, 12.130245208740234, 24.49746322631836, 56.390716552734375, 21.68588638305664, 7.599334716796875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000077.npy"}
{"epoch": 0.1130690161527166, "step": 78, "batch_size": 64, "mean": 16.735197067260742, "std": 19.410526275634766, "min": -21.710525512695312, "p10": -3.250660705566406, "median": 16.428627967834473, "p90": 43.44438629150391, "max": 86.70114135742188, "pos_frac": 0.796875, "sample": [1.0793685913085938, 1.5461235046386719, 42.28826904296875, 17.305294036865234, -2.2484817504882812, 48.29432678222656, 17.37929916381836, 86.70114135742188, 27.06060791015625, 23.83945083618164, 21.15478515625, 9.682437896728516, -4.677864074707031, 15.047815322875977, 21.994876861572266, 20.818866729736328, -0.672515869140625, 9.714410781860352, -5.804634094238281, 3.2798233032226562, 6.887590408325195, 18.390012741088867, 26.939891815185547, 24.574417114257812, 19.989459991455078, 14.41025161743164, 18.117252349853516, 18.237579345703125, 24.314498901367188, 5.784685134887695, 43.93986511230469, 16.433820724487305, 5.541603088378906, -10.996822357177734, 26.28301239013672, -12.05108642578125, 55.61004638671875, 21.92919921875, 45.089881896972656, -1.5728683471679688, 37.95490646362305, 3.5044097900390625, 51.18888854980469, 33.028316497802734, 30.607276916503906, -0.6106758117675781, 11.123931884765625, 3.9232616424560547, 13.035778045654297, 7.756572723388672, 5.672054290771484, -1.3418750762939453, 16.42343521118164, -3.0093841552734375, 32.60516357421875, -21.710525512695312, 52.0166015625, 4.502498626708984, -18.20848846435547, 27.458328247070312, -3.35406494140625, 36.30937194824219, 2.7727890014648438, 27.76830291748047], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000078.npy"}
{"epoch": 0.1145374449339207, "step": 79, "batch_size": 64, "mean": 18.436786651611328, "std": 23.784814834594727, "min": -23.134109497070312, "p10": -6.65648784637451, "median": 13.724443435668945, "p90": 51.36130981445314, "max": 78.62054443359375, "pos_frac": 0.75, "sample": [-7.1918487548828125, 55.658180236816406, 5.933309555053711, 73.04817199707031, 4.749702453613281, 0.015926361083984375, -2.5492992401123047, -10.781932830810547, 43.82838439941406, 37.72550964355469, 19.470291137695312, -23.134109497070312, 25.669517517089844, 72.87307739257812, 9.688446044921875, 26.57391357421875, 75.1595458984375, 53.31561279296875, 17.691532135009766, -10.379928588867188, 78.62054443359375, 17.6728515625, -3.7796859741210938, -12.44818115234375, 29.2569580078125, 1.0214710235595703, -0.236572265625, 17.189857482910156, 14.41751480102539, 65.58553314208984, 3.5654067993164062, 8.95994758605957, 13.0313720703125, 28.607276916503906, -12.691390991210938, 10.47195053100586, -0.8083305358886719, 7.712501525878906, 43.752685546875, 41.95912170410156, 20.962810516357422, 33.1353759765625, 34.55525207519531, 20.11669158935547, 28.576385498046875, -5.407312393188477, 2.8071517944335938, 32.74058532714844, 38.060020446777344, 10.751398086547852, 15.694671630859375, -5.359609603881836, -1.0808639526367188, 41.51103210449219, -1.3628997802734375, 21.825149536132812, 5.234172821044922, 7.2256622314453125, 0.5069122314453125, -9.190536499023438, 7.6695098876953125, 46.80126953125, -5.047222137451172, 20.003860473632812], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000079.npy"}
{"epoch": 0.11600587371512482, "step": 80, "batch_size": 64, "mean": 17.029022216796875, "std": 24.560388565063477, "min": -25.211647033691406, "p10": -8.02565689086914, "median": 12.095054626464844, "p90": 44.13153839111329, "max": 98.56991577148438, "pos_frac": 0.78125, "sample": [26.565101623535156, 8.406143188476562, -8.264762878417969, 25.38005828857422, 6.247051239013672, 3.6760501861572266, -3.164745330810547, -9.778984069824219, 34.43681335449219, 2.7762603759765625, 4.164190292358398, 16.635562896728516, -16.01270294189453, -21.290348052978516, -7.467742919921875, 24.951507568359375, 98.56991577148438, 40.22502899169922, 84.89495849609375, 47.43011474609375, 25.852218627929688, 6.618377685546875, 42.86646270751953, 44.67371368408203, 35.221099853515625, 15.955615997314453, -1.203521728515625, 34.07310485839844, 15.501087188720703, 9.93377685546875, 3.62060546875, -17.917205810546875, 4.987010955810547, -6.2967071533203125, 24.751113891601562, -25.211647033691406, 60.7935791015625, 32.20613098144531, -8.503013610839844, 91.14114379882812, 10.805826187133789, 2.9339141845703125, 2.7752418518066406, 2.6940155029296875, 47.168601989746094, 13.220039367675781, 24.13861846923828, 31.415542602539062, 17.142377853393555, 10.970069885253906, -4.022489547729492, -3.4167041778564453, 1.0811042785644531, 6.8479766845703125, 24.01151466369629, 23.224502563476562, 6.01666259765625, 3.24066162109375, 33.65765380859375, 25.211410522460938, 14.210594177246094, 39.566375732421875, -5.407085418701172, 14.928504943847656], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000080.npy"}
{"epoch": 0.11747430249632893, "step": 81, "batch_size": 64, "mean": 23.939817428588867, "std": 28.985416412353516, "min": -31.064056396484375, "p10": -9.685163116455078, "median": 19.814626693725586, "p90": 60.79706726074219, "max": 122.4580078125, "pos_frac": 0.84375, "sample": [45.565452575683594, 35.34136962890625, -31.064056396484375, 40.7431640625, 26.045772552490234, 61.89225769042969, 19.806896209716797, -11.745159149169922, 7.262327194213867, 34.230560302734375, 33.03266143798828, -24.192214965820312, 2.7302474975585938, 26.11724853515625, 89.72085571289062, 10.96904182434082, 25.533355712890625, 38.40625, 60.866729736328125, -15.360023498535156, 1.2731895446777344, 10.715347290039062, 21.883987426757812, 73.66301727294922, 25.212196350097656, 12.337884902954102, 18.274017333984375, 8.198688507080078, 7.44743537902832, -8.727645874023438, 27.505523681640625, 7.319854736328125, 5.4612274169921875, 13.784500122070312, 30.547744750976562, 122.4580078125, 43.253299713134766, 19.822357177734375, 3.4040374755859375, 60.634521484375, 42.4339599609375, 50.17523193359375, 15.566474914550781, -0.5782241821289062, 4.756011962890625, -2.749664306640625, -19.13833236694336, 15.058189392089844, 14.379730224609375, 30.68731689453125, 16.011661529541016, 13.350988388061523, 22.11139678955078, -14.967849731445312, 39.42113494873047, 46.94004821777344, 112.235107421875, 63.02409362792969, 10.080368041992188, 36.526527404785156, 3.1557846069335938, 28.96039581298828, -10.095527648925781, 34.43159484863281], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000081.npy"}
{"epoch": 0.11894273127753303, "step": 82, "batch_size": 64, "mean": 18.85877227783203, "std": 31.259567260742188, "min": -62.10081481933594, "p10": -17.53169708251953, "median": 16.99924659729004, "p90": 53.041306304931645, "max": 125.13247680664062, "pos_frac": 0.796875, "sample": [-8.988414764404297, 33.73414611816406, -20.748207092285156, 19.125415802001953, 21.49062728881836, 19.067140579223633, -28.241851806640625, 16.17275619506836, 26.36225128173828, 59.703041076660156, 71.30548095703125, 13.421161651611328, -14.078571319580078, 9.8033447265625, 15.929471969604492, 109.27845764160156, 13.800033569335938, -55.04957580566406, -10.44195556640625, 34.567535400390625, 1.8660964965820312, 33.018829345703125, 52.832740783691406, 4.478570938110352, -19.011608123779297, -6.308265686035156, 3.9053115844726562, 25.052764892578125, 21.756004333496094, 11.938949584960938, 22.14958953857422, 37.209930419921875, 39.13105773925781, -1.0240135192871094, 77.23043823242188, 19.518726348876953, 53.13069152832031, 17.079692840576172, 9.540491104125977, 6.175506591796875, -2.2139816284179688, 30.313278198242188, 62.63053894042969, -19.84795379638672, 30.373085021972656, 22.71106719970703, 7.53758430480957, 48.43484115600586, 35.76398468017578, -22.918411254882812, 10.993354797363281, 7.538970947265625, -62.10081481933594, 7.650367736816406, 33.52918243408203, 22.606218338012695, 5.1302642822265625, 27.310531616210938, 16.642181396484375, 125.13247680664062, 16.918800354003906, 18.0801944732666, 0.22231292724609375, 48.63947296142578], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000082.npy"}
{"epoch": 0.12041116005873716, "step": 83, "batch_size": 64, "mean": 22.43198013305664, "std": 27.50795555114746, "min": -27.584938049316406, "p10": -9.611540222167967, "median": 18.21010971069336, "p90": 64.96145324707034, "max": 89.71903991699219, "pos_frac": 0.8125, "sample": [22.081298828125, 26.521549224853516, 18.093769073486328, 72.49101257324219, 46.72053527832031, 13.174041748046875, -7.917701721191406, 74.31576538085938, 59.052703857421875, 4.618080139160156, 12.8245849609375, 18.27642822265625, 2.27557373046875, -5.933433532714844, 19.010005950927734, 72.75704956054688, -13.4810791015625, 89.71903991699219, 11.441726684570312, 75.68798065185547, 41.06636047363281, 12.272247314453125, 52.28440856933594, -20.934722900390625, -27.584938049316406, -10.337471008300781, 57.82135009765625, -21.42926788330078, 17.52961540222168, 67.4937744140625, 12.176521301269531, 15.496353149414062, 4.901811599731445, 0.04401397705078125, 27.04840087890625, 12.658805847167969, 27.127567291259766, -0.33562660217285156, 41.91755676269531, 14.674453735351562, 21.556304931640625, 19.383914947509766, 46.048431396484375, 75.0535888671875, 20.009475708007812, 49.91505432128906, 5.181652069091797, -19.67198944091797, -2.5931129455566406, 2.7700729370117188, -23.853729248046875, 54.62559509277344, 8.805950164794922, 43.85060119628906, 26.556625366210938, 25.01055145263672, -7.582183837890625, 8.365341186523438, 19.46674346923828, 34.75897216796875, 43.89991760253906, 18.14379119873047, 28.20587921142578, 2.1191864013671875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000083.npy"}
{"epoch": 0.12187958883994127, "step": 84, "batch_size": 64, "mean": 19.303150177001953, "std": 21.120254516601562, "min": -44.72161865234375, "p10": -1.6684860229492184, "median": 16.75400161743164, "p90": 43.871881103515626, "max": 85.84275817871094, "pos_frac": 0.84375, "sample": [7.290641784667969, 16.834182739257812, 3.1826019287109375, 5.701175689697266, 47.5953369140625, 4.307891845703125, 5.832607269287109, 21.353683471679688, 16.645544052124023, 9.209548950195312, 30.253582000732422, 36.959510803222656, -20.936859130859375, 15.673881530761719, 22.776714324951172, 30.798194885253906, 10.295896530151367, 85.84275817871094, 30.924060821533203, 2.149383544921875, 7.413583755493164, 30.740875244140625, 7.165290832519531, 6.645303726196289, -1.403106689453125, 43.6605224609375, 26.85301971435547, 32.18034362792969, -44.72161865234375, -1.7781009674072266, 55.23816680908203, 2.6114959716796875, 38.058990478515625, 16.67382049560547, 19.25958251953125, 8.291587829589844, 49.65875244140625, 43.96246337890625, 30.994110107421875, 20.511322021484375, 2.7872161865234375, 32.38268280029297, -4.870080947875977, 16.633203506469727, 29.6016845703125, 29.731365203857422, -1.4127178192138672, -3.8293991088867188, 35.487945556640625, 33.97479248046875, -2.390045166015625, 43.58366394042969, 56.53251647949219, 19.357521057128906, 62.59686279296875, 15.718597412109375, 31.602935791015625, 8.422248840332031, -18.22577667236328, 31.855247497558594, 12.935497283935547, 20.03264617919922, 8.371532440185547, -0.18527603149414062], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000084.npy"}
{"epoch": 0.12334801762114538, "step": 85, "batch_size": 64, "mean": 19.99712371826172, "std": 25.37059211730957, "min": -31.917755126953125, "p10": -5.897201538085937, "median": 18.423781394958496, "p90": 59.954100799560564, "max": 72.04931640625, "pos_frac": 0.703125, "sample": [43.58631134033203, 14.876091003417969, 30.220726013183594, 3.679594039916992, -3.2835540771484375, 23.034278869628906, 27.77001953125, 1.6869926452636719, 63.59326934814453, 9.40643310546875, -2.6487255096435547, 6.305747985839844, 66.21168518066406, 27.145965576171875, 25.008283615112305, 22.07098388671875, -4.805267333984375, -18.95288848876953, 19.4742431640625, -0.9422683715820312, -31.917755126953125, 72.04931640625, 19.438186645507812, -2.3614864349365234, -4.3385162353515625, 47.41826629638672, 7.83843994140625, -9.59283447265625, 19.407272338867188, 11.01947021484375, 48.868988037109375, -6.36517333984375, -17.60308837890625, -1.3123245239257812, 14.3544921875, 5.566936492919922, 66.42066955566406, 17.95199203491211, 39.714752197265625, -0.4016704559326172, -3.727293014526367, 71.17288208007812, -2.0257511138916016, 45.958770751953125, 61.78228759765625, 15.674365997314453, 55.688331604003906, 68.06364440917969, 10.307220458984375, 5.936454772949219, 33.41797637939453, 51.831626892089844, -0.3130950927734375, -7.278755187988281, 19.36187744140625, 18.895570755004883, 54.664794921875, 23.04619598388672, 30.400283813476562, 38.15266036987305, -23.101425170898438, 19.513431549072266, -0.9192161560058594, 43.719261169433594], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000085.npy"}
{"epoch": 0.12481644640234948, "step": 86, "batch_size": 64, "mean": 21.28069305419922, "std": 38.672786712646484, "min": -58.550323486328125, "p10": -15.568009948730467, "median": 15.483247756958008, "p90": 67.09755249023439, "max": 149.52044677734375, "pos_frac": 0.734375, "sample": [-13.040870666503906, 18.685009002685547, 70.55610656738281, -20.852684020996094, 36.51448059082031, 32.181854248046875, 27.672758102416992, -1.2337646484375, 107.26388549804688, 12.281486511230469, 6.713701248168945, -6.112846374511719, 0.8997116088867188, 22.161376953125, -9.818191528320312, 20.547195434570312, 50.92681884765625, 19.702415466308594, -3.3024024963378906, -52.11030578613281, 61.226165771484375, 22.69040298461914, 32.60869598388672, 10.892961502075195, 10.19390869140625, -16.65106964111328, -5.4423675537109375, 69.61386108398438, -4.525257110595703, 7.858417510986328, 38.82181167602539, 19.616825103759766, 8.134477615356445, -58.550323486328125, 47.89263916015625, 55.01361083984375, 28.251022338867188, -41.28626251220703, 76.60334777832031, 3.506959915161133, 149.52044677734375, 19.45376205444336, 26.190750122070312, 11.900684356689453, -4.865756988525391, 132.59445190429688, 6.936634063720703, 124.0521240234375, 48.3260498046875, -7.130828857421875, -30.26538848876953, 40.36674499511719, 9.420171737670898, 35.19964599609375, 30.06512451171875, 36.33390426635742, 37.003631591796875, 6.282989501953125, 1.1146621704101562, 4.220848083496094, -4.627330780029297, -19.056869506835938, 3.03985595703125, 19.782543182373047], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000086.npy"}
{"epoch": 0.1262848751835536, "step": 87, "batch_size": 64, "mean": 24.9149227142334, "std": 34.261924743652344, "min": -77.7696533203125, "p10": -8.444403839111326, "median": 20.52509880065918, "p90": 67.63536071777345, "max": 132.17149353027344, "pos_frac": 0.8125, "sample": [23.565698623657227, 36.65016174316406, 0.14996910095214844, 9.884275436401367, 38.61578369140625, -9.192359924316406, 31.849899291992188, -0.6370372772216797, 1.5443801879882812, -5.241546630859375, -25.215103149414062, 48.616607666015625, -6.603515625, 69.68875122070312, 31.872528076171875, 60.48698425292969, 54.49800109863281, 10.86079216003418, 93.64425659179688, 62.565826416015625, 21.573989868164062, 20.324935913085938, 7.239522933959961, 42.29735565185547, 41.10957336425781, 2.2386398315429688, 40.56067657470703, 132.17149353027344, 13.121536254882812, 2.1009063720703125, 13.648698806762695, 77.05262756347656, 61.31988525390625, 10.60806655883789, 58.139678955078125, 26.937850952148438, 14.625925064086914, 40.33127975463867, 20.603466033935547, 76.89476013183594, 0.6951122283935547, 82.6627197265625, 15.082496643066406, 20.446731567382812, -77.7696533203125, 11.314310073852539, 42.272727966308594, -34.74432373046875, 40.157135009765625, 73.39189147949219, -17.13823699951172, -16.790531158447266, 9.05875015258789, 20.205490112304688, -6.6991729736328125, 62.8441162109375, 31.241165161132812, 35.67842102050781, -36.913604736328125, 24.72661590576172, 16.022079467773438, 1.178232192993164, 51.91880798339844, -4.791484832763672], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000087.npy"}
{"epoch": 0.1277533039647577, "step": 88, "batch_size": 64, "mean": 18.689455032348633, "std": 32.75210189819336, "min": -61.47918701171875, "p10": -9.333811950683593, "median": 12.56061840057373, "p90": 61.178782653808625, "max": 133.5142822265625, "pos_frac": 0.765625, "sample": [11.20025634765625, 11.214401245117188, 65.6025390625, 66.48738098144531, 34.63438415527344, 49.68873596191406, 3.8285255432128906, 64.78688049316406, -9.174331665039062, 13.927753448486328, 34.27323913574219, 6.752902984619141, 1.4869384765625, 48.5965576171875, -23.650436401367188, 31.98822021484375, -20.25110626220703, 16.708251953125, -49.58601379394531, -61.47918701171875, -5.363897323608398, 75.77236938476562, 39.58367156982422, -3.654876708984375, 43.42633819580078, 5.243783950805664, 30.142959594726562, -9.40216064453125, 22.627107620239258, 21.333511352539062, 41.18525695800781, 7.347938537597656, -1.4374008178710938, 27.1243896484375, 1.2177352905273438, 95.33798217773438, 9.399986267089844, 52.7598876953125, 4.897121429443359, 133.5142822265625, 5.953254699707031, 45.16217803955078, 3.674501419067383, 14.142057418823242, 12.800033569335938, 24.849159240722656, 0.4898338317871094, -2.7411117553710938, 34.52748107910156, 81.32391357421875, 33.90593719482422, 15.719169616699219, -0.5601615905761719, 10.066795349121094, -52.07469177246094, -5.1102294921875, 10.997337341308594, -1.3998851776123047, 12.321203231811523, 15.144853591918945, 19.38541603088379, -21.073654174804688, 8.98260498046875, 41.54725646972656], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000088.npy"}
{"epoch": 0.12922173274596183, "step": 89, "batch_size": 64, "mean": 20.956254959106445, "std": 28.136306762695312, "min": -66.62799072265625, "p10": -2.4235883712768556, "median": 17.284506797790527, "p90": 49.96467666625978, "max": 125.47996520996094, "pos_frac": 0.859375, "sample": [6.3280487060546875, 1.0220718383789062, 5.743888854980469, 21.300737380981445, -10.480777740478516, 51.514923095703125, 41.99810791015625, 16.155208587646484, -9.417724609375, 59.075355529785156, 59.003578186035156, 23.298160552978516, 22.76122283935547, 46.347434997558594, 37.35474395751953, 30.323795318603516, 13.101692199707031, 10.079322814941406, -2.505308151245117, 19.829689025878906, 15.536849975585938, 2.6303768157958984, 16.786273956298828, 11.776935577392578, -10.8453369140625, 1.5013351440429688, 1.1857681274414062, 10.105363845825195, 76.38552856445312, 19.02679443359375, 28.66905975341797, 89.32162475585938, 17.146358489990234, 45.29930114746094, 21.982135772705078, 2.125823974609375, 13.185604095458984, 125.47996520996094, 4.211662292480469, -2.4240169525146484, 29.446338653564453, -0.3922843933105469, 21.855379104614258, 30.91442108154297, 15.357873916625977, 5.639595031738281, 9.943296432495117, 5.365337371826172, 4.1546783447265625, 36.56982421875, 21.459503173828125, 42.158233642578125, -2.422588348388672, 17.856369018554688, 43.922691345214844, -66.62799072265625, -26.068161010742188, 27.12055206298828, 17.42265510559082, 23.12359619140625, 26.2857666015625, 24.365158081054688, 94.6729736328125, 7.155452728271484], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000089.npy"}
{"epoch": 0.13069016152716592, "step": 90, "batch_size": 64, "mean": 28.446239471435547, "std": 42.291595458984375, "min": -52.76481628417969, "p10": -11.296680831909173, "median": 18.81656265258789, "p90": 93.53346557617189, "max": 149.42572021484375, "pos_frac": 0.796875, "sample": [67.45944213867188, -44.53777313232422, 30.398757934570312, 95.54312133789062, 22.666942596435547, 36.6239013671875, 0.7497787475585938, -4.177330017089844, 56.828521728515625, -1.5008583068847656, 44.75215148925781, 18.74267578125, -1.0529670715332031, 12.768280029296875, 39.70063781738281, 87.62631225585938, -5.0755462646484375, 149.42572021484375, 54.40379333496094, 1.6611709594726562, 2.7640857696533203, 10.5372314453125, 18.89044952392578, -13.923171997070312, 117.99444580078125, 16.27285385131836, 25.8994140625, 124.00640869140625, 4.574068069458008, 23.62533950805664, 139.8665771484375, 22.1153564453125, -27.484100341796875, 11.926910400390625, -4.273292541503906, -52.76481628417969, 28.054569244384766, 8.53289794921875, 62.97943115234375, 6.737556457519531, 14.939964294433594, 12.973739624023438, -23.33868408203125, -5.168201446533203, 20.230667114257812, 88.84426879882812, 100.76434326171875, -36.15470886230469, -31.350372314453125, 11.603267669677734, 36.74633026123047, 1.622335433959961, 19.236927032470703, 36.962303161621094, 67.93453979492188, 34.22661590576172, 10.507123947143555, 10.093759536743164, 61.690155029296875, 17.404605865478516, 24.43425750732422, 17.228181838989258, 98.7386474609375, 41.05036163330078], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000090.npy"}
{"epoch": 0.13215859030837004, "step": 91, "batch_size": 64, "mean": 23.69327735900879, "std": 34.77570343017578, "min": -35.03614044189453, "p10": -19.32709655761719, "median": 21.616052627563477, "p90": 71.01205749511723, "max": 123.0406494140625, "pos_frac": 0.75, "sample": [18.795562744140625, 52.9696044921875, -34.18998718261719, 8.165470123291016, 75.71546936035156, 0.4600410461425781, -19.356979370117188, 44.928829193115234, 4.431007385253906, 20.774459838867188, 43.48329162597656, 40.922607421875, 43.84917449951172, 13.169708251953125, -35.03614044189453, 111.31690979003906, -11.20379638671875, 2.5139312744140625, 8.28851318359375, 85.79562377929688, 10.670433044433594, 33.35733413696289, 123.0406494140625, -17.693077087402344, 27.98717498779297, 57.337913513183594, -19.89104461669922, 3.391681671142578, 23.261642456054688, -5.2640838623046875, -19.054611206054688, 22.91388702392578, 60.03742980957031, -21.524124145507812, 18.964797973632812, 22.112838745117188, 57.020103454589844, -13.979347229003906, 21.119266510009766, -4.96306037902832, 9.247001647949219, 25.8323974609375, 59.745262145996094, 2.9596710205078125, -19.257369995117188, 37.788665771484375, 23.49462890625, 80.58761596679688, 25.680274963378906, 42.69672393798828, 11.929634094238281, -25.165817260742188, 0.7832317352294922, 29.298011779785156, -2.4432849884033203, 43.00844192504883, -0.3823566436767578, 85.82627868652344, 45.99614715576172, 25.20197296142578, 53.526641845703125, -26.053810119628906, 48.14637756347656, 83.2844009399414], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000091.npy"}
{"epoch": 0.13362701908957417, "step": 92, "batch_size": 64, "mean": 23.916975021362305, "std": 34.08684539794922, "min": -48.350372314453125, "p10": -11.600814819335938, "median": 17.891562461853027, "p90": 58.85210418701172, "max": 129.84942626953125, "pos_frac": 0.734375, "sample": [-4.368526458740234, 6.006690979003906, 7.664970397949219, 48.89757537841797, -7.875457763671875, 6.376548767089844, -48.350372314453125, -16.97730255126953, 48.06587219238281, 24.607749938964844, 66.54486083984375, 0.43599510192871094, 45.467254638671875, 2.5558929443359375, -6.7514190673828125, 113.96224975585938, 55.742279052734375, 129.84942626953125, -42.432342529296875, -5.6716461181640625, 25.36520767211914, -11.031723022460938, 27.440689086914062, -7.386882781982422, -17.476531982421875, 58.828765869140625, -12.289360046386719, 73.713134765625, 32.31031036376953, 5.281284332275391, 73.24602508544922, -2.6557369232177734, 34.2286376953125, 0.47715187072753906, 51.94898986816406, 8.140213012695312, 32.59214782714844, 106.77400207519531, 10.318859100341797, 58.055084228515625, 45.318756103515625, 58.86210632324219, -1.4521255493164062, -5.4894256591796875, 9.40646743774414, 24.506298065185547, 4.9704437255859375, 41.490570068359375, 35.87797546386719, 11.060283660888672, -0.2778778076171875, -13.07425308227539, 52.60500717163086, 17.02406120300293, 18.759063720703125, 12.935401916503906, 36.34226989746094, 51.80523681640625, 43.749046325683594, 24.886390686035156, 11.815114974975586, -11.844711303710938, 56.10560607910156, 33.67414855957031], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000092.npy"}
{"epoch": 0.13509544787077826, "step": 93, "batch_size": 64, "mean": 24.08247184753418, "std": 30.333179473876953, "min": -37.411460876464844, "p10": -4.440135955810546, "median": 16.90390396118164, "p90": 58.654702758789064, "max": 154.47140502929688, "pos_frac": 0.828125, "sample": [21.50139617919922, 9.109781265258789, 26.957183837890625, 64.15678405761719, 58.810585021972656, 55.61045837402344, 5.051544189453125, 22.971328735351562, 45.97980499267578, -15.639717102050781, 75.67776489257812, 19.75357437133789, 53.20030212402344, 51.09343719482422, 154.47140502929688, 12.844470977783203, -3.2919387817382812, 5.2224578857421875, 2.2720870971679688, 89.82959747314453, -8.20086669921875, 13.387975692749023, 22.15530776977539, 0.0395355224609375, 33.786468505859375, 9.769866943359375, 33.202903747558594, 5.784843444824219, -1.3072681427001953, 12.92706298828125, -0.7286014556884766, 13.91571044921875, 9.675228118896484, 19.525588989257812, 34.732383728027344, 53.39360046386719, 66.16458892822266, -23.687759399414062, -4.932220458984375, 49.54461669921875, 44.36573028564453, 35.110992431640625, 37.74950408935547, 6.825996398925781, 10.060869216918945, 9.39984130859375, 29.6651611328125, 2.7882633209228516, 58.290977478027344, 0.7880973815917969, -7.290641784667969, 60.181365966796875, 6.313270568847656, 17.410751342773438, 6.288808822631836, 5.519496917724609, -18.068687438964844, 31.87915802001953, 47.28034210205078, 36.059669494628906, 47.42406463623047, 16.397056579589844, -37.411460876464844, -0.4816741943359375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000093.npy"}
{"epoch": 0.13656387665198239, "step": 94, "batch_size": 64, "mean": 27.756542205810547, "std": 31.737600326538086, "min": -22.423233032226562, "p10": -7.44887409210205, "median": 23.498767852783203, "p90": 68.7798698425293, "max": 114.40194702148438, "pos_frac": 0.78125, "sample": [26.299720764160156, -17.335647583007812, 44.88787841796875, 19.618595123291016, 5.900962829589844, -15.166358947753906, 10.53839111328125, 61.72385787963867, 5.481359481811523, -20.5679931640625, 23.65279197692871, 12.479736328125, 25.580089569091797, 6.675483703613281, 11.490642547607422, 0.4120635986328125, 60.121734619140625, 12.141426086425781, -6.888843536376953, 114.40194702148438, 5.899507522583008, 30.218265533447266, 32.122459411621094, 54.85877990722656, 82.22085571289062, 28.641357421875, 13.567298889160156, 29.671859741210938, 21.392139434814453, -3.7394561767578125, -4.348932266235352, 17.3449649810791, 37.254791259765625, 60.68927001953125, 44.36367416381836, 19.38128662109375, 37.039886474609375, 68.89429473876953, 34.041080474853516, 23.344743728637695, 5.353448867797852, 45.097572326660156, -2.7208480834960938, -17.682159423828125, 68.51287841796875, 76.76385498046875, -7.434576034545898, 61.144683837890625, 113.87734985351562, -7.4550018310546875, -22.423233032226562, 89.66377258300781, 32.20344543457031, 57.59271240234375, -3.1083908081054688, 18.714447021484375, -14.108978271484375, -4.310821533203125, 36.9984130859375, 15.581924438476562, 36.54197311401367, 57.520721435546875, 73.0885009765625, 52.700958251953125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000094.npy"}
{"epoch": 0.13803230543318648, "step": 95, "batch_size": 64, "mean": 22.366260528564453, "std": 30.126068115234375, "min": -75.17951965332031, "p10": -7.306961441040038, "median": 16.124473571777344, "p90": 63.93638916015627, "max": 93.72421264648438, "pos_frac": 0.828125, "sample": [15.842391967773438, 43.591094970703125, 6.196922302246094, 23.648422241210938, 20.982484817504883, 70.08477783203125, 8.861541748046875, 6.282987594604492, -7.738956451416016, 15.680694580078125, 19.72760581970215, 4.558647155761719, 35.5521354675293, 83.02757263183594, 51.09663391113281, -6.298973083496094, 38.79944610595703, 3.096059799194336, 15.598167419433594, 47.872955322265625, 22.405242919921875, 39.9849853515625, 19.364290237426758, 15.25117301940918, 10.192468643188477, -10.940460205078125, 10.799705505371094, 54.604644775390625, 21.31732940673828, 29.657997131347656, -10.291786193847656, -7.759544372558594, 55.6986083984375, 93.72421264648438, 77.38180541992188, 58.8302001953125, 36.566871643066406, 1.6422061920166016, 0.22722625732421875, 66.124755859375, 47.138648986816406, 82.85479736328125, 71.89625549316406, 4.37554931640625, 14.859622955322266, 8.011262893676758, -1.5055160522460938, 32.873802185058594, 29.839279174804688, -75.17951965332031, 2.402069091796875, 2.2480716705322266, -31.36151123046875, 47.61968994140625, 44.423545837402344, 12.399898529052734, 41.74922180175781, 11.781335830688477, 17.488508224487305, 16.40655517578125, -5.275327682495117, -2.7906131744384766, 15.611465454101562, -37.67102813720703], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000095.npy"}
{"epoch": 0.1395007342143906, "step": 96, "batch_size": 64, "mean": 30.528703689575195, "std": 36.399131774902344, "min": -55.93699645996094, "p10": -0.27762069702148406, "median": 20.483097076416016, "p90": 78.62533493041994, "max": 157.32679748535156, "pos_frac": 0.890625, "sample": [62.63426208496094, 17.910179138183594, 37.96913146972656, 31.99405288696289, 20.246673583984375, 66.1182861328125, 6.869659423828125, 2.620197296142578, 1.279958724975586, 24.28173828125, 112.84825134277344, 16.891746520996094, 10.506401062011719, -4.755825042724609, 2.760486602783203, 22.259265899658203, 29.39000701904297, 44.88671875, 87.3360824584961, 91.31251525878906, 10.127571105957031, 44.889747619628906, 37.182395935058594, 157.32679748535156, 74.54979705810547, 14.522796630859375, 20.719520568847656, 10.08868408203125, 16.269989013671875, 15.375408172607422, 9.945304870605469, 138.32913208007812, 33.88970947265625, 29.141525268554688, 5.665992736816406, 90.86038208007812, 60.32502746582031, 6.252239227294922, 36.88227462768555, 1.5894889831542969, 48.344764709472656, 26.904088973999023, 23.39605712890625, -10.208732604980469, 15.601385116577148, 0.016332626342773438, -1.6788864135742188, 41.189186096191406, -0.8115825653076172, 25.834022521972656, 12.60650634765625, 59.97925567626953, -14.92072868347168, 7.5145263671875, 67.20518493652344, 11.220962524414062, 17.890653610229492, 64.77427673339844, 80.37199401855469, 0.8981857299804688, -0.40360069274902344, 32.892601013183594, -55.93699645996094, 1.8639297485351562], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000096.npy"}
{"epoch": 0.14096916299559473, "step": 97, "batch_size": 64, "mean": 23.692989349365234, "std": 28.4271183013916, "min": -63.22728729248047, "p10": -8.356702232360838, "median": 22.95645046234131, "p90": 58.69494476318361, "max": 108.00653076171875, "pos_frac": 0.78125, "sample": [17.98486328125, 52.515594482421875, 2.108154296875, 1.785125732421875, -5.482624053955078, -3.3047828674316406, 16.59494400024414, 28.37688636779785, 2.482189178466797, 48.11504364013672, 24.218914031982422, 42.96101379394531, -0.8130836486816406, 18.862533569335938, 80.08091735839844, 62.60432434082031, -21.463546752929688, 35.383636474609375, 0.7913436889648438, 13.964761734008789, 27.459396362304688, 6.640911102294922, 16.570205688476562, -19.16510009765625, 72.475341796875, 20.293466567993164, 25.87097930908203, 30.93065643310547, 42.206748962402344, 15.762760162353516, 27.26744842529297, -6.138078689575195, 16.932174682617188, -12.917642593383789, 50.05113220214844, 43.12211608886719, 50.576148986816406, 30.015789031982422, -63.22728729248047, -9.535354614257812, 49.76128387451172, 33.369014739990234, -10.015426635742188, 8.054248809814453, 56.396820068359375, 35.42518997192383, 1.6580429077148438, 21.693986892700195, 24.92189598083496, 17.815940856933594, -4.388525009155273, 59.67985534667969, -9.307540893554688, 51.32411575317383, -1.5983772277832031, 65.79121398925781, -1.3001327514648438, 44.458526611328125, 48.88849639892578, 62.92503356933594, 34.236324310302734, 9.343425750732422, 108.00653076171875, 26.253265380859375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000097.npy"}
{"epoch": 0.14243759177679882, "step": 98, "batch_size": 64, "mean": 27.052547454833984, "std": 37.74333190917969, "min": -105.12384033203125, "p10": -12.940454101562496, "median": 29.854888916015625, "p90": 69.12065505981445, "max": 129.50482177734375, "pos_frac": 0.796875, "sample": [41.59722900390625, 6.279232025146484, 66.90748596191406, 44.18684387207031, 6.612285614013672, 20.201196670532227, 24.113113403320312, 27.517356872558594, 30.31964111328125, 7.2934722900390625, 31.025049209594727, -8.2142333984375, 21.395889282226562, 23.948265075683594, 68.62229919433594, 50.58483123779297, -14.294429779052734, 2.7386474609375, -0.4688568115234375, -23.80901336669922, 41.24064254760742, 36.69017028808594, -30.817291259765625, -38.604248046875, 57.89458465576172, 55.17329788208008, 31.657363891601562, 69.33423614501953, 71.96891784667969, 26.402423858642578, 40.588531494140625, -3.277923583984375, 62.52214813232422, 129.50482177734375, -105.12384033203125, 85.39375305175781, 61.382110595703125, 30.105194091796875, 47.2147216796875, 18.229820251464844, -9.781177520751953, 40.391624450683594, -56.47711181640625, 26.236900329589844, 37.52679443359375, 3.8256187438964844, 40.39826202392578, 84.06596374511719, 60.958091735839844, 37.782108306884766, 16.06006622314453, 10.943336486816406, 75.97642517089844, 106.74252319335938, 34.603172302246094, 33.283870697021484, 29.604583740234375, 14.527624130249023, 3.5599822998046875, -9.667789459228516, 56.329002380371094, -3.8161163330078125, 2.8136234283447266, -18.560081481933594], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000098.npy"}
{"epoch": 0.14390602055800295, "step": 99, "batch_size": 64, "mean": 33.028785705566406, "std": 41.816871643066406, "min": -33.319236755371094, "p10": -7.8356597900390605, "median": 22.774709701538086, "p90": 83.66069793701172, "max": 181.33424377441406, "pos_frac": 0.828125, "sample": [-1.0140132904052734, -8.558372497558594, 6.233543395996094, 22.347339630126953, 2.1162185668945312, 44.081687927246094, 26.32398223876953, 30.710792541503906, 108.00515747070312, 20.348657608032227, 25.1361083984375, 124.147216796875, 7.872554779052734, -31.192138671875, 83.83100891113281, -4.628334045410156, 23.20207977294922, 37.18138122558594, -1.0018196105957031, 17.633132934570312, 74.62347412109375, 15.046592712402344, 11.808080673217773, 161.5338134765625, 9.778533935546875, 60.2659912109375, 2.8067398071289062, -25.66516876220703, 11.327005386352539, -33.319236755371094, -9.757038116455078, 55.60853576660156, 39.151283264160156, 27.272441864013672, 66.34552001953125, 181.33424377441406, 75.25930786132812, 39.15116500854492, 16.41846466064453, 19.93909454345703, 5.322029113769531, 30.37042999267578, 38.68597412109375, 83.2633056640625, 101.89434051513672, 6.551322937011719, 12.040351867675781, 68.8671875, -6.149330139160156, 0.7846088409423828, 32.87696838378906, 89.63050842285156, 30.551280975341797, 11.040950775146484, 57.8956298828125, -9.909896850585938, 42.13106918334961, 15.273355484008789, 67.15077209472656, 52.17334747314453, 21.705184936523438, 47.72346496582031, 3.8909950256347656, -21.626731872558594], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000099.npy"}
{"epoch": 0.14537444933920704, "step": 100, "batch_size": 64, "mean": 19.44617462158203, "std": 36.13125228881836, "min": -76.05886840820312, "p10": -19.21170864105224, "median": 19.294036865234375, "p90": 64.92655410766602, "max": 144.59164428710938, "pos_frac": 0.765625, "sample": [55.62482452392578, 31.3592586517334, -6.5916595458984375, 15.272666931152344, 23.70923614501953, -9.141815185546875, 33.104522705078125, 26.416015625, 8.105182647705078, 41.93656921386719, 12.013137817382812, 0.9915409088134766, 38.604705810546875, 24.277366638183594, 4.673521041870117, 79.29156494140625, 19.782684326171875, -44.50439453125, -31.219650268554688, -53.249542236328125, -13.783548355102539, -9.44384765625, 1.3062000274658203, -63.21205520629883, 56.02936935424805, 18.805389404296875, 50.26068115234375, 24.4674072265625, 1.3186416625976562, 6.45268440246582, 43.86363220214844, -1.7361602783203125, 45.74549102783203, 6.495006561279297, 68.26593017578125, 30.65807342529297, 8.38644027709961, 11.634880065917969, 63.02397918701172, 25.535369873046875, 40.7296028137207, 15.802139282226562, 7.366943359375, 0.3052082061767578, 31.064437866210938, 35.32615661621094, -8.72589111328125, -26.338226318359375, -6.766265869140625, 67.82353210449219, 38.086036682128906, 69.15308380126953, 24.637163162231445, -76.05886840820312, 144.59164428710938, -11.424737930297852, 16.826675415039062, 80.77865600585938, -21.538063049316406, 65.741943359375, 47.16442108154297, 7.025535583496094, 38.11589050292969, 20.338851928710938], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000100.npy"}
{"epoch": 0.14684287812041116, "step": 101, "batch_size": 64, "mean": 23.464595794677734, "std": 40.59137725830078, "min": -53.915374755859375, "p10": -20.500362014770506, "median": 18.83548641204834, "p90": 85.56019897460945, "max": 132.16836547851562, "pos_frac": 0.6875, "sample": [97.71882629394531, 132.16836547851562, 30.46002197265625, 30.10295867919922, -14.548866271972656, 103.48058319091797, 108.48028564453125, 10.261268615722656, 92.95466613769531, 108.17146301269531, -34.469268798828125, 28.730560302734375, -39.33081817626953, 48.49542999267578, 13.904895782470703, -9.539237976074219, -21.850692749023438, 13.998285293579102, 62.93419647216797, 28.683212280273438, -15.988502502441406, 25.8289794921875, 52.22938537597656, 63.931434631347656, 19.39257049560547, 18.211814880371094, 9.695621490478516, 65.0372085571289, 117.74591064453125, 25.979736328125, -7.334625244140625, -6.529270172119141, -14.049715042114258, 51.64076614379883, 17.781721115112305, 37.95045471191406, 68.30644226074219, 44.83732604980469, -6.6248931884765625, 6.534832000732422, -2.490528106689453, -17.349590301513672, 20.436519622802734, -7.387016296386719, 0.91680908203125, -14.499441146850586, 34.818260192871094, 34.27617263793945, 8.776651382446289, 25.874174118041992, -53.915374755859375, 35.467369079589844, 1.140411376953125, -30.684856414794922, 25.40073013305664, 18.27840232849121, -15.294197082519531, 39.32136535644531, 7.00115966796875, -1.8675537109375, 51.2833251953125, -27.554973602294922, 39.73224639892578, -35.3292236328125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000101.npy"}
{"epoch": 0.14831130690161526, "step": 102, "batch_size": 64, "mean": 17.24917221069336, "std": 38.234046936035156, "min": -78.19869995117188, "p10": -22.227838897705077, "median": 10.931655883789062, "p90": 55.86031265258789, "max": 137.2321319580078, "pos_frac": 0.671875, "sample": [2.9127883911132812, 38.39869689941406, 57.958900451660156, 11.738594055175781, 3.3182373046875, 6.309955596923828, -14.717575073242188, -44.7216796875, 22.29570960998535, -75.70700073242188, 28.344589233398438, 88.255126953125, -11.367523193359375, 38.03895568847656, 28.54513931274414, -8.905786514282227, 10.124717712402344, 34.241058349609375, 50.118560791015625, 0.9579048156738281, -15.558746337890625, -4.020906448364258, 47.07037353515625, -4.605531692504883, 56.20458984375, 23.146839141845703, 37.963409423828125, 137.2321319580078, -3.0690383911132812, 0.26556396484375, 110.35382843017578, -0.27492713928222656, 37.914127349853516, 42.129112243652344, 3.7407588958740234, -16.187759399414062, 32.009361267089844, -78.19869995117188, 0.710357666015625, -27.71731185913086, 0.9548320770263672, -43.6142578125, -23.009925842285156, -3.545928955078125, 55.05699920654297, 30.86761474609375, 42.0975341796875, 80.27685546875, 43.406036376953125, 8.0606689453125, 68.17565155029297, -26.84893035888672, -6.3740997314453125, 6.720073699951172, 51.56658172607422, -20.402969360351562, 39.43817138671875, 34.385353088378906, 25.631410598754883, 36.8988037109375, -12.645370483398438, -16.655487060546875, 41.656471252441406, 46.604068756103516], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000102.npy"}
{"epoch": 0.14977973568281938, "step": 103, "batch_size": 64, "mean": 34.63999938964844, "std": 42.25282287597656, "min": -35.9150390625, "p10": -12.196329498291016, "median": 27.72077178955078, "p90": 81.8691879272461, "max": 180.104736328125, "pos_frac": 0.796875, "sample": [-5.777065277099609, 7.181648254394531, 13.624885559082031, 33.04163360595703, -20.772232055664062, -25.509613037109375, 33.807003021240234, 39.60657501220703, 13.741764068603516, 19.25491714477539, 28.119277954101562, 44.30938720703125, 17.18041229248047, 19.14752197265625, -2.602447509765625, 66.37712860107422, 111.94976806640625, 36.00360870361328, 50.63917541503906, 60.41938018798828, -34.04533386230469, -12.142608642578125, 49.675872802734375, 27.322265625, -35.9150390625, 75.06281280517578, 35.413551330566406, -16.353973388671875, 13.275703430175781, 19.611618041992188, 38.60790252685547, 165.32693481445312, 26.33777618408203, 18.909114837646484, 71.37600708007812, 67.28461456298828, -6.247657775878906, -12.219352722167969, 22.219385147094727, 17.17466926574707, -4.4727325439453125, 39.812294006347656, 63.386268615722656, -29.065216064453125, 22.888458251953125, 98.76508331298828, 80.27200317382812, 27.106094360351562, 180.104736328125, 99.54283142089844, 103.99724578857422, 82.55369567871094, 12.461761474609375, 39.97101593017578, 67.3497085571289, -1.3462772369384766, 31.86677360534668, 3.297121047973633, 4.281642913818359, 51.33033752441406, 79.3364486694336, 16.337818145751953, 31.81640625, 44.949371337890625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000103.npy"}
{"epoch": 0.1512481644640235, "step": 104, "batch_size": 64, "mean": 32.84601593017578, "std": 47.76563262939453, "min": -63.74711608886719, "p10": -12.902635383605956, "median": 20.49639892578125, "p90": 91.85311889648438, "max": 187.19122314453125, "pos_frac": 0.75, "sample": [91.96124267578125, -7.3973236083984375, 14.116201400756836, 14.850379943847656, 62.11224365234375, -6.663307189941406, -28.200599670410156, 3.1291465759277344, -2.8052520751953125, 38.629356384277344, -13.359933853149414, 34.62425231933594, 148.27978515625, 51.605682373046875, 86.24939727783203, -2.254405975341797, 91.600830078125, 32.67962646484375, -17.28813934326172, 114.69918823242188, 38.36814880371094, 4.874473571777344, 20.206493377685547, 7.2968902587890625, 14.775278091430664, 2.4318294525146484, 187.19122314453125, -45.178016662597656, 57.46509552001953, 133.0963897705078, 138.13751220703125, 30.7012939453125, 13.707096099853516, 65.82481384277344, 12.246116638183594, 14.57958984375, -11.83560562133789, -22.184036254882812, 112.08547973632812, 15.580772399902344, 50.23255157470703, -6.46124267578125, 69.29631042480469, -3.3030166625976562, 83.21868896484375, 38.570068359375, 31.35004425048828, 49.7529411315918, 36.77809143066406, 32.19206237792969, 4.474020004272461, 89.20706176757812, 6.883705139160156, 27.334815979003906, 63.56095886230469, 67.69869232177734, -7.19605827331543, 27.659151077270508, -0.38265228271484375, 20.786304473876953, -22.214134216308594, 1.9693164825439453, -63.74711608886719, 8.545129776000977], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000104.npy"}
{"epoch": 0.1527165932452276, "step": 105, "batch_size": 64, "mean": 46.669586181640625, "std": 41.742515563964844, "min": -66.13911437988281, "p10": 3.07851028442383, "median": 43.37665557861328, "p90": 100.00765075683597, "max": 177.32069396972656, "pos_frac": 0.90625, "sample": [47.997833251953125, 30.132797241210938, 45.71294403076172, 29.062026977539062, 42.17499542236328, 43.960243225097656, 72.25877380371094, 110.07648468017578, 9.709711074829102, -66.13911437988281, -6.403266906738281, 80.63636779785156, 82.43008422851562, 13.512723922729492, 34.517364501953125, 49.2097282409668, -33.44430160522461, 2.4027938842773438, -6.3295135498046875, 109.27584075927734, 31.5765380859375, 108.63221740722656, 71.65806579589844, 20.469268798828125, 11.507827758789062, 80.98731231689453, 62.88751220703125, 42.793067932128906, 9.158447265625, 38.69567108154297, 65.50550842285156, 163.52154541015625, 20.9398193359375, 51.640594482421875, 17.926891326904297, 18.715538024902344, 70.59086608886719, 90.66096496582031, 47.13493347167969, 16.55984115600586, 50.72142028808594, 93.07833862304688, 36.47943115234375, 38.45948028564453, 42.455352783203125, 52.84589385986328, -1.1635818481445312, 65.45333862304688, 68.732177734375, 102.97735595703125, 37.50567626953125, 23.208282470703125, -17.645919799804688, 9.329986572265625, 65.648193359375, 60.722930908203125, 56.114463806152344, 111.45645141601562, 60.7598876953125, 177.32069396972656, 25.64038848876953, 4.655181884765625, 12.63581657409668, 77.14509582519531], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000105.npy"}
{"epoch": 0.15418502202643172, "step": 106, "batch_size": 64, "mean": 29.177989959716797, "std": 35.9396858215332, "min": -54.515655517578125, "p10": -13.09050216674804, "median": 29.76407241821289, "p90": 77.08616638183595, "max": 132.12200927734375, "pos_frac": 0.84375, "sample": [28.333816528320312, 5.298978805541992, 7.855474472045898, -3.949655532836914, 18.683645248413086, 1.7486820220947266, 48.28679656982422, -5.991279602050781, 31.13965606689453, 68.73355102539062, 36.94367980957031, 36.34602737426758, 2.592723846435547, 74.65898132324219, 26.347457885742188, 91.92787170410156, 7.576147079467773, 12.32777214050293, 78.12638854980469, 1.2058353424072266, 63.93072509765625, 41.47476577758789, 1.8907394409179688, 84.7901611328125, 23.62143898010254, 43.47118377685547, 8.301353454589844, -16.133026123046875, 53.527381896972656, -36.01582336425781, 48.20677947998047, 14.268226623535156, 41.25740432739258, 40.65030288696289, -2.1031246185302734, 5.658485412597656, 22.749744415283203, 2.6961593627929688, 91.2049560546875, 63.267982482910156, 62.763946533203125, 24.470111846923828, -54.515655517578125, 4.6068267822265625, 132.12200927734375, 36.701202392578125, 29.462432861328125, 34.285072326660156, 33.679412841796875, 8.162742614746094, 0.8919143676757812, 58.377586364746094, 105.85311126708984, 34.234275817871094, 55.7421875, 56.62298583984375, -23.82404327392578, 33.046424865722656, -21.466094970703125, 30.065711975097656, 78.88642883300781, -41.18811798095703, -23.6265869140625, 47.12920379638672], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000106.npy"}
{"epoch": 0.15565345080763582, "step": 107, "batch_size": 64, "mean": 32.95298767089844, "std": 42.5029411315918, "min": -87.79942321777344, "p10": -12.749419403076162, "median": 22.43692111968994, "p90": 85.40412139892578, "max": 139.2478485107422, "pos_frac": 0.84375, "sample": [84.65200805664062, 110.81082153320312, 47.959228515625, 68.2773208618164, 24.70848846435547, 30.71575164794922, 51.53076171875, -3.343505859375, -87.79942321777344, 9.246286392211914, 16.09282875061035, 77.62564086914062, -37.60950469970703, 27.63709259033203, 10.363468170166016, 7.206901550292969, 14.749935150146484, 85.72645568847656, 1.4093132019042969, -27.400367736816406, 107.21187591552734, 66.65178680419922, 77.94534301757812, 5.548913955688477, -16.78052520751953, 3.4954280853271484, -1.4174957275390625, 4.88885498046875, 139.2478485107422, 74.64423370361328, 19.558856964111328, 17.986343383789062, 97.0347671508789, 45.28839111328125, 65.18499755859375, 87.83209991455078, 16.622678756713867, 28.500350952148438, 16.565717697143555, -16.805397033691406, 60.86631774902344, -45.609710693359375, 65.17938995361328, 76.51632690429688, 11.624443054199219, 82.99141693115234, 1.5728530883789062, 79.21630859375, 49.99751281738281, 54.40257263183594, 22.835838317871094, 22.03800392150879, 3.1946868896484375, -26.36705780029297, -0.9222412109375, 5.065521240234375, 12.804622650146484, 73.72416687011719, 8.541946411132812, 24.513437271118164, 64.90467834472656, 98.37020874023438, 2.3292388916015625, 9.436233520507812], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000107.npy"}
{"epoch": 0.15712187958883994, "step": 108, "batch_size": 64, "mean": 39.5773811340332, "std": 57.56081771850586, "min": -60.89237976074219, "p10": -23.192282295227052, "median": 28.63060760498047, "p90": 122.62135925292971, "max": 211.9638671875, "pos_frac": 0.765625, "sample": [35.890281677246094, 135.84657287597656, 39.80132293701172, -46.732696533203125, -60.89237976074219, 104.615478515625, 30.32975959777832, 8.786338806152344, 211.9638671875, -0.0262451171875, -11.211471557617188, 127.1253890991211, 8.370048522949219, 5.983757019042969, -5.761161804199219, 72.90009307861328, -6.989307403564453, 94.66535949707031, 162.22886657714844, 106.98783111572266, 116.25045776367188, 81.86106872558594, 1.9106788635253906, -22.950101852416992, 48.19976043701172, 10.656045913696289, 28.264793395996094, 15.323688507080078, 76.26629638671875, 5.4116973876953125, 25.498870849609375, 0.5279769897460938, 72.38568878173828, -23.29607391357422, 172.43048095703125, -39.350341796875, 79.94041442871094, -29.924823760986328, -14.504501342773438, 4.171419143676758, 26.59729766845703, 8.3770751953125, 50.76457977294922, 154.52346801757812, 34.487770080566406, 11.165420532226562, 86.96468353271484, 72.12446594238281, 30.281587600708008, -51.18193054199219, 13.8056640625, 51.194313049316406, 125.35174560546875, 62.695579528808594, -1.6973457336425781, -1.177469253540039, 38.866859436035156, 16.879844665527344, 73.49093627929688, -51.29838943481445, 28.996421813964844, 40.93905258178711, 65.0436782836914, 22.801860809326172], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000108.npy"}
{"epoch": 0.15859030837004406, "step": 109, "batch_size": 64, "mean": 42.396968841552734, "std": 50.21357727050781, "min": -82.48141479492188, "p10": -10.112741088867187, "median": 36.48912811279297, "p90": 107.72051086425783, "max": 185.36685180664062, "pos_frac": 0.84375, "sample": [49.73982238769531, 7.662517547607422, 133.9944610595703, 84.78936004638672, 29.638961791992188, 53.33445358276367, -56.57292175292969, 11.397331237792969, 17.65654754638672, 50.32032012939453, 130.50161743164062, 42.254390716552734, 72.39042663574219, 52.84025573730469, 56.01789855957031, 5.716697692871094, 21.817684173583984, 36.912384033203125, -13.151344299316406, -5.329870223999023, -6.024326324462891, 185.36685180664062, 0.5523681640625, -10.136444091796875, 25.709375381469727, 4.2260589599609375, 125.26225280761719, 30.14253807067871, 31.7155818939209, -38.02022171020508, 62.83497619628906, 68.38771057128906, 9.35952377319336, 69.42684936523438, 56.36529541015625, 38.75226593017578, 99.02095031738281, 70.7745132446289, 109.20662689208984, 104.2529067993164, 0.6278152465820312, 12.903770446777344, -82.48141479492188, 29.53931427001953, 1.685943603515625, 73.63410186767578, 33.90830993652344, 34.22933578491211, 19.36232566833496, 83.81454467773438, -48.713775634765625, 33.819366455078125, 37.18034744262695, 112.57252502441406, 128.2690887451172, 36.06587219238281, 86.66997528076172, 37.06731414794922, 88.94097900390625, 102.5125961303711, -10.05743408203125, -23.125396728515625, 3.605653762817383, 102.26810455322266], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000109.npy"}
{"epoch": 0.16005873715124816, "step": 110, "batch_size": 64, "mean": 38.10965347290039, "std": 53.41533660888672, "min": -98.44601440429688, "p10": -14.255920791625975, "median": 33.34476375579834, "p90": 109.09635009765627, "max": 181.96685791015625, "pos_frac": 0.828125, "sample": [118.79679870605469, 54.00776672363281, 22.05678939819336, 52.17692565917969, 57.72008514404297, 37.84477996826172, 49.22813034057617, 1.8115730285644531, -98.44601440429688, 45.66754913330078, 65.267333984375, 16.6484375, -63.420379638671875, 18.84832000732422, 11.882030487060547, 1.3430843353271484, 30.9155216217041, 168.63339233398438, 104.50033569335938, 27.21902084350586, 4.751070022583008, -8.07449722290039, 181.96685791015625, 134.91436767578125, 101.42393493652344, 77.08926391601562, 17.83898162841797, -16.965858459472656, -13.059383392333984, 60.81034851074219, -43.10081481933594, -14.768722534179688, 10.251167297363281, 19.06092071533203, 6.22178840637207, 156.83346557617188, 54.15715026855469, 82.99578857421875, 111.06607055664062, 4.454063415527344, 75.24497985839844, -8.11667537689209, 38.259788513183594, 35.77400588989258, 14.811161041259766, 11.348182678222656, 81.70269012451172, -57.90740966796875, 74.97447204589844, -24.836837768554688, 38.24296569824219, 1.9804039001464844, 55.31768035888672, 36.023616790771484, 51.32475662231445, 43.757659912109375, 20.38763427734375, 19.73340606689453, 17.949268341064453, 44.794029235839844, 78.47108459472656, 2.3059024810791016, -4.5089569091796875, 141.41650390625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000110.npy"}
{"epoch": 0.16152716593245228, "step": 111, "batch_size": 64, "mean": 43.028907775878906, "std": 59.06182098388672, "min": -83.12123107910156, "p10": -32.704910278320305, "median": 33.973236083984375, "p90": 118.10268096923829, "max": 193.85946655273438, "pos_frac": 0.765625, "sample": [193.85946655273438, -50.95867919921875, -82.60763549804688, 119.39306640625, -6.674468994140625, 53.79307556152344, 13.598251342773438, 34.618629455566406, 27.268253326416016, 143.74171447753906, 31.636802673339844, 78.13841247558594, 42.78340148925781, 55.85508728027344, 28.639068603515625, 111.54067993164062, 110.84896087646484, -36.317901611328125, 97.99092102050781, 8.033332824707031, 78.82600402832031, -26.718231201171875, -21.790767669677734, -10.378387451171875, 66.05754089355469, 115.09178161621094, -5.137077331542969, 112.24479675292969, -35.2706298828125, 11.876213073730469, -22.681106567382812, 33.327842712402344, 133.65106201171875, 23.352584838867188, 147.95693969726562, 55.968414306640625, 31.334510803222656, 54.721309661865234, 67.48958587646484, 130.78741455078125, 23.6241455078125, -43.511749267578125, 18.796234130859375, 28.146991729736328, 133.37908935546875, 68.56964111328125, 27.688072204589844, 58.75508117675781, 110.55485534667969, 2.583293914794922, 110.68173217773438, 38.057373046875, 94.44799041748047, 70.33352661132812, 50.96924591064453, 13.991155624389648, -83.12123107910156, 31.85832977294922, 28.388154983520508, 41.08148193359375, -5.646486282348633, 93.55326843261719, -64.26165771484375, -10.958984375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000111.npy"}
{"epoch": 0.16299559471365638, "step": 112, "batch_size": 64, "mean": 27.551637649536133, "std": 40.65027618408203, "min": -59.55384063720703, "p10": -24.845450592041015, "median": 25.718181610107422, "p90": 90.95178375244143, "max": 121.75540161132812, "pos_frac": 0.734375, "sample": [44.35562515258789, 121.75540161132812, 85.92146301269531, -6.449529647827148, -42.64882278442383, 33.28072738647461, 36.93528747558594, 30.004608154296875, 28.91241455078125, -5.42510986328125, 100.1826171875, 78.9637451171875, -0.4905681610107422, -2.4456634521484375, 93.10763549804688, 61.63078689575195, 39.5184211730957, 55.79173278808594, 39.94593811035156, 37.27174377441406, 45.35704040527344, 30.212486267089844, 70.16229248046875, -15.476264953613281, 25.660614013671875, 22.89554786682129, 30.54810333251953, 32.546810150146484, -24.85052490234375, 17.133846282958984, 4.500848770141602, -33.11055374145508, 31.342002868652344, 107.5206527709961, 111.57233428955078, -32.01908874511719, 40.87413787841797, 15.199033737182617, 69.51713562011719, -0.08094215393066406, -59.55384063720703, 96.9474868774414, 25.337867736816406, 12.412986755371094, -11.818359375, 21.584178924560547, 60.752899169921875, 0.32857513427734375, 16.20734405517578, 96.97181701660156, 25.77574920654297, 8.791706085205078, 44.83274841308594, -41.37152099609375, 12.02284049987793, -24.83361053466797, -26.079132080078125, -11.149070739746094, 74.70852661132812, 1.1684703826904297, 24.27389907836914, 5.399051666259766, -8.61175537109375, 39.58197784423828], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000112.npy"}
{"epoch": 0.1644640234948605, "step": 113, "batch_size": 64, "mean": 29.678359985351562, "std": 38.32505798339844, "min": -40.037452697753906, "p10": -16.35086936950683, "median": 30.94820213317871, "p90": 82.64872970581055, "max": 116.96271514892578, "pos_frac": 0.734375, "sample": [-32.046051025390625, 75.64707946777344, -18.104646682739258, 88.4384765625, 9.422271728515625, 39.49354553222656, 14.363594055175781, 36.810829162597656, 3.340677261352539, -9.439472198486328, 8.641647338867188, 84.86726379394531, 54.18737030029297, 109.760009765625, 38.195587158203125, -19.52924156188965, -6.560821533203125, -8.39581298828125, 8.460205078125, 65.2711181640625, -20.095657348632812, 48.23345184326172, 80.55577087402344, -19.562225341796875, 54.490882873535156, -6.8732147216796875, 3.1419200897216797, 116.96271514892578, 48.211814880371094, -40.037452697753906, 18.04076385498047, 75.73783111572266, 35.264774322509766, -10.013809204101562, 36.76087951660156, 53.95564270019531, 13.148078918457031, 82.72904205322266, 19.863197326660156, -12.18701171875, 52.92973327636719, 70.65509796142578, -12.258722305297852, 1.681753158569336, -24.720947265625, 45.37203598022461, 36.00810241699219, 40.5916748046875, -10.241798400878906, 15.836084365844727, 113.12911987304688, -2.531597137451172, 33.649959564208984, 32.35027313232422, 98.91472625732422, 82.46133422851562, 33.57713317871094, 63.31867218017578, -6.707651138305664, 9.832290649414062, 13.532196044921875, 15.56515884399414, 29.546131134033203, 45.77317810058594], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000113.npy"}
{"epoch": 0.16593245227606462, "step": 114, "batch_size": 64, "mean": 47.11562728881836, "std": 64.65396118164062, "min": -38.721038818359375, "p10": -27.5888069152832, "median": 42.66447830200195, "p90": 112.73421859741215, "max": 349.51531982421875, "pos_frac": 0.765625, "sample": [179.415283203125, 90.8254165649414, 53.17125701904297, 47.78173065185547, 27.723220825195312, 118.21235656738281, 19.709291458129883, 47.74687194824219, -38.721038818359375, 67.06266784667969, -31.819189071655273, 71.60092163085938, 44.293617248535156, 22.66767120361328, -37.06433868408203, 84.27818298339844, 14.506694793701172, -32.989410400390625, 34.36304473876953, 122.04960632324219, 13.170408248901367, 54.82575225830078, -5.770294189453125, 117.62726593017578, 41.355918884277344, 60.109031677246094, 34.157554626464844, 24.515247344970703, 37.3140869140625, 60.92751693725586, 65.15396881103516, -5.7647552490234375, -8.01263427734375, 56.387237548828125, -24.808326721191406, 15.954122543334961, 49.52124786376953, 68.99282836914062, 41.99174499511719, 101.31710815429688, 59.14255142211914, 87.517333984375, 5.46989631652832, 50.397071838378906, 38.68025588989258, -13.796733856201172, 232.8156280517578, 6.970340728759766, 43.33721160888672, 142.15042114257812, -9.826147079467773, 73.49369049072266, -38.56678009033203, -28.780441284179688, 349.51531982421875, 56.72518539428711, -4.001804351806641, 87.67301940917969, 34.94920349121094, -33.4997444152832, 27.106407165527344, 87.08561706542969, -18.181812286376953, 75.24545288085938], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000114.npy"}
{"epoch": 0.16740088105726872, "step": 115, "batch_size": 64, "mean": 32.45915222167969, "std": 65.51622009277344, "min": -150.52056884765625, "p10": -28.02845687866211, "median": 18.580130577087402, "p90": 126.72046890258795, "max": 237.09579467773438, "pos_frac": 0.765625, "sample": [0.5534286499023438, -2.549348831176758, 133.6741485595703, 58.16386413574219, -51.86225891113281, 0.151641845703125, -45.72901916503906, 10.26732063293457, 12.495941162109375, 16.79682159423828, 25.733734130859375, -6.3173370361328125, 43.23801040649414, 56.93202209472656, 10.252153396606445, 169.5082550048828, 22.00697898864746, -8.313095092773438, 237.09579467773438, -11.721685409545898, 74.83747100830078, 6.712799072265625, 139.34210205078125, 70.13774108886719, 170.6221923828125, -28.200668334960938, 24.076126098632812, 25.339557647705078, 8.19087028503418, 96.82200622558594, 172.50294494628906, 99.62857055664062, 1.7606964111328125, -122.06375885009766, 9.607669830322266, 35.83708190917969, 58.85516357421875, 16.55758285522461, 8.88553237915039, 44.152137756347656, 28.21026611328125, 50.177886962890625, 26.22821044921875, 30.544723510742188, 141.08761596679688, -27.719146728515625, 22.183860778808594, 17.40192413330078, -8.442436218261719, 12.122642517089844, 19.758337020874023, 78.20057678222656, 2.8744125366210938, 13.682933807373047, -20.679731369018555, 110.4952163696289, 63.9361572265625, -16.663217544555664, 63.19441223144531, -150.52056884765625, 108.24755859375, -47.090003967285156, 4.334047317504883, -28.16101837158203], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000115.npy"}
{"epoch": 0.16886930983847284, "step": 116, "batch_size": 64, "mean": 36.026241302490234, "std": 46.1640739440918, "min": -35.67174530029297, "p10": -17.20233097076416, "median": 31.354171752929688, "p90": 90.32418365478516, "max": 213.25132751464844, "pos_frac": 0.78125, "sample": [7.405750274658203, 4.290943145751953, -32.46671676635742, -28.9151611328125, 25.13132667541504, 44.589378356933594, 64.09385681152344, 23.603057861328125, 48.040283203125, 57.88653564453125, 31.0062255859375, -9.604509353637695, 42.70646667480469, 27.905738830566406, 9.560707092285156, -6.809074401855469, 54.13903045654297, -35.67174530029297, 82.89317321777344, 88.92771911621094, 39.628379821777344, -18.27349853515625, 22.35833740234375, 17.44029998779297, 49.20008850097656, 25.214920043945312, 77.38116455078125, 106.47050476074219, 31.702117919921875, -33.18916320800781, 122.31582641601562, 36.51044845581055, 22.56676483154297, 42.68121337890625, -14.702939987182617, -28.083641052246094, 4.904094696044922, 9.349105834960938, -4.435670852661133, 102.2060546875, 5.612979888916016, 132.34214782714844, 32.15808868408203, 88.05615234375, 79.5701904296875, 33.27157974243164, 54.574668884277344, -31.1102294921875, -8.204017639160156, 81.9198989868164, 23.981727600097656, -6.439517974853516, 213.25132751464844, 66.35057830810547, 11.27874755859375, 51.75257873535156, 67.43138122558594, 63.749568939208984, -10.863000869750977, 15.152328491210938, 2.0116195678710938, 40.408782958984375, 98.54171752929688, 90.92266845703125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000116.npy"}
{"epoch": 0.17033773861967694, "step": 117, "batch_size": 64, "mean": 29.55158042907715, "std": 50.94339370727539, "min": -101.67010498046875, "p10": -25.60205726623534, "median": 24.399970054626465, "p90": 86.30660095214846, "max": 187.22433471679688, "pos_frac": 0.765625, "sample": [13.747371673583984, 53.88427734375, 49.6153564453125, -17.263652801513672, 22.18414878845215, -3.0039520263671875, 18.92445182800293, 38.74274444580078, 21.25829315185547, 40.31407928466797, 44.57522201538086, 67.59558868408203, 131.69915771484375, 115.17707824707031, 17.70844268798828, 45.298545837402344, -54.63029479980469, 88.72685241699219, 5.2038116455078125, -93.81594848632812, -29.1756591796875, 7.945281982421875, 16.321426391601562, 62.41831588745117, 44.537479400634766, -14.450576782226562, -33.68800354003906, 3.8016319274902344, 26.61579132080078, -15.109657287597656, 39.44793701171875, 121.8936767578125, -2.7556228637695312, 2.77252197265625, 148.4891357421875, 13.957782745361328, 187.22433471679688, 16.695465087890625, 29.775794982910156, 65.82483673095703, 15.19131851196289, 8.228702545166016, 8.004718780517578, -9.002769470214844, -101.67010498046875, 30.50591278076172, 80.65934753417969, 32.506622314453125, 91.23204040527344, 38.84950256347656, -44.7867431640625, -9.860811233520508, 7.504817962646484, 54.306034088134766, 79.76966857910156, 80.14031982421875, 6.035404205322266, 49.79119110107422, 73.51203155517578, 28.348663330078125, 43.365203857421875, -2.3403854370117188, -37.40196228027344, 69.92896270751953], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000117.npy"}
{"epoch": 0.17180616740088106, "step": 118, "batch_size": 64, "mean": 36.0295524597168, "std": 46.62529373168945, "min": -53.89006042480469, "p10": -12.394589996337887, "median": 25.037193298339844, "p90": 109.47839202880863, "max": 159.47869873046875, "pos_frac": 0.78125, "sample": [-7.101814270019531, 21.105628967285156, 0.9999103546142578, -24.400497436523438, 20.836761474609375, 15.48282241821289, 60.77959060668945, 43.369850158691406, 48.04393768310547, 112.76264953613281, 87.88957214355469, 24.057373046875, 125.02108764648438, 30.9725341796875, -8.314918518066406, 54.68058776855469, -19.28488540649414, -14.143020629882812, 33.14950942993164, 159.47869873046875, 119.94805908203125, -8.27096176147461, -53.89006042480469, 3.6134490966796875, -39.41954040527344, 19.834993362426758, 1.8468971252441406, 33.406639099121094, 13.350017547607422, 14.210464477539062, 11.124137878417969, 75.57044982910156, 2.809661865234375, 76.40672302246094, 25.476478576660156, 48.09759521484375, 22.319082260131836, 40.135345458984375, 149.5401611328125, -1.72369384765625, 30.917495727539062, -18.08100128173828, -1.9489364624023438, 41.372894287109375, 142.09698486328125, 86.35236358642578, 114.45079040527344, 43.364013671875, 79.68575286865234, 13.672266006469727, -0.5820693969726562, 68.00204467773438, 101.81512451171875, 70.40313720703125, 50.80232238769531, 25.148849487304688, 34.44078826904297, 18.047630310058594, 0.02327728271484375, 24.925537109375, -20.74951934814453, 83.23782348632812, 3.5963897705078125, -4.873905181884766], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000118.npy"}
{"epoch": 0.17327459618208516, "step": 119, "batch_size": 64, "mean": 35.49628829956055, "std": 56.15363693237305, "min": -68.633056640625, "p10": -29.637727355957033, "median": 35.86178779602051, "p90": 97.37463531494143, "max": 207.49203491210938, "pos_frac": 0.6875, "sample": [14.928213119506836, 57.98687744140625, 38.475616455078125, 145.95355224609375, 39.5460205078125, -29.650466918945312, -7.697395324707031, 50.191619873046875, -4.914112091064453, 51.13041687011719, -9.908126831054688, -10.929595947265625, 207.49203491210938, 50.175270080566406, 18.744415283203125, 120.23672485351562, 22.114551544189453, 12.31534194946289, 183.16746520996094, 47.73817443847656, 90.66961669921875, 74.54600524902344, 75.2961196899414, -5.164989471435547, 30.312721252441406, 35.72380065917969, -34.5145263671875, 17.709152221679688, 55.82926940917969, 56.705780029296875, 0.9171295166015625, -3.6955413818359375, 100.24821472167969, 36.97450637817383, 170.86105346679688, -3.45623779296875, 66.5234146118164, 77.38566589355469, -1.8945941925048828, 27.20209503173828, -54.928497314453125, 43.86842346191406, -59.6055908203125, 130.57586669921875, 50.87896728515625, 65.40614318847656, -22.935867309570312, -30.730010986328125, 84.73477935791016, 35.99977493286133, -29.608001708984375, -68.633056640625, -1.4701004028320312, 86.35940551757812, 17.31732177734375, 57.84659194946289, 51.01929473876953, 9.994115829467773, 64.83729553222656, -6.792533874511719, -42.476463317871094, 3.3066253662109375, 42.38572692871094, -20.862892150878906], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000119.npy"}
{"epoch": 0.17474302496328928, "step": 120, "batch_size": 64, "mean": 49.14187240600586, "std": 59.97622299194336, "min": -81.2618408203125, "p10": -19.703353500366205, "median": 40.10682678222656, "p90": 137.37402191162113, "max": 192.77694702148438, "pos_frac": 0.84375, "sample": [150.1413116455078, 87.18313598632812, 32.850494384765625, 109.130859375, 36.40180587768555, 9.289262771606445, -59.69390869140625, 101.97511291503906, 20.324234008789062, 72.62842559814453, 53.342994689941406, 166.3358612060547, 79.3903579711914, 5.92236328125, 16.71161460876465, 20.658065795898438, 140.14352416992188, 58.195770263671875, 69.56255340576172, -39.877098083496094, -38.216156005859375, 130.91184997558594, 43.03483581542969, 32.89959716796875, 43.0782470703125, 36.94114685058594, 46.48027038574219, 6.542778015136719, 166.71005249023438, -81.2618408203125, -23.072914123535156, 40.34751892089844, 26.816139221191406, 4.382228851318359, 105.12203979492188, 9.81859016418457, 192.77694702148438, 54.80080795288086, 70.24081420898438, 3.290872573852539, 11.290966033935547, -6.869623184204102, 55.70634460449219, 184.671875, 34.292236328125, 43.93414306640625, 129.8037872314453, 174.28182983398438, 39.86613464355469, 47.598670959472656, 0.14691162109375, 88.67754364013672, -42.89202880859375, 19.197616577148438, 64.09537506103516, 36.48222351074219, -3.8708229064941406, 111.21932983398438, 76.90620422363281, -13.372562408447266, 75.42961120605469, -22.416549682617188, 36.10638427734375, 2.5337677001953125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000120.npy"}
{"epoch": 0.1762114537444934, "step": 121, "batch_size": 64, "mean": 54.12976837158203, "std": 70.16508483886719, "min": -147.92752075195312, "p10": -9.46298904418945, "median": 48.697914123535156, "p90": 149.18289184570312, "max": 218.35365295410156, "pos_frac": 0.765625, "sample": [8.6695556640625, -6.431480407714844, -2.259246826171875, 218.35365295410156, -6.219085693359375, 10.404014587402344, 95.49221801757812, -16.337234497070312, 78.65231323242188, 74.09891510009766, -1.1261138916015625, 63.75670623779297, 138.63368225097656, 108.4188461303711, 82.12872314453125, -0.8022232055664062, 88.30178833007812, 65.86619567871094, 53.18573760986328, -40.698402404785156, 37.01127624511719, 113.412353515625, -57.33056640625, 147.17857360839844, 168.88243103027344, 65.40174865722656, 35.829124450683594, 16.812389373779297, 113.54248046875, 3.351316452026367, 77.79985046386719, -22.46849822998047, 47.56884765625, 84.02369689941406, 2.192533493041992, 201.06027221679688, 4.860382080078125, 183.37435913085938, 45.159873962402344, 205.18798828125, 9.108360290527344, 127.23668670654297, -10.76220703125, 18.356430053710938, -25.443206787109375, 13.086753845214844, 75.36723327636719, 23.19781494140625, -5.547027587890625, 54.961273193359375, 4.777605056762695, 19.78514862060547, 62.08949279785156, 133.77926635742188, 150.04188537597656, 72.8218002319336, -147.92752075195312, 54.89251708984375, -5.0844879150390625, 79.00901794433594, 209.338623046875, 19.10211944580078, -2.6483707427978516, 49.82698059082031], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000121.npy"}
{"epoch": 0.1776798825256975, "step": 122, "batch_size": 64, "mean": 67.46307373046875, "std": 66.26060485839844, "min": -87.95213317871094, "p10": 0.6147026062011732, "median": 51.563270568847656, "p90": 161.98345184326172, "max": 233.37396240234375, "pos_frac": 0.90625, "sample": [150.7241973876953, 22.11774444580078, 75.36286926269531, 82.90350341796875, 19.965192794799805, 27.56951904296875, 49.89933776855469, 97.88764190673828, 22.349143981933594, 112.80668640136719, 15.233535766601562, 130.03359985351562, 124.04581451416016, 233.37396240234375, 105.00740814208984, 35.508750915527344, 16.78752899169922, 55.72797393798828, 161.1260223388672, 47.80622100830078, 20.115962982177734, 141.43885803222656, 35.5540657043457, -9.378433227539062, 82.63688659667969, 11.756744384765625, 64.79869079589844, 141.26080322265625, 73.11678314208984, 66.20980834960938, 75.7746810913086, -25.77539825439453, 172.78506469726562, -6.237585067749023, 52.35029602050781, 164.00543212890625, 64.72624969482422, 26.56342315673828, 176.652587890625, 155.9113006591797, 9.260011672973633, 50.7762451171875, -33.535919189453125, 231.83021545410156, 18.90721893310547, 39.85732650756836, 33.158653259277344, 42.76603317260742, 24.55768585205078, -87.95213317871094, -7.949546813964844, 28.723907470703125, 31.5334415435791, 66.44633483886719, 203.42300415039062, 2.0004005432128906, 114.45365905761719, 54.703651428222656, 0.020832061767578125, 14.339500427246094, 162.35092163085938, 75.09620666503906, 20.59683609008789, 151.76931762695312], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000122.npy"}
{"epoch": 0.17914831130690162, "step": 123, "batch_size": 64, "mean": 44.099815368652344, "std": 73.62096405029297, "min": -152.5787353515625, "p10": -26.70355472564697, "median": 22.730673789978027, "p90": 142.60141296386726, "max": 235.27462768554688, "pos_frac": 0.65625, "sample": [80.59104919433594, 80.5179672241211, -5.994316101074219, 8.958789825439453, 89.2952880859375, 75.71894836425781, 119.79971313476562, 111.89877319335938, 79.9133071899414, 48.15613555908203, -0.4181690216064453, -18.975479125976562, 9.08572769165039, -41.123435974121094, 155.3778839111328, 86.54119873046875, 16.197628021240234, -13.729995727539062, -3.2085952758789062, 67.81686401367188, -5.9832916259765625, 124.53546142578125, 65.50868225097656, -76.33590698242188, 21.990089416503906, 97.41091918945312, 216.87271118164062, 23.222896575927734, 150.34396362304688, 161.4775390625, -25.8852596282959, 63.67939758300781, 20.79439926147461, 8.609977722167969, 84.7060775756836, 123.02017211914062, 19.888259887695312, 47.5562744140625, 178.1884307861328, -20.452537536621094, 92.40240478515625, 82.80656433105469, -42.73634719848633, -67.13642883300781, 77.34736633300781, -8.323883056640625, -42.69813919067383, -6.24159049987793, -17.70671844482422, 235.27462768554688, -27.05425262451172, 68.77857971191406, 52.906166076660156, 205.78726196289062, -9.752090454101562, 22.23845100402832, 120.60813903808594, 35.13459777832031, -152.5787353515625, -14.916282653808594, -0.625823974609375, -9.494060516357422, 0.5128536224365234, 2.288053512573242], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000123.npy"}
{"epoch": 0.18061674008810572, "step": 124, "batch_size": 64, "mean": 65.09678649902344, "std": 95.11799621582031, "min": -118.48265075683594, "p10": -26.337614822387696, "median": 42.521244049072266, "p90": 177.73108825683596, "max": 376.2948303222656, "pos_frac": 0.78125, "sample": [148.8574981689453, -25.752792358398438, 41.45996856689453, 34.622650146484375, 26.734371185302734, 149.8712921142578, -99.12770080566406, -9.562217712402344, 162.01220703125, 189.83877563476562, -3.3672714233398438, 30.493667602539062, 4.771604537963867, 69.45267486572266, 79.06761932373047, 231.39218139648438, 156.552001953125, 51.98393630981445, -19.139007568359375, 49.715248107910156, -118.48265075683594, -41.13983154296875, 143.0449981689453, -0.20887374877929688, 203.31304931640625, 129.8046875, 77.73664855957031, -2.7682037353515625, 46.55793762207031, 120.27790832519531, -26.588253021240234, 32.854122161865234, 88.02656555175781, 14.14543342590332, 11.36260986328125, 346.53997802734375, 101.2156982421875, 179.21286010742188, 29.243560791015625, 174.27362060546875, 104.1117935180664, -51.865333557128906, 16.904666900634766, -18.600677490234375, 39.80646514892578, 123.88018798828125, -97.80377197265625, 99.1854248046875, 19.984235763549805, 43.58251953125, 39.33268356323242, 77.45157623291016, -65.80108642578125, 255.86318969726562, 22.959991455078125, 376.2948303222656, 46.133575439453125, 169.22872924804688, 1.1590442657470703, 32.93994140625, 50.90589904785156, 76.42517852783203, 14.725467681884766, 11.0914306640625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000124.npy"}
{"epoch": 0.18208516886930984, "step": 125, "batch_size": 64, "mean": 57.879905700683594, "std": 78.16503143310547, "min": -93.51776123046875, "p10": -28.946736907958982, "median": 33.17298984527588, "p90": 165.9600479125977, "max": 250.57833862304688, "pos_frac": 0.78125, "sample": [-3.1250362396240234, 149.0437774658203, 91.7388916015625, 23.588943481445312, -14.501510620117188, 9.432586669921875, 110.83665466308594, -26.59447479248047, 141.2176971435547, 17.887216567993164, 10.102397918701172, -29.954849243164062, 24.093408584594727, 0.4378814697265625, 103.2789535522461, 110.30680847167969, 87.73646545410156, 52.425453186035156, 17.702791213989258, 4.577648162841797, 107.4930419921875, 18.15894317626953, -54.00604248046875, -10.93292236328125, 91.17308807373047, 250.57833862304688, -36.196258544921875, 30.709653854370117, 107.042236328125, 36.54771423339844, -4.5348663330078125, -83.01795196533203, 35.20091247558594, 15.831167221069336, 30.49840545654297, 210.3937225341797, 7.289882659912109, 210.42794799804688, 123.11932373046875, -22.706947326660156, 12.745101928710938, 104.81373596191406, 188.4544677734375, 92.14847564697266, 230.5794677734375, 93.57955932617188, 56.57878875732422, 46.319278717041016, 170.87872314453125, 52.41447448730469, 11.657608032226562, 129.4151611328125, 42.265647888183594, -16.15904426574707, 112.110107421875, 27.19968032836914, -42.94490051269531, 154.48313903808594, 31.14506721496582, -93.51776123046875, 207.64552307128906, 154.14715576171875, -33.164306640625, 28.217744827270508], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000125.npy"}
{"epoch": 0.18355359765051396, "step": 126, "batch_size": 64, "mean": 53.98036193847656, "std": 71.85811614990234, "min": -75.72037506103516, "p10": -22.17084197998047, "median": 37.8333797454834, "p90": 158.3166732788086, "max": 250.47381591796875, "pos_frac": 0.828125, "sample": [123.4549560546875, 65.77006530761719, 12.131757736206055, 90.82997131347656, 13.169631958007812, 52.842315673828125, -37.68439483642578, 2.432872772216797, 60.95794677734375, 156.52774047851562, -52.994293212890625, 2.9331398010253906, 250.47381591796875, 20.159385681152344, 84.74588012695312, 0.9882354736328125, 133.03591918945312, 42.532188415527344, 132.02609252929688, 66.52972412109375, 44.73479461669922, 51.954227447509766, -8.221315383911133, 36.99465560913086, 63.31549072265625, 186.06533813476562, -22.500450134277344, 159.08335876464844, 34.85517501831055, 48.14654541015625, 20.341487884521484, 25.37078094482422, 72.63111877441406, 13.828147888183594, 179.37603759765625, 38.67210388183594, 56.97724151611328, -73.80043029785156, 146.73907470703125, -14.864856719970703, -61.85028076171875, -38.28520202636719, 236.55105590820312, -4.912179946899414, 27.25624656677246, 11.343624114990234, 1.3204994201660156, 20.212299346923828, 121.70101928710938, 115.51417541503906, -75.72037506103516, 190.43174743652344, 85.7958984375, 24.208648681640625, 35.40415954589844, 97.7991943359375, 33.57054138183594, 64.48428344726562, 17.212947845458984, -21.401756286621094, 13.813224792480469, 91.80345153808594, 12.308631896972656, 175.61972045898438], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000126.npy"}
{"epoch": 0.18502202643171806, "step": 127, "batch_size": 64, "mean": 39.52136993408203, "std": 62.40430450439453, "min": -121.9561767578125, "p10": -35.16357650756836, "median": 41.298038482666016, "p90": 115.79159927368164, "max": 183.9109344482422, "pos_frac": 0.78125, "sample": [54.951087951660156, 79.76947784423828, -8.951940536499023, 37.60224151611328, 3.4177093505859375, -93.60720825195312, 151.70887756347656, 9.078964233398438, 99.35004425048828, -121.9561767578125, 9.017358779907227, 61.174171447753906, 75.04331970214844, -33.89647674560547, 18.20852279663086, 35.861602783203125, 93.18753814697266, 10.989871978759766, -121.27949523925781, 75.1446533203125, 26.65454864501953, 71.75758361816406, 125.78080749511719, 22.312210083007812, 68.09953308105469, 169.33822631835938, 22.202289581298828, 84.871826171875, -24.814292907714844, 16.516761779785156, 119.4455795288086, 75.92603302001953, 108.16900634765625, 24.451595306396484, 52.53013610839844, 147.5097198486328, -2.2860145568847656, -19.612205505371094, 20.90846824645996, -2.2856483459472656, 16.540157318115234, 115.0621566772461, 9.233474731445312, 75.44599914550781, 45.91966247558594, 63.93372344970703, 44.99383544921875, 116.10421752929688, 107.30597686767578, 50.990478515625, -47.44425964355469, 48.52587127685547, 19.52977752685547, 183.9109344482422, -59.63341522216797, 61.36981201171875, 47.59650421142578, 92.27165222167969, 46.83050537109375, 13.604961395263672, 31.427398681640625, -24.691055297851562, -35.70661926269531, -36.04438781738281], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000127.npy"}
{"epoch": 0.18649045521292218, "step": 128, "batch_size": 64, "mean": 41.59375762939453, "std": 73.50816345214844, "min": -169.99185180664062, "p10": -47.563420867919916, "median": 37.25638961791992, "p90": 144.5634002685547, "max": 250.9597930908203, "pos_frac": 0.6875, "sample": [46.5811882019043, 44.354759216308594, 133.68507385253906, -14.351173400878906, 92.63255310058594, -59.16928482055664, 102.63902282714844, 144.7706298828125, 35.71693420410156, 42.62156677246094, 163.67034912109375, 50.43268585205078, 48.97935485839844, 36.310157775878906, -86.83698272705078, 100.1131591796875, 150.35675048828125, 4.427385330200195, -0.25026893615722656, 75.90037536621094, -72.74317932128906, -3.5245323181152344, 250.9597930908203, -169.99185180664062, 148.44985961914062, 49.422706604003906, 157.00909423828125, 146.00839233398438, 70.3154525756836, 48.37129211425781, -9.113208770751953, -58.128448486328125, 103.85724639892578, -4.304841995239258, -16.387489318847656, 19.393455505371094, -78.30731201171875, -3.373699188232422, 33.59092330932617, 46.21437454223633, 87.69568634033203, -38.59845733642578, 53.668792724609375, -24.881261825561523, -9.301740646362305, 143.2364959716797, 32.497520446777344, 38.20262145996094, 22.37285804748535, 50.56516647338867, 23.69964599609375, -8.635549545288086, 127.17169189453125, -1.3347434997558594, 12.53571891784668, 2.8013153076171875, -11.192695617675781, 55.319129943847656, 104.09886169433594, 11.260242462158203, -51.405548095703125, 126.65939331054688, 144.07986450195312, 1.1830730438232422], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000128.npy"}
{"epoch": 0.18795888399412627, "step": 129, "batch_size": 64, "mean": 43.959712982177734, "std": 83.90424346923828, "min": -141.35601806640625, "p10": -43.21637840270996, "median": 31.175371170043945, "p90": 146.99029235839845, "max": 364.98809814453125, "pos_frac": 0.65625, "sample": [2.4770145416259766, 32.5919075012207, -43.793399810791016, 111.26533508300781, -47.122352600097656, 120.86203002929688, 11.365943908691406, -41.8699951171875, 166.86453247070312, 26.203432083129883, -10.906509399414062, 106.95033264160156, 26.45012664794922, 42.95396423339844, 57.914730072021484, 74.02873229980469, 147.5673065185547, -38.068336486816406, -34.646392822265625, 54.54351806640625, 214.1202392578125, 88.77015686035156, 218.03216552734375, -95.32305145263672, 88.97674560546875, -87.80130004882812, -43.9786376953125, 39.35442352294922, 364.98809814453125, 142.13589477539062, 28.268291473388672, 19.738311767578125, -0.77606201171875, 75.6518783569336, -11.128646850585938, -17.146404266357422, 29.758834838867188, 121.114990234375, 28.738059997558594, -16.026962280273438, -72.00377655029297, -7.970983505249023, 49.630218505859375, 79.11827087402344, 146.5408935546875, -141.35601806640625, 18.34357261657715, 18.822444915771484, -7.852388381958008, -6.542030334472656, 136.49879455566406, 170.55982971191406, 39.44215774536133, 46.6510009765625, 69.51384735107422, -17.767494201660156, 42.63401794433594, 54.3541259765625, -36.850990295410156, 85.44229888916016, 147.18289184570312, -28.964073181152344, 75.39263153076172, -0.4966697692871094], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000129.npy"}
{"epoch": 0.1894273127753304, "step": 130, "batch_size": 64, "mean": 66.11294555664062, "std": 84.00402069091797, "min": -107.1248550415039, "p10": -20.374899291992183, "median": 46.46612548828125, "p90": 192.35877075195316, "max": 323.3695068359375, "pos_frac": 0.78125, "sample": [74.45368957519531, 39.753421783447266, 30.258392333984375, 185.85275268554688, -15.250328063964844, 11.682723999023438, 198.2965087890625, 80.29376983642578, -53.93315887451172, -9.026350021362305, 15.137006759643555, 57.998653411865234, 38.367488861083984, 183.9818115234375, 203.44564819335938, 23.817970275878906, 92.93170166015625, -8.013359069824219, 36.66522216796875, 70.66886901855469, -11.433635711669922, 32.91587829589844, 7.600128173828125, 86.48838806152344, 94.00291442871094, -3.3437881469726562, 33.98185729980469, 90.03228759765625, 58.17572784423828, 179.07449340820312, 141.02928161621094, 212.08071899414062, -34.380714416503906, 28.136184692382812, 149.53089904785156, -103.07621765136719, 33.5875244140625, 28.801197052001953, 33.43112564086914, -22.571144104003906, 59.60108184814453, 53.06784439086914, 74.1073226928711, 240.8651885986328, 156.74444580078125, -8.186668395996094, 323.3695068359375, -41.33641815185547, -3.643198013305664, 87.7702407836914, 33.23394012451172, 39.86440658569336, 195.14706420898438, -31.940757751464844, 143.39366149902344, -107.1248550415039, 130.57044982910156, 2.0155696868896484, 120.62411499023438, 114.99311828613281, 207.36651611328125, 56.667816162109375, 32.927284240722656, 59.685272216796875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000130.npy"}
{"epoch": 0.19089574155653452, "step": 131, "batch_size": 64, "mean": 66.538818359375, "std": 72.90219116210938, "min": -147.05718994140625, "p10": -1.4027109146118164, "median": 50.281538009643555, "p90": 159.16501770019534, "max": 265.20135498046875, "pos_frac": 0.875, "sample": [50.2273063659668, 46.32958984375, 50.963157653808594, 29.377033233642578, 265.20135498046875, -147.05718994140625, 57.46736526489258, 40.12200927734375, 29.62183380126953, 105.10430145263672, 109.76800537109375, 20.43677520751953, 59.513336181640625, 13.42116928100586, 98.703125, 36.25028991699219, 41.04265594482422, -1.846883773803711, 139.64898681640625, 71.68536376953125, 40.271461486816406, 156.06729125976562, -12.153602600097656, -12.86363410949707, 50.33576965332031, 42.70027160644531, 85.77009582519531, 8.030523300170898, 201.80709838867188, 39.109886169433594, 42.01968765258789, -83.01272583007812, 160.49261474609375, 9.830047607421875, 221.12200927734375, 3.943300247192383, 97.95270538330078, 30.165626525878906, 137.8048553466797, 46.60639572143555, 125.56912231445312, 31.859704971313477, 204.09722900390625, 27.013652801513672, -88.67739868164062, 126.43331909179688, 6.6104888916015625, 106.85137176513672, -1.4171733856201172, 25.987876892089844, -1.3689651489257812, 135.3433837890625, 10.183952331542969, 175.42726135253906, 154.05126953125, 163.50656127929688, 91.04765319824219, 66.23947143554688, 29.337871551513672, 79.51776123046875, 128.65011596679688, 94.85746002197266, 101.97659301757812, 83.40629577636719], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000131.npy"}
{"epoch": 0.19236417033773862, "step": 132, "batch_size": 64, "mean": 54.263389587402344, "std": 86.76148223876953, "min": -157.40643310546875, "p10": -15.287681007385254, "median": 42.99641990661621, "p90": 145.11161193847659, "max": 367.01983642578125, "pos_frac": 0.796875, "sample": [13.61709213256836, 11.49874496459961, 148.0994873046875, -62.938323974609375, 68.46481323242188, 37.43439483642578, 63.932010650634766, 288.7401428222656, 312.897216796875, 7.478221893310547, 68.65493774414062, 109.35497283935547, 20.09507179260254, 76.96395874023438, -157.40643310546875, 5.006492614746094, 214.26177978515625, -34.911376953125, 65.60628509521484, -15.240148544311523, 1.7409038543701172, -12.167045593261719, 126.73761749267578, 67.11908721923828, 20.567363739013672, -15.308052062988281, -0.0056171417236328125, 112.37845611572266, 7.88677978515625, 4.232887268066406, -13.997467041015625, -12.71490478515625, 19.540353775024414, 92.96666717529297, 79.73513793945312, 147.454345703125, 48.779273986816406, 66.38165283203125, 76.52851867675781, 49.22959518432617, 55.002105712890625, 33.6065788269043, 29.51034927368164, 139.64523315429688, -105.93472290039062, 48.55844497680664, 119.3442153930664, -66.11287689208984, 13.79141616821289, 91.45524597167969, -20.060083389282227, 58.69733428955078, 105.21286010742188, 17.4500732421875, 58.079010009765625, -13.23968505859375, 31.177810668945312, 102.18020629882812, 26.961074829101562, 12.182615280151367, 178.15234375, 28.50082015991211, 52.98186492919922, 367.01983642578125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000132.npy"}
{"epoch": 0.19383259911894274, "step": 133, "batch_size": 64, "mean": 48.42201232910156, "std": 75.60839080810547, "min": -103.19270324707031, "p10": -20.578828430175776, "median": 36.99626159667969, "p90": 145.51931762695318, "max": 249.8863067626953, "pos_frac": 0.765625, "sample": [10.548149108886719, 70.50592041015625, -9.618545532226562, 109.29400634765625, 8.136943817138672, -40.25572967529297, -8.33013916015625, 36.57942199707031, -73.66593933105469, 249.8863067626953, 52.168731689453125, 72.34627532958984, -4.739326477050781, 103.53626251220703, -79.64305114746094, 20.426223754882812, 91.48320007324219, -4.60205078125, 214.3107452392578, 6.460916519165039, 64.6657943725586, 82.42341613769531, -14.717109680175781, 130.65692138671875, 2.1672592163085938, 104.48603820800781, 47.620277404785156, 77.35699462890625, 86.18421173095703, 67.13031005859375, 10.726333618164062, 20.586044311523438, 25.64864158630371, 243.37478637695312, -2.2066726684570312, 51.30121612548828, 71.9318618774414, -66.3563232421875, 209.01409912109375, 37.41310119628906, 40.193519592285156, 35.327735900878906, 53.276710510253906, 239.32293701171875, -103.19270324707031, -15.454193115234375, 65.01292419433594, 176.95785522460938, 18.663070678710938, 49.307491302490234, 7.865234375, 119.49700927734375, -53.64125061035156, 151.888916015625, 100.59893035888672, -22.775100708007812, 67.81779479980469, 47.44403076171875, -8.988136291503906, 20.103986740112305, 7.772132873535156, 0.20631790161132812, 9.745725631713867, 17.82209014892578], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000133.npy"}
{"epoch": 0.19530102790014683, "step": 134, "batch_size": 64, "mean": 51.5308952331543, "std": 67.78575134277344, "min": -43.89728546142578, "p10": -31.626653099060057, "median": 42.29172897338867, "p90": 131.77286071777345, "max": 332.2581787109375, "pos_frac": 0.8125, "sample": [16.891708374023438, -43.89728546142578, -36.78056335449219, 123.64405822753906, 200.17425537109375, 33.15599060058594, 16.580852508544922, 41.17717361450195, -32.94182586669922, 49.02428436279297, 120.46736145019531, 24.93771743774414, 12.244375228881836, 8.165565490722656, 52.48194122314453, 41.22509765625, -28.55791664123535, 46.34002685546875, 78.67819213867188, 24.162189483642578, 5.459081649780273, 11.552772521972656, 97.26261138916016, 17.53417205810547, 167.2054901123047, -1.53448486328125, 107.57437133789062, 95.76519775390625, 50.41218185424805, 17.112648010253906, 49.05375671386719, 132.47821044921875, 2.1514110565185547, 48.10564422607422, 166.21478271484375, 112.56647491455078, 130.12704467773438, -41.12110137939453, 85.77525329589844, -17.644725799560547, 58.896209716796875, -35.22404479980469, 64.84619140625, 7.591543197631836, 332.2581787109375, 12.50667953491211, 78.46417236328125, 40.353919982910156, 72.67463684082031, -4.026622772216797, 63.75218200683594, -26.00562286376953, -34.18585968017578, 177.18807983398438, 62.332618713378906, 3.91864013671875, 60.300621032714844, 39.17868423461914, 43.358360290527344, 71.6239242553711, -40.04966735839844, 79.57154846191406, 31.30289077758789, 154.12606811523438], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000134.npy"}
{"epoch": 0.19676945668135096, "step": 135, "batch_size": 64, "mean": 74.21546936035156, "std": 94.34196472167969, "min": -88.45156860351562, "p10": -33.372328186035155, "median": 52.89524841308594, "p90": 218.25039520263675, "max": 359.28314208984375, "pos_frac": 0.8125, "sample": [29.678504943847656, 132.99075317382812, 32.17317581176758, 239.68106079101562, 126.02301025390625, 77.40826416015625, 246.43821716308594, 43.4178352355957, 242.14096069335938, 222.4547576904297, 133.224853515625, 89.97315979003906, 190.74017333984375, 128.66233825683594, 273.81622314453125, 117.97128295898438, 31.556838989257812, 199.33798217773438, 12.214797973632812, -40.127113342285156, 0.7946491241455078, 113.19659423828125, 159.56805419921875, 78.28211975097656, 38.42131042480469, 97.58416748046875, 4.0941619873046875, 359.28314208984375, -23.045833587646484, 114.85064697265625, -60.57078552246094, 201.68157958984375, 208.44021606445312, -74.14274597167969, 40.109561920166016, -17.149822235107422, 27.578617095947266, 57.270904541015625, 127.37799072265625, -3.99884033203125, -60.61144256591797, 53.9862060546875, 81.67390441894531, 57.14201354980469, 1.8626136779785156, 51.804290771484375, 5.027111053466797, -14.8154296875, 46.901580810546875, 17.121429443359375, 32.3306999206543, 112.35797119140625, 93.64460754394531, 87.6006851196289, 6.7385101318359375, 98.86557006835938, 44.33390808105469, 229.86813354492188, 13.673851013183594, -88.45156860351562, -29.405166625976562, -35.072540283203125, -42.24275207519531, 6.053306579589844], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000135.npy"}
{"epoch": 0.19823788546255505, "step": 136, "batch_size": 64, "mean": 49.29758834838867, "std": 92.72386169433594, "min": -76.58349609375, "p10": -46.58245506286621, "median": 28.15107536315918, "p90": 121.36379623413086, "max": 411.82568359375, "pos_frac": 0.6875, "sample": [66.08154296875, 74.53401947021484, 101.13095092773438, 98.380859375, -6.053375244140625, -49.87100601196289, 299.58831787109375, 133.04209899902344, -20.230712890625, 65.40249633789062, 101.90901184082031, 66.09501647949219, -24.186904907226562, 14.314437866210938, 77.03717041015625, 53.4007568359375, 79.34519958496094, 34.485504150390625, 116.90342712402344, 321.1131286621094, -2.922943115234375, 71.87652587890625, 411.82568359375, 2.4134292602539062, 115.4034423828125, -0.20127105712890625, 223.41510009765625, -6.959621429443359, 256.76739501953125, 24.544090270996094, 8.543403625488281, 2.593780517578125, 58.90765380859375, -72.3009033203125, 56.08824157714844, -63.50868225097656, 20.05823516845703, -25.317283630371094, 87.84699249267578, 88.77455139160156, 11.062833786010742, -16.366512298583984, -14.3377685546875, 64.3917236328125, 9.660287857055664, 2.367612838745117, -25.265331268310547, 31.758060455322266, 93.30459594726562, 120.1381607055664, -47.01818084716797, -59.767005920410156, 35.12432861328125, 79.29277801513672, -76.58349609375, 1.0724029541015625, 70.39369201660156, -21.578426361083984, -45.56576156616211, 24.116252899169922, -5.1497955322265625, 121.88906860351562, 3.0137100219726562, -61.17736053466797], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000136.npy"}
{"epoch": 0.19970631424375918, "step": 137, "batch_size": 64, "mean": 77.56993103027344, "std": 113.73035430908203, "min": -158.5084228515625, "p10": -27.434626007080073, "median": 50.44227981567383, "p90": 251.61129760742213, "max": 390.66436767578125, "pos_frac": 0.84375, "sample": [-42.42720031738281, 10.864673614501953, 72.30604553222656, 120.52050018310547, 153.48086547851562, 32.141578674316406, 46.87993621826172, 88.57565307617188, -23.087482452392578, 103.0462646484375, 4.037242889404297, 97.15133666992188, 385.91656494140625, 18.458768844604492, 39.763099670410156, -113.9449462890625, 113.89920043945312, 10.5872802734375, 35.946311950683594, 154.2236785888672, 158.42770385742188, 80.7750473022461, 2.632801055908203, 390.66436767578125, 110.59169006347656, 330.63232421875, 190.31216430664062, 125.94267272949219, 19.2313175201416, 167.42385864257812, -95.47962951660156, 9.1387939453125, 70.78438568115234, 11.092582702636719, 132.64627075195312, 9.598594665527344, 126.99154663085938, 50.896392822265625, 9.795326232910156, 49.98816680908203, 64.61033630371094, 44.36106872558594, -1.9841537475585938, -158.5084228515625, -46.44355010986328, -73.0953598022461, 364.8089599609375, -29.297687530517578, 38.98365020751953, 298.0579833984375, 54.56580352783203, 129.43475341796875, 3.3166275024414062, 93.52130126953125, -17.357236862182617, 27.925683975219727, 52.951683044433594, 9.349674224853516, 1.500131607055664, 101.4340591430664, 307.2861328125, 35.65596008300781, 125.09000396728516, 277.8823547363281], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000137.npy"}
{"epoch": 0.2011747430249633, "step": 138, "batch_size": 64, "mean": 58.859375, "std": 72.58277893066406, "min": -115.477783203125, "p10": -16.22010269165039, "median": 44.32511329650879, "p90": 152.50761108398441, "max": 276.62603759765625, "pos_frac": 0.78125, "sample": [157.31735229492188, -12.008987426757812, 72.11447143554688, 140.42507934570312, 7.369071960449219, 115.66338348388672, -17.131576538085938, -1.237966537475586, 8.300689697265625, 174.5746612548828, 32.598915100097656, 21.279815673828125, 43.40769958496094, 11.178852081298828, 27.62018394470215, -115.477783203125, 16.507394790649414, -23.03402328491211, -1.9382781982421875, -12.013298034667969, 186.5556640625, 177.69210815429688, 15.871326446533203, 9.500518798828125, 119.14353942871094, 131.97433471679688, 132.6937255859375, -4.508636474609375, 92.15633392333984, 137.33009338378906, 56.704559326171875, 29.34113121032715, 126.75874328613281, 8.540924072265625, 140.11233520507812, 171.17430114746094, 74.75064086914062, 45.24252700805664, -69.87684631347656, 62.073265075683594, 64.48860931396484, 28.25943374633789, 86.60948181152344, 0.5612087249755859, -6.068183898925781, -55.86444854736328, 97.86144256591797, 141.28488159179688, 16.310970306396484, 37.90446472167969, 111.86929321289062, 34.43042755126953, 136.0685577392578, -36.477012634277344, -21.233213424682617, 61.57468032836914, -14.093330383300781, 72.08316040039062, 20.287734985351562, 276.62603759765625, 104.33846282958984, 56.72290802001953, 106.9753646850586, 157.73287963867188], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000138.npy"}
{"epoch": 0.2026431718061674, "step": 139, "batch_size": 64, "mean": 70.50794982910156, "std": 87.63885498046875, "min": -110.58010864257812, "p10": -28.93995971679687, "median": 56.37874221801758, "p90": 167.13395080566409, "max": 375.5392150878906, "pos_frac": 0.796875, "sample": [-18.232933044433594, 157.10494995117188, 100.36975860595703, 173.4999542236328, -31.181175231933594, 66.91313171386719, 120.6574478149414, -11.632572174072266, 83.67123413085938, 146.18682861328125, -110.58010864257812, 57.66007995605469, -48.12876510620117, 275.8296203613281, 140.62933349609375, 48.067832946777344, 87.7271728515625, 55.09740447998047, 33.66166687011719, 64.20072937011719, 144.14682006835938, 74.2029037475586, -73.10327911376953, 123.58673858642578, 48.14691162109375, 1.3917007446289062, 158.0395050048828, 84.8372802734375, 16.53421974182129, 38.73279571533203, 155.0090789794922, 24.63627052307129, 125.29476928710938, 119.2213363647461, 39.40464782714844, 91.69145202636719, -19.390525817871094, -1.9519290924072266, 9.730777740478516, 16.69408416748047, 80.0706558227539, -0.4867095947265625, 16.960609436035156, 150.2122802734375, 22.93552017211914, 265.88726806640625, 96.0505142211914, -23.71045684814453, 77.43390655517578, -47.65227508544922, 83.8825912475586, -45.69347381591797, 34.515953063964844, 86.10957336425781, 375.5392150878906, 39.043643951416016, 250.8064727783203, 38.35264587402344, 47.504798889160156, -38.68341064453125, 191.41265869140625, 29.775115966796875, 42.83306121826172, 171.0315704345703], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000139.npy"}
{"epoch": 0.20411160058737152, "step": 140, "batch_size": 64, "mean": 63.72399139404297, "std": 89.924072265625, "min": -128.01792907714844, "p10": -50.64300155639648, "median": 64.09249496459961, "p90": 173.31105499267582, "max": 348.23626708984375, "pos_frac": 0.734375, "sample": [-26.172348022460938, 122.62382507324219, 348.23626708984375, -75.89031219482422, -12.750343322753906, 123.13041687011719, 84.61347198486328, 148.87611389160156, 57.50798034667969, 166.0392303466797, 117.66934204101562, 71.06855010986328, 115.22013854980469, 41.01471710205078, 23.705276489257812, 84.2791748046875, 275.20428466796875, 13.184185028076172, 4.430225372314453, 75.07714080810547, 21.145523071289062, 65.25206756591797, 87.17630004882812, 138.03143310546875, -2.1726150512695312, -53.90455627441406, 256.18231201171875, -52.33992004394531, 37.732940673828125, 76.48960876464844, 21.73997688293457, 59.808509826660156, 8.918462753295898, 116.93136596679688, -32.13230895996094, 62.40478515625, 93.26669311523438, 64.25383758544922, -8.12646484375, 161.18264770507812, 132.47079467773438, -33.96961975097656, 179.1928253173828, -128.01792907714844, -2.8805618286132812, 63.93115234375, 62.68321228027344, 93.04924011230469, 189.63763427734375, 23.41217803955078, -9.716150283813477, 155.9425506591797, -12.771478652954102, 20.615020751953125, -90.21287536621094, -52.826908111572266, -71.52924346923828, 65.54345703125, 181.69046020507812, 176.42755126953125, -46.68352508544922, 85.5917739868164, 82.16888427734375, 135.67916870117188], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000140.npy"}
{"epoch": 0.2055800293685756, "step": 141, "batch_size": 64, "mean": 60.48833465576172, "std": 95.23878479003906, "min": -243.17013549804688, "p10": -46.37356185913085, "median": 60.9108943939209, "p90": 180.18937530517582, "max": 295.82110595703125, "pos_frac": 0.78125, "sample": [65.59457397460938, 39.33271026611328, -14.277019500732422, 102.42272186279297, 84.01036071777344, -32.91849899291992, 106.87478637695312, 62.534759521484375, 0.5495071411132812, -7.938697814941406, 59.47532272338867, 255.9205322265625, -243.17013549804688, 50.84589385986328, -4.934959411621094, 34.49275207519531, 0.6703433990478516, -53.45831298828125, 264.97869873046875, -63.20783996582031, -36.87761688232422, 201.59596252441406, 48.22331619262695, 84.14901733398438, 150.78878784179688, 81.97036743164062, 106.76811218261719, 3.3520584106445312, -58.69012451171875, 75.07859802246094, 112.33273315429688, 78.81193542480469, 33.54993438720703, 168.79689025878906, 247.66561889648438, 26.045347213745117, 116.34383392333984, 24.376127243041992, -19.165014266967773, 18.26645278930664, -25.714149475097656, 50.12702941894531, -50.44325256347656, 185.07186889648438, 5.7214813232421875, 139.07012939453125, 194.75634765625, 119.64312744140625, 11.479940414428711, -160.05471801757812, 19.209938049316406, 96.75482177734375, 62.346466064453125, 89.25920104980469, 115.36380004882812, 135.34146118164062, 295.82110595703125, 127.8770523071289, 1.2772579193115234, 161.6833953857422, 68.67198181152344, 103.79867553710938, -53.17023468017578, 6.1807861328125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000141.npy"}
{"epoch": 0.20704845814977973, "step": 142, "batch_size": 64, "mean": 69.34351348876953, "std": 107.79285430908203, "min": -212.8826904296875, "p10": -44.837870407104475, "median": 36.37330627441406, "p90": 236.43939819335938, "max": 296.6500549316406, "pos_frac": 0.765625, "sample": [-21.978134155273438, 262.9488220214844, 32.65332794189453, 43.232086181640625, -30.843528747558594, 246.3195037841797, -23.388957977294922, -23.299062728881836, -93.06790161132812, 31.613872528076172, 133.32061767578125, 97.60589599609375, 159.5721435546875, 28.603830337524414, 36.64958190917969, 223.54750061035156, 15.32406234741211, -212.8826904296875, -150.2769775390625, 73.71698760986328, 22.50489044189453, 36.09703063964844, 18.307865142822266, 296.6500549316406, 167.09695434570312, 36.0748291015625, 147.26341247558594, 26.021093368530273, 147.59393310546875, -22.480295181274414, 106.39300537109375, 120.61988830566406, 236.69932556152344, 84.32044982910156, -1.0471038818359375, 209.9464874267578, 213.8915252685547, 16.950834274291992, 19.142026901245117, 41.693824768066406, -64.03742980957031, -50.835445404052734, 3.382720947265625, 235.83290100097656, 67.5722885131836, 245.75914001464844, 13.769195556640625, 23.086524963378906, 71.17288208007812, -53.74115753173828, 58.17599868774414, -19.036636352539062, 9.486917495727539, -83.40336608886719, 191.9261932373047, 245.8380126953125, 28.271408081054688, 196.78182983398438, 25.918350219726562, -9.727529525756836, 125.1919937133789, 255.43026733398438, 42.91820526123047, 125.14075469970703], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000142.npy"}
{"epoch": 0.20851688693098386, "step": 143, "batch_size": 64, "mean": 44.15364456176758, "std": 94.26882934570312, "min": -155.04360961914062, "p10": -56.941184234619136, "median": 28.292137145996094, "p90": 143.93782653808594, "max": 336.7385559082031, "pos_frac": 0.65625, "sample": [83.33895874023438, -7.3578948974609375, 54.607749938964844, 26.764854431152344, 285.316650390625, 89.00772857666016, -57.59623718261719, 101.95528411865234, 107.02496337890625, 24.220369338989258, 143.28433227539062, -84.91264343261719, -55.41272735595703, 135.44699096679688, 83.9624252319336, -123.78348541259766, -48.608917236328125, -2.603473663330078, 104.68313598632812, -5.97552490234375, 34.963478088378906, 255.82305908203125, -66.73696899414062, 97.13082885742188, 19.517242431640625, -16.968189239501953, 46.4364013671875, 144.2178955078125, 29.819419860839844, 16.944293975830078, -13.088874816894531, -49.41911315917969, -58.082977294921875, 3.9634456634521484, 134.710205078125, 5.660499572753906, 42.302345275878906, 68.94808197021484, 24.737396240234375, 336.7385559082031, -33.02239227294922, -12.140539169311523, 37.32933807373047, 83.85714721679688, -2.652873992919922, 56.19871520996094, 154.74249267578125, 139.50436401367188, -6.764308929443359, -47.95703125, -91.31106567382812, 287.0279541015625, -33.291656494140625, 78.56241607666016, 64.23664855957031, 30.2507266998291, 110.17991638183594, 25.018836975097656, 179.46542358398438, -155.04360961914062, -10.584110260009766, 41.575889587402344, 3.3489151000976562, 16.322341918945312], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000143.npy"}
{"epoch": 0.20998531571218795, "step": 144, "batch_size": 64, "mean": 54.19027328491211, "std": 78.64970397949219, "min": -105.89666748046875, "p10": -33.61781234741211, "median": 45.34720993041992, "p90": 169.77442932128906, "max": 244.05746459960938, "pos_frac": 0.734375, "sample": [-33.2454833984375, 110.9063949584961, -4.10621452331543, 170.7989501953125, -103.0281753540039, -20.922744750976562, 68.84733581542969, 121.58267974853516, 44.94628143310547, 15.212764739990234, 64.78511810302734, -39.76509475708008, 73.48088073730469, 105.9366455078125, 57.73139190673828, 167.38388061523438, 51.690147399902344, 202.18130493164062, 104.48807525634766, 143.47674560546875, 174.345458984375, 190.26776123046875, -23.51295280456543, -105.89666748046875, 103.53157806396484, -26.14483642578125, 48.74909210205078, -3.5704574584960938, 68.01617431640625, 22.696578979492188, -54.51936340332031, 71.98670196533203, 65.93280792236328, 17.935470581054688, -47.322303771972656, 138.6802978515625, 47.54576110839844, 27.174678802490234, 25.22008514404297, 88.59569549560547, 29.379745483398438, 92.85424041748047, 244.05746459960938, 28.80604362487793, 26.78618621826172, 76.15568542480469, -33.777381896972656, 24.549537658691406, 22.430984497070312, 103.31759643554688, 123.94908905029297, 231.39573669433594, -2.1591796875, -29.22503662109375, -21.071319580078125, -21.08570098876953, 220.3206329345703, 15.761798858642578, 23.897855758666992, 1.9012832641601562, 45.748138427734375, 29.949615478515625, 148.50537109375, -46.363121032714844], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000144.npy"}
{"epoch": 0.21145374449339208, "step": 145, "batch_size": 64, "mean": 58.01053237915039, "std": 80.9017562866211, "min": -119.76181030273438, "p10": -43.093291091918935, "median": 44.6391544342041, "p90": 149.29158325195314, "max": 263.7781066894531, "pos_frac": 0.75, "sample": [24.150001525878906, -46.801727294921875, -98.00900268554688, 263.7781066894531, 146.59869384765625, 115.28528594970703, 245.866455078125, 2.3500022888183594, -119.76181030273438, 58.11143112182617, 6.938201904296875, 76.64479064941406, 143.8167266845703, -52.64946365356445, 11.25197982788086, 119.09449005126953, 53.07349395751953, 41.577796936035156, 120.28163146972656, -48.331687927246094, -10.165246963500977, 38.54896545410156, 42.37529754638672, 138.80374145507812, 92.56074523925781, -65.07764434814453, -9.486099243164062, 29.333499908447266, -11.60107421875, 178.76116943359375, 24.327390670776367, 14.896514892578125, 113.45651245117188, -34.44027328491211, -54.18174743652344, 119.50086975097656, 16.489532470703125, 21.595260620117188, 80.63475799560547, -9.893608093261719, 69.17727661132812, -7.080387115478516, 117.01251983642578, -15.33194351196289, 113.81771850585938, 193.8894500732422, 101.62879943847656, -21.72480010986328, 50.16099166870117, 123.86479949951172, 29.94211196899414, 41.50262451171875, 144.03187561035156, 157.85330200195312, 61.40230941772461, 18.424850463867188, -7.049629211425781, 66.79734802246094, 133.12576293945312, 119.5760726928711, 233.5308837890625, 46.903011322021484, 11.069503784179688, 150.4456787109375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000145.npy"}
{"epoch": 0.21292217327459617, "step": 146, "batch_size": 64, "mean": 44.91979217529297, "std": 88.55123138427734, "min": -234.5234375, "p10": -71.60041046142578, "median": 58.950172424316406, "p90": 158.13821868896488, "max": 244.2855987548828, "pos_frac": 0.671875, "sample": [33.70349884033203, -35.30174255371094, 108.9287109375, -66.05555725097656, 18.867761611938477, 3.284454345703125, 77.01988983154297, 68.97903442382812, 78.32612609863281, 64.898193359375, 212.2713623046875, 100.93636322021484, 43.389495849609375, -9.846176147460938, 24.556747436523438, -7.223823547363281, 93.2442398071289, -10.6055908203125, 39.052833557128906, -18.782365798950195, -5.709386825561523, 29.216384887695312, 62.39379119873047, 114.56108093261719, -98.79191589355469, 68.81851959228516, 150.2222137451172, -234.5234375, -73.97677612304688, 204.9688720703125, -80.45136260986328, 74.42855072021484, -4.992525100708008, 74.25656127929688, 189.4229278564453, -82.37883758544922, 46.400787353515625, 60.47938537597656, 117.63536834716797, 244.2855987548828, 7.304588317871094, -4.617561340332031, 60.55720901489258, 78.34759521484375, 60.63908386230469, 103.19207000732422, 103.28421783447266, -100.86625671386719, 57.42095947265625, -76.20785522460938, 81.82034301757812, 139.56161499023438, 161.53079223632812, 74.77215576171875, -9.061935424804688, 93.88910675048828, 36.93329620361328, -35.78247833251953, 85.09661865234375, -51.90069580078125, 229.0569610595703, -34.35150909423828, 188.97439575195312, -50.63520812988281], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000146.npy"}
{"epoch": 0.2143906020558003, "step": 147, "batch_size": 64, "mean": 61.123626708984375, "std": 90.97154998779297, "min": -150.99014282226562, "p10": -32.018942260742186, "median": 46.63227844238281, "p90": 194.4804809570313, "max": 299.2396240234375, "pos_frac": 0.75, "sample": [108.34030151367188, -63.8995475769043, 41.38520812988281, 233.08401489257812, -27.532752990722656, -10.45562744140625, 97.98904418945312, 98.07390594482422, 52.34715270996094, -43.39398956298828, 29.635160446166992, 76.9283218383789, 60.169036865234375, 126.10427856445312, 25.121280670166016, 197.30050659179688, 115.93125915527344, 19.875452041625977, -2.869720458984375, 187.90042114257812, 161.6136932373047, -26.28392791748047, 174.9115753173828, 24.0327205657959, 272.56298828125, 48.13176727294922, 201.12310791015625, 107.68255615234375, -12.760931015014648, 45.132789611816406, 29.67275619506836, -132.35357666015625, 4.214160919189453, 116.44447326660156, 105.19442749023438, -28.442764282226562, 61.19743347167969, 299.2396240234375, 77.2400894165039, 22.28738784790039, 178.82534790039062, 14.975372314453125, -15.702108383178711, 14.643486022949219, 35.1424560546875, 59.648887634277344, -9.417495727539062, 79.98469543457031, 66.74632263183594, -34.094200134277344, 20.87945556640625, 50.37480545043945, 19.77376937866211, 174.03175354003906, -24.010597229003906, 58.560848236083984, 239.75189208984375, -33.55158996582031, 32.729248046875, -150.99014282226562, 214.13241577148438, -58.99395751953125, 60.676231384277344, 44.921051025390625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000147.npy"}
{"epoch": 0.21585903083700442, "step": 148, "batch_size": 64, "mean": 52.45569610595703, "std": 83.38066864013672, "min": -103.42143249511719, "p10": -29.771527862548826, "median": 36.24748229980469, "p90": 145.40213317871095, "max": 339.39599609375, "pos_frac": 0.796875, "sample": [-7.736240386962891, 51.132164001464844, 5.384185791015625, 83.87408447265625, -60.479393005371094, 99.6413345336914, 81.7017822265625, 39.60439682006836, -42.98198699951172, 194.9795684814453, 19.627235412597656, 143.70932006835938, 20.379777908325195, -12.017082214355469, 155.7838592529297, 254.27102661132812, 2.515033721923828, 15.632766723632812, -40.9862060546875, -68.71804809570312, 136.54376220703125, 132.2263641357422, -11.225912094116211, 91.8592529296875, 98.88398742675781, 42.36607360839844, 63.830230712890625, 48.68648910522461, 108.186767578125, 0.7803192138671875, 146.12762451171875, 34.351287841796875, 37.580955505371094, 70.05537414550781, -31.348800659179688, 47.0318717956543, 32.3209228515625, 17.051956176757812, 112.19867706298828, 56.21267318725586, 132.17422485351562, 35.52227783203125, -82.49969482421875, 47.410400390625, 36.972686767578125, 23.22449493408203, 186.10369873046875, 48.169189453125, -3.9108810424804688, -26.091224670410156, -103.42143249511719, 41.173675537109375, 9.192550659179688, 6.731943130493164, 34.385887145996094, 6.786380767822266, 34.262290954589844, 339.39599609375, 321.34588623046875, 7.844518661499023, -4.284708023071289, 1.8624954223632812, 89.78684997558594, 5.989534378051758], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000148.npy"}
{"epoch": 0.2173274596182085, "step": 149, "batch_size": 64, "mean": 64.97525024414062, "std": 82.18740844726562, "min": -186.9498748779297, "p10": -7.881933593749999, "median": 58.48173713684082, "p90": 152.81048736572265, "max": 309.88983154296875, "pos_frac": 0.828125, "sample": [111.1567611694336, 67.32315063476562, 208.16717529296875, 60.67275619506836, 105.38780975341797, 69.14472961425781, 73.50735473632812, -12.864194869995117, -20.60124969482422, -8.28506088256836, 22.06391716003418, -6.642768859863281, 58.8624382019043, 44.58545684814453, 62.382171630859375, -64.77249145507812, 88.66107177734375, 95.7430419921875, 45.13909912109375, 134.77374267578125, 109.25401306152344, 170.32095336914062, 103.75988006591797, 29.56923484802246, 14.79517936706543, 97.65922546386719, 48.822998046875, 124.80110931396484, 74.69093322753906, 128.50918579101562, 64.06810760498047, -1.8471508026123047, 309.88983154296875, 38.476654052734375, 152.9779052734375, 25.323989868164062, 226.12158203125, 65.0462417602539, 29.245101928710938, 108.03685760498047, 24.215999603271484, 75.69808959960938, 12.352716445922852, 45.38458251953125, -1.7980880737304688, 53.41222381591797, 13.012264251708984, 58.101036071777344, -14.7293701171875, -186.9498748779297, 97.2493896484375, 16.1490478515625, 1.6552581787109375, 152.4198455810547, 12.177549362182617, 278.8190002441406, 17.474651336669922, 41.251373291015625, 142.93728637695312, 265.796630859375, 6.0940399169921875, -6.941303253173828, 87.78814697265625, -87.08113861083984], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000149.npy"}
{"epoch": 0.21879588839941264, "step": 150, "batch_size": 64, "mean": 63.64070129394531, "std": 94.79381561279297, "min": -84.0924072265625, "p10": -23.391272735595695, "median": 37.71773719787598, "p90": 171.27693634033204, "max": 416.4287109375, "pos_frac": 0.8125, "sample": [294.9749755859375, 322.9481201171875, 16.600242614746094, 30.2957763671875, 61.4356689453125, -37.84284210205078, 181.38351440429688, 36.614654541015625, -80.37628936767578, 70.13689422607422, 416.4287109375, -5.994932174682617, -16.24474334716797, 19.266799926757812, -8.587066650390625, 130.62608337402344, 19.133895874023438, 51.15086364746094, 47.957672119140625, 74.51073455810547, 38.82081985473633, 56.21813201904297, 150.31546020507812, 28.107391357421875, 107.58892822265625, 88.93611145019531, 191.9693145751953, 131.9008331298828, 15.606258392333984, 69.77655029296875, 14.973976135253906, -46.830322265625, 82.93643951416016, 173.3606719970703, 70.12551879882812, 8.321395874023438, 166.41488647460938, 21.918195724487305, 144.8212890625, -26.454071044921875, 93.95545959472656, -39.22657012939453, -40.85621643066406, 5.522148132324219, 119.98302459716797, 5.886054992675781, 27.042503356933594, 64.4048080444336, 16.76036834716797, 39.068092346191406, 57.51628112792969, 68.70529174804688, 65.04694366455078, 18.352317810058594, 32.24340057373047, 351.09393310546875, 14.221105575561523, 16.962234497070312, 12.800714492797852, 31.22931671142578, 94.60906219482422, -10.008842468261719, -84.0924072265625, -1.460479736328125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000150.npy"}
{"epoch": 0.22026431718061673, "step": 151, "batch_size": 64, "mean": 63.97663497924805, "std": 79.69890594482422, "min": -145.37985229492188, "p10": -27.358609390258785, "median": 56.06666946411133, "p90": 180.6449401855469, "max": 247.423095703125, "pos_frac": 0.84375, "sample": [-145.37985229492188, 219.5823211669922, 47.96214294433594, 0.06195831298828125, 39.89875030517578, 56.35795593261719, 68.83818054199219, 113.04080200195312, 72.43305969238281, 28.696609497070312, 181.47433471679688, -42.62416076660156, -28.43665313720703, 103.32035827636719, 55.77538299560547, -45.4798469543457, -7.153629302978516, 41.77973937988281, 57.22035217285156, 50.695945739746094, 231.88055419921875, 55.517215728759766, 146.13246154785156, 56.45885467529297, -55.60492706298828, 176.6710205078125, 24.436935424804688, 3.548431396484375, -24.84317398071289, 21.336456298828125, 37.367469787597656, 11.056417465209961, 19.858570098876953, 9.35029411315918, 106.7130126953125, 178.70968627929688, -63.39115905761719, 65.15544128417969, 104.07942962646484, 103.66462707519531, 7.971609115600586, 57.80180358886719, 99.49615478515625, 82.24346923828125, 11.880218505859375, 193.74478149414062, 106.10334777832031, 57.00877380371094, 119.92941284179688, -2.5465316772460938, 209.5596160888672, 8.443405151367188, 2.7805347442626953, 140.55044555664062, 15.388351440429688, 14.74431037902832, 203.64495849609375, 109.3356704711914, 247.423095703125, 176.59811401367188, -48.80390167236328, 76.35371398925781, 21.35177993774414, 107.34024047851562], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000151.npy"}
{"epoch": 0.22173274596182085, "step": 152, "batch_size": 64, "mean": 77.73429870605469, "std": 94.58830261230469, "min": -89.9403076171875, "p10": -39.78428497314452, "median": 60.51862144470215, "p90": 213.02692718505864, "max": 313.92730712890625, "pos_frac": 0.796875, "sample": [28.725692749023438, 55.123779296875, 164.42852783203125, 177.2931671142578, 75.54270935058594, 201.37994384765625, 61.48945236206055, 139.92791748046875, 313.92730712890625, 35.64983367919922, 154.69174194335938, 128.3113250732422, 34.697200775146484, -64.4804458618164, -3.1575698852539062, 228.3402099609375, 143.19943237304688, 49.37322235107422, 68.72006225585938, 64.27336120605469, 57.27635955810547, 51.99107360839844, -46.65647888183594, 73.77911376953125, 2.4193878173828125, 312.81787109375, 130.95416259765625, 220.9734344482422, 43.02583312988281, 22.68498992919922, 61.56596374511719, -54.768585205078125, 22.55093002319336, -5.9158172607421875, 1.8531150817871094, 59.54779052734375, 18.545982360839844, 199.32534790039062, 33.311424255371094, 46.18495178222656, 283.2729797363281, -22.559288024902344, 54.4014892578125, 203.9090576171875, 67.44772338867188, -30.9959716796875, 239.84510803222656, 33.370140075683594, -72.98987579345703, 108.01763916015625, 3.7517337799072266, 140.3135528564453, 90.14071655273438, 70.81981658935547, 216.93458557128906, -44.55479431152344, 160.80075073242188, -89.9403076171875, -7.6394805908203125, 115.56538391113281, -43.55070495605469, -30.077980041503906, 134.46170043945312, 85.32743072509766], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000152.npy"}
{"epoch": 0.22320117474302498, "step": 153, "batch_size": 64, "mean": 66.30891418457031, "std": 100.70272827148438, "min": -173.40496826171875, "p10": -35.15238914489746, "median": 54.118690490722656, "p90": 195.9188171386719, "max": 413.03076171875, "pos_frac": 0.75, "sample": [82.84722900390625, -2.19366455078125, 10.200103759765625, -6.14794921875, 19.908992767333984, -8.800247192382812, 29.018035888671875, 159.9783935546875, -39.13038635253906, 188.5614471435547, 95.6507568359375, 7.411994934082031, -41.0736198425293, 83.87522888183594, 242.51246643066406, -68.09335327148438, 79.24398803710938, 16.827224731445312, 54.45860290527344, 104.13871002197266, 53.778778076171875, 413.03076171875, 40.11682891845703, 64.54161834716797, 80.8909912109375, 185.52133178710938, -23.492774963378906, -41.999229431152344, 199.0719757080078, 204.02236938476562, 138.62936401367188, 38.30693817138672, 239.16455078125, 5.887172698974609, 1.9129867553710938, 1.0827484130859375, 164.20773315429688, 96.65348815917969, -34.2165412902832, -35.553466796875, 126.43887329101562, 12.96551513671875, 10.904953002929688, 263.6733093261719, 23.16303253173828, -14.963409423828125, -59.82416534423828, 64.24595642089844, 10.226242065429688, -173.40496826171875, 90.0238037109375, 354.30169677734375, 74.89581298828125, -31.112316131591797, 65.52996063232422, -4.583774566650391, 96.19742584228516, 43.59278869628906, 133.99325561523438, 134.45779418945312, 75.64237976074219, -20.309768676757812, 61.25933074951172, 105.70535278320312], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000153.npy"}
{"epoch": 0.22466960352422907, "step": 154, "batch_size": 64, "mean": 63.691261291503906, "std": 105.8705825805664, "min": -204.57073974609375, "p10": -54.22415390014648, "median": 55.08279037475586, "p90": 196.8136642456055, "max": 296.6632385253906, "pos_frac": 0.65625, "sample": [-10.295719146728516, 55.36662292480469, -204.57073974609375, 25.265657424926758, 296.6632385253906, 54.79895782470703, -11.564149856567383, 148.22299194335938, 176.367431640625, -52.87777328491211, 89.66529083251953, 231.33604431152344, -87.23667907714844, 126.43862915039062, 120.18603515625, 62.21316146850586, 2.5756263732910156, 20.696807861328125, 35.39558792114258, -5.008392333984375, 111.53956604003906, 58.92300796508789, -5.085514068603516, 61.73603439331055, -28.610626220703125, 188.84030151367188, -54.80117416381836, 211.36630249023438, 152.365234375, -88.20185852050781, -35.292022705078125, 181.54876708984375, -75.52364349365234, 168.06976318359375, -51.43292236328125, 200.55697631835938, 296.47222900390625, 171.54254150390625, -3.3242034912109375, 179.0016326904297, 200.23081970214844, 118.77035522460938, 19.260417938232422, 80.56251525878906, 187.12757873535156, -12.950294494628906, -18.106948852539062, -7.392366409301758, -89.77014923095703, 116.65177917480469, 156.4422149658203, -20.608123779296875, 3.5511112213134766, 26.42132568359375, 7.7357177734375, -79.11891174316406, 110.48678588867188, 141.42727661132812, 119.56897735595703, -50.47801971435547, -8.5196533203125, 1.292013168334961, 81.31922912597656, 279.0081787109375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000154.npy"}
{"epoch": 0.2261380323054332, "step": 155, "batch_size": 64, "mean": 64.17315673828125, "std": 95.49504089355469, "min": -211.83900451660156, "p10": -23.313384246826168, "median": 62.36770248413086, "p90": 168.85712890625007, "max": 357.54449462890625, "pos_frac": 0.734375, "sample": [-8.242698669433594, 88.9460678100586, 58.142845153808594, -7.51409912109375, 92.01031494140625, 13.370418548583984, 81.87337493896484, -28.748870849609375, 74.70836639404297, 23.08559799194336, -7.931854248046875, 63.771080017089844, 101.19336700439453, -78.6279296875, 107.9126968383789, 73.03477478027344, -25.878559112548828, 47.89403533935547, 60.964324951171875, -1.896697998046875, 73.72659301757812, 69.44701385498047, 39.758392333984375, 357.54449462890625, 190.16604614257812, 8.483175277709961, 134.57228088378906, 104.4472427368164, 8.810691833496094, -123.22993469238281, 22.320146560668945, -1.5773773193359375, 23.72527313232422, 96.022705078125, 190.57687377929688, -211.83900451660156, 280.95751953125, 48.066078186035156, 133.9407196044922, -17.08625030517578, 1.8471946716308594, 93.64866638183594, 148.73336791992188, 43.470977783203125, 282.63031005859375, 98.45718383789062, 288.56805419921875, 149.98370361328125, -77.3360595703125, 99.46792602539062, -2.63214111328125, 176.94573974609375, -15.789249420166016, 111.9359359741211, 88.35406494140625, -49.873497009277344, 59.845787048339844, 131.23095703125, 123.2922592163086, 11.466346740722656, 134.18519592285156, -3.601961135864258, -17.32797622680664, 72.68024444580078], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000155.npy"}
{"epoch": 0.2276064610866373, "step": 156, "batch_size": 64, "mean": 36.849365234375, "std": 107.44945526123047, "min": -279.02392578125, "p10": -97.2720993041992, "median": 24.46210289001465, "p90": 137.18368225097657, "max": 284.0477294921875, "pos_frac": 0.734375, "sample": [129.13162231445312, 58.575416564941406, 284.0477294921875, 256.1448974609375, 175.81146240234375, 115.14730834960938, -198.45913696289062, 2.3913726806640625, -84.35293579101562, 20.47844696044922, -201.8380584716797, 132.24990844726562, 96.91983795166016, -22.72191619873047, -63.949527740478516, -135.24447631835938, 41.58877182006836, 104.60366821289062, 248.5637664794922, 76.00065612792969, 21.092941284179688, 121.99472045898438, 49.451934814453125, 19.434818267822266, 4.109811782836914, -9.549934387207031, 15.93315315246582, 30.480873107910156, 253.182373046875, -16.526885986328125, 243.02322387695312, 139.29815673828125, 121.57125854492188, 68.51042175292969, 9.196441650390625, 7.104825973510742, 75.32970428466797, -123.2247314453125, -279.02392578125, -77.72122955322266, 80.18314361572266, -53.254493713378906, 2.5025177001953125, 124.73802185058594, 12.617141723632812, -102.80888366699219, 84.02633666992188, 118.06803131103516, 78.52482604980469, 7.444757461547852, -3.3461952209472656, 78.46498107910156, 59.51366424560547, 54.39777374267578, -108.8396224975586, -48.630584716796875, 9.801284790039062, 21.79559326171875, 6.7004241943359375, -3.2331161499023438, 27.128612518310547, 73.9726333618164, 10.16506576538086, 119.6707763671875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000156.npy"}
{"epoch": 0.2290748898678414, "step": 157, "batch_size": 64, "mean": 81.02685546875, "std": 105.24240112304688, "min": -301.76251220703125, "p10": -28.232052040100097, "median": 72.12530517578125, "p90": 197.43164520263676, "max": 380.7120361328125, "pos_frac": 0.828125, "sample": [160.23654174804688, 166.857421875, 84.30216217041016, 187.06065368652344, 132.3082275390625, 8.6319580078125, 76.54104614257812, 143.78170776367188, 4.269233703613281, 149.00888061523438, 57.1068229675293, 205.57437133789062, 51.759117126464844, 87.838134765625, 224.67022705078125, 201.81605529785156, 148.32843017578125, 106.88497924804688, 41.25337219238281, 160.35861206054688, 12.773200988769531, 38.17854309082031, 67.70956420898438, -29.080982208251953, 112.625, 249.2563934326172, -4.195343017578125, 31.048137664794922, -1.04669189453125, 114.11320495605469, -301.76251220703125, -40.96263885498047, 25.832046508789062, 49.801490783691406, 128.3794708251953, 80.35388946533203, 251.23501586914062, 128.00445556640625, 105.84117126464844, -34.056800842285156, 30.300609588623047, 283.6600036621094, 34.193031311035156, 61.59687042236328, 8.949817657470703, 179.8236846923828, 107.54014587402344, -26.2512149810791, 174.11477661132812, 126.94332885742188, 185.77413940429688, 44.680572509765625, -59.95267868041992, 20.0509033203125, 187.20135498046875, 156.51904296875, 380.7120361328125, 55.73656463623047, 44.177642822265625, -142.98306274414062, 18.081512451171875, -61.15185546875, 13.187736511230469, -19.820693969726562], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000157.npy"}
{"epoch": 0.2305433186490455, "step": 158, "batch_size": 64, "mean": 86.34577178955078, "std": 131.8433074951172, "min": -172.3934326171875, "p10": -45.75433158874511, "median": 52.530113220214844, "p90": 275.0997207641603, "max": 484.8415222167969, "pos_frac": 0.71875, "sample": [7.053623199462891, 11.76576042175293, 42.30729675292969, 50.15538024902344, 51.83435821533203, -29.07453155517578, 32.546085357666016, 84.4594955444336, 35.878448486328125, -53.27437973022461, 213.2076416015625, -112.85446166992188, 250.35203552246094, 83.0469970703125, -14.282180786132812, -54.27161407470703, -16.715600967407227, 106.28567504882812, -11.38764762878418, 15.000495910644531, 228.6588134765625, -37.22029113769531, 285.70587158203125, -172.3934326171875, 298.35906982421875, 76.25305938720703, 140.13954162597656, 20.862550735473633, 36.09869384765625, 12.23293685913086, 77.28169250488281, 85.6518783569336, -20.285354614257812, 155.05233764648438, -56.07265090942383, -6.3163909912109375, 338.049072265625, 102.08768463134766, -118.9567642211914, 334.70782470703125, 417.83447265625, -1.7029876708984375, 66.3503646850586, 484.8415222167969, 20.721649169921875, 12.006172180175781, -49.41177749633789, 119.60354614257812, 235.3123779296875, 108.98657989501953, 378.3426513671875, 171.36386108398438, 100.73786926269531, 172.8768768310547, -16.0434627532959, 178.331787109375, 97.75636291503906, -8.579645156860352, 24.634315490722656, 99.14688873291016, 248.27926635742188, 53.225868225097656, 158.4935302734375, -18.90785026550293], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000158.npy"}
{"epoch": 0.23201174743024963, "step": 159, "batch_size": 64, "mean": 58.78101348876953, "std": 104.94033813476562, "min": -206.7349090576172, "p10": -58.55917892456054, "median": 56.68996047973633, "p90": 192.9366394042969, "max": 316.788330078125, "pos_frac": 0.75, "sample": [10.2713623046875, 40.52814483642578, 139.98619079589844, -48.58119201660156, 243.75155639648438, 135.36056518554688, -206.7349090576172, -39.05429458618164, 120.49058532714844, 162.104248046875, 91.52662658691406, -59.76747131347656, 14.890512466430664, -20.950225830078125, 89.74540710449219, -75.34293365478516, -115.367919921875, 55.56393051147461, 185.531982421875, 141.36700439453125, 316.788330078125, 8.77457046508789, -46.533843994140625, 18.895767211914062, 31.429645538330078, -73.47633361816406, 20.27684783935547, -54.605621337890625, 87.62757873535156, 53.80741882324219, -20.98736572265625, 194.70758056640625, 84.51277923583984, 13.155261993408203, 115.4240493774414, -22.77610206604004, 56.92761993408203, -98.9150390625, -3.7536964416503906, 219.88650512695312, 142.86935424804688, 220.95260620117188, 61.1929931640625, 70.94637298583984, 0.27294921875, 276.3139343261719, 13.31572151184082, 1.1434364318847656, 72.39155578613281, 56.452301025390625, 132.57553100585938, 32.17328643798828, -55.739830017089844, -166.64779663085938, 69.83319854736328, 99.01448059082031, 95.9964370727539, 93.97065734863281, 23.82750129699707, 302.1992492675781, 188.804443359375, 67.08932495117188, 70.50840759277344, 126.04339599609375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000159.npy"}
{"epoch": 0.23348017621145375, "step": 160, "batch_size": 64, "mean": 79.83772277832031, "std": 128.94635009765625, "min": -144.22312927246094, "p10": -68.76609268188476, "median": 64.32367324829102, "p90": 244.6802429199219, "max": 458.209228515625, "pos_frac": 0.6875, "sample": [100.73914337158203, -8.171194076538086, 192.07534790039062, 156.32240295410156, -10.593387603759766, -71.38813781738281, 193.98837280273438, 178.5273895263672, 169.89178466796875, 13.516260147094727, -80.31810760498047, -8.662063598632812, 141.33351135253906, 238.44509887695312, 118.0422134399414, -69.71775817871094, 38.90143966674805, -24.222488403320312, -144.22312927246094, 229.87135314941406, 275.27667236328125, -117.99577331542969, 247.35244750976562, -63.076942443847656, 142.78749084472656, 47.201904296875, 2.4636611938476562, -14.415451049804688, 458.209228515625, 62.80046844482422, 15.171966552734375, 65.84687805175781, -31.744415283203125, 386.47845458984375, -3.735809326171875, 75.66212463378906, 107.7022933959961, 160.9268035888672, 375.3243408203125, 302.55859375, 33.01544952392578, 214.91561889648438, -142.9562530517578, 206.33120727539062, 94.17925262451172, 14.34588623046875, -29.505983352661133, 18.99909782409668, 115.58309173583984, 112.01837158203125, -40.538475036621094, 32.55341339111328, 114.74720764160156, -66.54553985595703, 94.82272338867188, 78.41497802734375, -10.092615127563477, 0.10766410827636719, -95.67266082763672, 158.4948272705078, 32.04423522949219, 254.1634521484375, -42.710906982421875, 113.74745178222656], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000160.npy"}
{"epoch": 0.23494860499265785, "step": 161, "batch_size": 64, "mean": 70.11195373535156, "std": 103.75558471679688, "min": -133.17938232421875, "p10": -48.507591247558594, "median": 48.72218894958496, "p90": 229.9694381713868, "max": 320.79327392578125, "pos_frac": 0.765625, "sample": [77.78748321533203, 198.07281494140625, -49.75126647949219, 138.33384704589844, 302.3898010253906, -133.17938232421875, -50.309478759765625, -66.86105346679688, 98.02284240722656, -44.96428680419922, 44.95973205566406, 97.67254638671875, 79.40994262695312, -70.03387451171875, 1.6907768249511719, 122.64436340332031, 320.79327392578125, 79.01242065429688, 142.55523681640625, 265.3077392578125, 17.929847717285156, 58.29291915893555, -7.7006378173828125, -34.89569854736328, 33.4937744140625, -10.962383270263672, 123.8713150024414, 38.309288024902344, 279.0981140136719, 298.4710693359375, 123.46359252929688, -31.688241958618164, 9.604175567626953, -26.551799774169922, 238.49832153320312, 113.74285125732422, 34.052825927734375, -45.605682373046875, 18.206008911132812, 86.17781066894531, -9.257049560546875, 28.137245178222656, 10.681526184082031, 32.395721435546875, 14.667213439941406, 26.821258544921875, 205.1287841796875, 74.62252044677734, 15.64657211303711, 144.0406494140625, 210.06871032714844, -105.59165954589844, 71.27188110351562, 169.65298461914062, 34.45178985595703, 41.59492492675781, 32.75459671020508, 123.44648742675781, 89.87501525878906, -88.4205551147461, 52.48464584350586, 258.1369323730469, 113.66654968261719, 71.52692413330078], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000161.npy"}
{"epoch": 0.23641703377386197, "step": 162, "batch_size": 64, "mean": 69.01253509521484, "std": 110.04146575927734, "min": -171.9070587158203, "p10": -41.58489608764648, "median": 59.48511505126953, "p90": 200.58472137451173, "max": 451.7046813964844, "pos_frac": 0.734375, "sample": [-0.18424224853515625, 53.1878776550293, 105.26446533203125, 244.5373992919922, 58.64082336425781, 218.92800903320312, 31.197927474975586, 135.8738250732422, 38.799591064453125, -9.640188217163086, -21.275407791137695, 195.5872344970703, 69.93254089355469, -108.70964813232422, -37.6048583984375, -17.26824951171875, 159.55470275878906, 165.09896850585938, -5.072418212890625, 122.10513305664062, 72.23873138427734, 27.957839965820312, 50.15156555175781, 7.973182678222656, 21.37550926208496, 202.72650146484375, 127.09367370605469, -33.48692321777344, -171.9070587158203, 145.72190856933594, 99.87153625488281, -109.50778198242188, -15.386384963989258, 17.649568557739258, 149.59036254882812, 68.36149597167969, 229.9298095703125, 451.7046813964844, 64.4131088256836, 115.13018035888672, 85.7401351928711, 93.14372253417969, 147.7036895751953, 161.00083923339844, 145.58920288085938, 2.8403778076171875, -43.290626525878906, -1.5803909301757812, 94.03482055664062, 41.358551025390625, -77.98683166503906, 164.10157775878906, 3.6207122802734375, -93.24971771240234, 386.21929931640625, 26.279708862304688, -112.54009246826172, 32.93421936035156, 75.84748840332031, 86.23297882080078, 60.32940673828125, -32.669708251953125, 39.200408935546875, 211.387451171875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000162.npy"}
{"epoch": 0.23788546255506607, "step": 163, "batch_size": 64, "mean": 67.38839721679688, "std": 107.95478820800781, "min": -156.95059204101562, "p10": -49.14996337890624, "median": 59.94000244140625, "p90": 255.52014160156256, "max": 345.3812255859375, "pos_frac": 0.671875, "sample": [-2.9799270629882812, -10.193901062011719, 260.79998779296875, 345.3812255859375, -28.077110290527344, -123.61593627929688, 94.17281341552734, -10.232589721679688, -8.874320983886719, 78.89418029785156, 131.18267822265625, -29.965835571289062, 101.31487274169922, 316.41375732421875, 105.97396087646484, 44.717994689941406, 96.97737884521484, -12.065322875976562, 14.116889953613281, -61.712867736816406, 54.089298248291016, 154.768798828125, 106.51165771484375, -24.821144104003906, 98.9544677734375, -0.5410366058349609, 121.89326477050781, -156.95059204101562, -66.6556625366211, 107.61602020263672, 260.7489013671875, 11.89859390258789, 135.93417358398438, 13.568227767944336, 67.29976654052734, -10.639904022216797, -51.40119934082031, 297.29180908203125, 129.5155029296875, 65.55427551269531, -69.29032897949219, 35.039329528808594, -43.89707946777344, 243.3197021484375, 43.456634521484375, 101.6003189086914, -31.30854034423828, 274.62799072265625, 143.60629272460938, 29.655324935913086, 59.35157775878906, 108.00286865234375, 60.52842712402344, 181.58865356445312, 80.4487075805664, -22.444419860839844, 81.60395050048828, 10.598251342773438, -93.97615814208984, 0.7972545623779297, 262.8555603027344, 90.55572509765625, 160.29061889648438, -11.016754150390625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000163.npy"}
{"epoch": 0.2393538913362702, "step": 164, "batch_size": 64, "mean": 82.47865295410156, "std": 104.09668731689453, "min": -149.0464324951172, "p10": -21.54691677093506, "median": 70.69278335571289, "p90": 207.3106262207032, "max": 420.13641357421875, "pos_frac": 0.84375, "sample": [-145.28028869628906, 164.22796630859375, 164.93646240234375, 163.8968505859375, 223.56390380859375, 50.839149475097656, 78.29638671875, 124.92376708984375, 71.01780700683594, 73.68321990966797, 240.57675170898438, 48.40460968017578, 21.436403274536133, 49.987693786621094, -63.55421447753906, 129.14572143554688, 61.88874053955078, 64.43948364257812, -21.691682815551758, 70.36775970458984, 4.358854293823242, 128.04452514648438, 72.29247283935547, 190.82666015625, 30.934846878051758, 148.48678588867188, -23.890384674072266, -87.6006851196289, 146.57620239257812, 15.966264724731445, 44.29328918457031, -19.306182861328125, 150.8522186279297, 173.18829345703125, 137.18368530273438, 112.60142517089844, 27.402320861816406, 100.65877532958984, 15.112144470214844, 167.1182403564453, 39.76591491699219, 90.61593627929688, 67.3746337890625, 34.345367431640625, 44.057334899902344, 3.124603271484375, 73.03570556640625, 36.244712829589844, -149.0464324951172, 136.21388244628906, 78.90946197509766, 51.43206024169922, 335.5709533691406, 0.886474609375, 346.01263427734375, 241.302001953125, 0.2325592041015625, 214.37518310546875, 83.51026916503906, -54.24186706542969, -21.209129333496094, 104.07669067382812, 420.13641357421875, -4.297607421875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000164.npy"}
{"epoch": 0.24082232011747431, "step": 165, "batch_size": 64, "mean": 53.16825485229492, "std": 99.0621337890625, "min": -148.7236328125, "p10": -81.37128295898438, "median": 39.51768112182617, "p90": 202.8422912597657, "max": 276.6439208984375, "pos_frac": 0.703125, "sample": [91.33280181884766, 86.39925384521484, -81.19096374511719, 18.635971069335938, -90.60113525390625, -2.657358169555664, 79.13530731201172, 113.11834716796875, -15.912338256835938, -83.22811126708984, 127.32979583740234, -148.7236328125, -81.44856262207031, 37.81803894042969, 35.743255615234375, -92.57137298583984, -10.958744049072266, -48.511871337890625, 155.19146728515625, 148.85699462890625, 79.99681091308594, 24.87480926513672, 15.789180755615234, 276.6439208984375, -142.53443908691406, 35.4083251953125, -17.541418075561523, -47.45996856689453, -1.9852294921875, 138.525146484375, -89.21636199951172, 18.831396102905273, 211.836181640625, 186.50790405273438, 49.076507568359375, -25.36829376220703, 78.72441864013672, 60.26393127441406, 79.44438934326172, 3.991701126098633, 41.217323303222656, 62.52268981933594, 154.4408721923828, 76.4814682006836, 214.74322509765625, -18.672393798828125, 96.969970703125, 60.08538818359375, 4.659278869628906, -68.34805297851562, 32.570709228515625, 223.26913452148438, 30.547096252441406, -43.68347930908203, 59.73418045043945, 265.01263427734375, 28.72430419921875, 262.2296142578125, 209.84274291992188, 119.81388854980469, 113.5003662109375, 13.178550720214844, 111.3701171875, 178.99270629882812], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000165.npy"}
{"epoch": 0.2422907488986784, "step": 166, "batch_size": 64, "mean": 98.27601623535156, "std": 138.3394012451172, "min": -92.02621459960938, "p10": -35.51995162963866, "median": 65.75030708312988, "p90": 271.9027435302736, "max": 557.4837646484375, "pos_frac": 0.8125, "sample": [451.36016845703125, 312.20758056640625, 30.01132583618164, 488.19488525390625, -39.340065002441406, 7.761425018310547, 20.733829498291016, 73.34725952148438, 174.14959716796875, 43.067352294921875, 123.32148742675781, -10.46047592163086, 11.21634292602539, 156.4696044921875, -61.60060119628906, 76.95028686523438, 159.10687255859375, 20.956939697265625, 2.4375648498535156, 189.87728881835938, 2.9930286407470703, 557.4837646484375, 38.34022903442383, -47.49667739868164, -55.10023498535156, -52.40878677368164, 39.63457489013672, 142.36837768554688, -92.02621459960938, 235.20779418945312, 287.629150390625, 464.9535217285156, 110.03995513916016, 68.42062377929688, 26.994415283203125, 359.18096923828125, 11.215629577636719, 203.511474609375, 34.991363525390625, 148.97662353515625, -19.09653091430664, 86.5819091796875, 18.207923889160156, 11.562606811523438, 177.20155334472656, 128.9292755126953, 212.12071228027344, -5.715702056884766, 77.46281433105469, 171.468017578125, 32.09142303466797, 83.70193481445312, -62.654991149902344, 63.07999038696289, -5.60308837890625, -26.606353759765625, 113.93775177001953, 2.7675933837890625, 117.70196533203125, 6.40435791015625, 89.77345275878906, 17.50514793395996, 196.9781494140625, 87.18677520751953], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000166.npy"}
{"epoch": 0.24375917767988253, "step": 167, "batch_size": 64, "mean": 102.81171417236328, "std": 114.12342834472656, "min": -135.66213989257812, "p10": -17.381314086914063, "median": 88.16933822631836, "p90": 248.9689682006837, "max": 416.73626708984375, "pos_frac": 0.84375, "sample": [105.78703308105469, 64.35887145996094, 143.90835571289062, 209.3494415283203, 112.28814697265625, 121.6527099609375, 92.99835205078125, 68.74273681640625, 93.93009185791016, 50.86228561401367, 416.73626708984375, 187.86068725585938, 72.15605163574219, -14.964553833007812, 23.562286376953125, 61.2642936706543, 80.79724884033203, -56.349693298339844, 131.61172485351562, 186.95123291015625, 24.376298904418945, 41.12303161621094, 79.46290588378906, 57.232444763183594, 106.76750183105469, -135.66213989257812, 272.98876953125, 210.66705322265625, -17.8165283203125, 173.51754760742188, -12.059114456176758, 104.45297241210938, 77.45683288574219, 54.78276062011719, 366.5274658203125, 154.19375610351562, 266.4932556152344, 83.34032440185547, 203.0648956298828, -126.15643310546875, 224.69349670410156, 259.37274169921875, 329.0176696777344, 145.29067993164062, 399.61663818359375, -16.365814208984375, 176.0743408203125, 137.02310180664062, 216.69403076171875, 174.95176696777344, 100.4859619140625, 13.210220336914062, 1.1122684478759766, 31.966358184814453, 57.38018798828125, 132.86978149414062, 98.81348419189453, 58.695457458496094, 22.887863159179688, -96.3230972290039, 25.48099136352539, 14.441713333129883, -45.31965637207031, -20.377609252929688], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000167.npy"}
{"epoch": 0.24522760646108663, "step": 168, "batch_size": 64, "mean": 75.8309555053711, "std": 103.14295196533203, "min": -85.4189453125, "p10": -51.034277725219724, "median": 53.82217597961426, "p90": 196.01741943359374, "max": 399.4186096191406, "pos_frac": 0.796875, "sample": [17.766082763671875, 114.85799407958984, 26.302719116210938, 20.528675079345703, 4.167203903198242, 32.74775695800781, 135.66961669921875, -47.5874137878418, 26.75619888305664, 14.40179443359375, -85.4189453125, 186.15760803222656, 399.4186096191406, 93.28314208984375, -52.511505126953125, 194.46408081054688, 175.55996704101562, 231.4940643310547, 196.68313598632812, 47.23493194580078, 216.52127075195312, 272.231201171875, 179.17803955078125, 166.4266357421875, -44.762107849121094, -52.52018737792969, -16.012359619140625, 128.0475616455078, 150.53970336914062, 134.94891357421875, 86.74760437011719, 34.436927795410156, -62.76991271972656, 95.40486145019531, 36.28472900390625, 116.86154174804688, -69.77400207519531, -11.063243865966797, -27.042102813720703, 31.064559936523438, 28.39555549621582, 89.20020294189453, 61.26220703125, 94.66972351074219, 112.52790832519531, 67.16057586669922, 28.439268112182617, -60.97180938720703, 279.0287170410156, 56.907737731933594, 152.86842346191406, 21.960599899291992, 26.066965103149414, 27.30999755859375, 126.43228912353516, 54.50822448730469, 29.0571346282959, -82.78446197509766, 53.13612747192383, -35.36424255371094, 375.1299133300781, 19.60601043701172, 96.98699951171875, 134.9222412109375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000168.npy"}
{"epoch": 0.24669603524229075, "step": 169, "batch_size": 64, "mean": 67.05125427246094, "std": 129.48065185546875, "min": -270.453369140625, "p10": -68.95516281127928, "median": 44.00858116149902, "p90": 223.31069793701175, "max": 557.361572265625, "pos_frac": 0.75, "sample": [76.30461883544922, 88.17542266845703, 5.224224090576172, 18.14038848876953, 216.6363983154297, 112.08379364013672, 18.078018188476562, 159.49378967285156, 89.81957244873047, 34.78791427612305, -1.6906929016113281, 46.026283264160156, -270.453369140625, 2.1766204833984375, 55.68851852416992, -24.341598510742188, 26.681156158447266, -84.62751770019531, 86.25613403320312, 166.75485229492188, 148.6192626953125, -55.74229431152344, 66.41070556640625, -86.75210571289062, 209.112060546875, 226.17111206054688, 63.93973922729492, 3.927703857421875, -14.3048095703125, 31.307373046875, 210.91427612304688, -6.412818908691406, 17.6641788482666, 69.04356384277344, 91.14251708984375, 130.53707885742188, 251.1644287109375, 25.382360458374023, 69.87765502929688, 231.4363555908203, 5.992246627807617, 387.63336181640625, -121.36211395263672, 89.58624267578125, 8.292274475097656, 54.70881652832031, 30.2142333984375, -74.6178207397461, 340.0373229980469, -14.694450378417969, 557.361572265625, -4.2309112548828125, 96.57698059082031, 16.640174865722656, 289.820068359375, -8.54388427734375, 81.32675170898438, 15.802078247070312, 41.99087905883789, -28.84813690185547, -122.88600158691406, 92.01455688476562, 197.87025451660156, -144.0572509765625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000169.npy"}
{"epoch": 0.24816446402349487, "step": 170, "batch_size": 64, "mean": 78.93023681640625, "std": 105.91825103759766, "min": -231.65023803710938, "p10": -32.301135253906246, "median": 64.77691268920898, "p90": 210.05998840332035, "max": 324.6358947753906, "pos_frac": 0.828125, "sample": [188.20693969726562, -133.37445068359375, 139.651611328125, 84.10619354248047, 199.06414794921875, 80.09541320800781, 125.2147216796875, 15.303695678710938, 142.38967895507812, 4.5766143798828125, 90.3355712890625, -3.443920135498047, 73.5715103149414, -34.38865661621094, 8.59245491027832, -50.49967956542969, 115.19121551513672, 251.074462890625, 16.378005981445312, 294.9562683105469, 4.758611679077148, -20.538480758666992, 168.5037384033203, 47.865962982177734, 2.613800048828125, 19.75359344482422, -231.65023803710938, -30.047080993652344, 69.32705688476562, 194.8303680419922, 78.11793518066406, 127.21673583984375, 54.2112922668457, 79.02064514160156, 178.66348266601562, -53.16603088378906, 310.71588134765625, 5.990165710449219, 7.970281600952148, 324.6358947753906, 73.97813415527344, -92.44219970703125, 59.95964431762695, 121.7105712890625, 196.95376586914062, -33.26715850830078, 27.897249221801758, 5.262788772583008, 39.99291229248047, 60.226768493652344, 90.34180450439453, 46.899940490722656, 232.115966796875, 42.500423431396484, 134.90493774414062, 58.85862731933594, 186.58937072753906, 214.77249145507812, -15.942279815673828, 192.74697875976562, 235.61984252929688, 17.535886764526367, 189.77532958984375, 18.747684478759766], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000170.npy"}
{"epoch": 0.24963289280469897, "step": 171, "batch_size": 64, "mean": 64.21272277832031, "std": 130.15737915039062, "min": -249.20896911621094, "p10": -90.87361373901366, "median": 58.522403717041016, "p90": 211.61434326171874, "max": 576.6045532226562, "pos_frac": 0.703125, "sample": [-79.28892517089844, 216.81101989746094, 32.37311553955078, 92.09156799316406, -98.52654266357422, -49.86443328857422, 6.769477844238281, 114.46234130859375, -81.28060913085938, 139.06744384765625, 12.630550384521484, 155.6733856201172, 94.12950134277344, 108.15904235839844, 51.41771697998047, -50.96331787109375, 96.32830810546875, -97.58460998535156, 105.08479309082031, -6.942481994628906, 204.71804809570312, 76.87905883789062, 51.547760009765625, 85.22608947753906, 321.4692077636719, 69.28257751464844, -94.98490142822266, 576.6045532226562, -96.28675842285156, 49.92840576171875, 72.94139099121094, -249.20896911621094, 415.07733154296875, 138.20654296875, 90.79209899902344, -114.74066162109375, 212.33636474609375, 100.8045654296875, 15.986122131347656, 282.1785888671875, 18.802520751953125, 68.1643295288086, 209.92962646484375, -23.593061447143555, -95.61673736572266, 11.050048828125, 65.4970474243164, 86.29103088378906, -60.695735931396484, 104.84722900390625, -25.943199157714844, -50.26908874511719, 136.68402099609375, 42.18870544433594, 81.8079833984375, -11.666841506958008, 32.596435546875, 157.31863403320312, -19.76300811767578, -1.655364990234375, 89.20369720458984, 16.954957962036133, 258.218994140625, 49.95769119262695], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000171.npy"}
{"epoch": 0.2511013215859031, "step": 172, "batch_size": 64, "mean": 66.97691345214844, "std": 110.3448257446289, "min": -184.9056396484375, "p10": -71.17470893859861, "median": 72.92423248291016, "p90": 194.43138732910157, "max": 446.79217529296875, "pos_frac": 0.6875, "sample": [-26.809036254882812, 103.52181243896484, -111.77397155761719, 88.61927032470703, 36.703094482421875, -46.05670166015625, 140.8298797607422, 100.94624328613281, 72.9594955444336, -77.71033477783203, -122.34928894042969, 52.582088470458984, 54.91803741455078, -78.01751708984375, 148.53570556640625, 88.49301147460938, 17.84237289428711, 67.23490905761719, -58.03513717651367, 70.84307098388672, -35.246299743652344, 92.11976623535156, 182.98635864257812, 142.14578247070312, 22.29669952392578, 72.88896942138672, 223.66287231445312, 195.83929443359375, -76.80595397949219, 246.4764404296875, -28.09381866455078, 109.21782684326172, 108.3728256225586, 90.60894775390625, 120.26631164550781, 304.0008239746094, 446.79217529296875, 46.98400115966797, 119.08757781982422, 31.224185943603516, -19.48566436767578, -45.797088623046875, 74.47674560546875, 78.41957092285156, 191.14627075195312, -23.885223388671875, -14.242448806762695, 26.538434982299805, -29.564071655273438, 170.68051147460938, 157.83436584472656, -4.750633239746094, -57.502655029296875, 98.40193176269531, 108.2679672241211, -184.9056396484375, 71.70724487304688, 100.00265502929688, 229.44064331054688, 97.02752685546875, -15.005699157714844, -78.17523193359375, 241.54193115234375, 176.24923706054688], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000172.npy"}
{"epoch": 0.2525697503671072, "step": 173, "batch_size": 64, "mean": 67.36067199707031, "std": 96.69850158691406, "min": -188.19107055664062, "p10": -25.562897109985343, "median": 49.57112693786621, "p90": 213.93699340820316, "max": 347.09747314453125, "pos_frac": 0.78125, "sample": [46.148284912109375, 79.98377227783203, 37.304229736328125, -5.568275451660156, -1.8951034545898438, 44.73179626464844, 0.5183601379394531, 137.99559020996094, 119.1331787109375, 9.018604278564453, -9.820068359375, -41.057945251464844, 16.5900821685791, -0.47672271728515625, 80.7513427734375, 267.37689208984375, 70.41779327392578, 56.865604400634766, 68.02684783935547, 205.79620361328125, 115.09281921386719, 88.31088256835938, 84.59312438964844, -68.43013763427734, 50.16022491455078, 27.200599670410156, 347.09747314453125, 28.868011474609375, 160.18026733398438, 217.4259033203125, 66.41364288330078, 17.201345443725586, -59.256866455078125, 115.24668884277344, 104.08522033691406, -16.824264526367188, 116.95146942138672, 9.7987060546875, -2.6868972778320312, 230.1873779296875, 72.313720703125, 189.294677734375, -29.308025360107422, 36.973777770996094, -5.788307189941406, 15.08156967163086, 222.2022705078125, 116.32334899902344, -42.949058532714844, 0.427581787109375, 94.7142105102539, 48.98202896118164, 128.47216796875, -68.73974609375, -188.19107055664062, 335.04730224609375, 36.50636291503906, 5.842571258544922, 87.97512817382812, 18.787433624267578, 269.0950927734375, 59.447784423828125, 52.93410873413086, 42.181915283203125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000173.npy"}
{"epoch": 0.2540381791483113, "step": 174, "batch_size": 64, "mean": 80.25811004638672, "std": 117.85913848876953, "min": -153.16664123535156, "p10": -47.14148254394531, "median": 59.81394958496094, "p90": 255.0315628051759, "max": 452.4255065917969, "pos_frac": 0.78125, "sample": [22.828414916992188, -12.90985107421875, -48.19910430908203, -90.45945739746094, -8.558792114257812, 37.87468719482422, -13.13729476928711, 11.82789421081543, 112.06578826904297, 198.87940979003906, 228.3478546142578, 0.6705245971679688, 62.822105407714844, 19.532163619995117, 58.070098876953125, -39.338645935058594, 129.557373046875, 160.4837188720703, 150.7655029296875, 266.4674377441406, 56.15025329589844, 34.04862594604492, 61.55780029296875, -16.120481491088867, 326.6227722167969, -83.29896545410156, 134.71853637695312, 298.8973083496094, 28.601974487304688, 92.63069915771484, 11.652837753295898, -14.170219421386719, 63.55803680419922, -80.3397216796875, 141.63497924804688, -44.67369842529297, 163.97622680664062, 108.46404266357422, -123.36196899414062, 274.92828369140625, 312.75762939453125, 332.6152038574219, 33.4276123046875, 123.1536865234375, 9.303171157836914, -153.16664123535156, 3.1288814544677734, 132.68316650390625, 124.58073425292969, 143.30523681640625, 78.88870239257812, 48.90495300292969, 24.340980529785156, 141.75686645507812, -92.64362335205078, 187.57569885253906, 146.39990234375, 96.37489318847656, 48.47534942626953, 14.110843658447266, 452.4255065917969, 122.66867065429688, 90.63795471191406, 31.74678611755371], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000174.npy"}
{"epoch": 0.2555066079295154, "step": 175, "batch_size": 64, "mean": 108.91580200195312, "std": 108.1343765258789, "min": -89.57522583007812, "p10": -7.375426292419434, "median": 87.94187545776367, "p90": 239.50115051269532, "max": 423.298828125, "pos_frac": 0.84375, "sample": [20.914907455444336, 37.27067565917969, 57.83116912841797, -89.57522583007812, 87.39297485351562, 161.18496704101562, 234.70584106445312, 88.49077606201172, 24.214933395385742, 148.1456756591797, 304.9114990234375, -68.806640625, -9.507431030273438, 311.98876953125, 115.8865966796875, 33.541015625, 216.38613891601562, 219.2981414794922, 236.79995727539062, 162.60635375976562, 179.39077758789062, -57.931907653808594, 202.56825256347656, 423.298828125, 41.35596466064453, 136.04962158203125, 322.0374755859375, 47.18827819824219, 128.28097534179688, 84.62947845458984, -7.270530700683594, 59.79676818847656, 304.28143310546875, 19.484683990478516, 3.2477798461914062, 35.30369567871094, -0.6321296691894531, 263.49127197265625, 7.485361099243164, 82.71339416503906, 127.13444519042969, 119.31927490234375, 122.56951141357422, 215.46002197265625, -0.2712078094482422, 214.57095336914062, 238.63958740234375, 187.00111389160156, -31.321231842041016, 101.1095199584961, 173.21337890625, 93.24987030029297, 86.9687728881836, 154.39236450195312, 42.3500862121582, 69.25297546386719, 144.79441833496094, -7.420381546020508, 42.065330505371094, 239.87039184570312, -53.633094787597656, 67.46846008300781, 0.37359619140625, 55.0028076171875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000175.npy"}
{"epoch": 0.25697503671071953, "step": 176, "batch_size": 64, "mean": 91.70521545410156, "std": 120.69734954833984, "min": -66.16990661621094, "p10": -34.219139099121094, "median": 63.833940505981445, "p90": 220.64554901123046, "max": 508.3087158203125, "pos_frac": 0.796875, "sample": [5.36370849609375, 28.064285278320312, 98.40097045898438, 220.5888214111328, 63.85222244262695, 22.823001861572266, 14.339942932128906, 331.6427917480469, 18.151260375976562, -27.533905029296875, -33.23065185546875, 470.8995056152344, 13.537599563598633, -66.16990661621094, 220.66986083984375, 68.86161041259766, 32.543701171875, 212.81727600097656, 175.94534301757812, -4.877166748046875, -11.512283325195312, 69.4151840209961, 144.14707946777344, -50.86675262451172, 229.3948974609375, 1.8724384307861328, 27.31272315979004, 196.87672424316406, 185.58258056640625, 14.005683898925781, 116.61238098144531, -63.43511962890625, 150.23873901367188, -47.80101776123047, 40.95588684082031, 171.97177124023438, -42.104373931884766, 80.73169708251953, 181.53985595703125, 140.77330017089844, 185.09384155273438, 0.16052627563476562, -42.631439208984375, -0.5393180847167969, 148.7639617919922, 140.97122192382812, 129.94102478027344, 70.76166534423828, 134.74917602539062, 292.28204345703125, 35.598838806152344, -34.64277648925781, 20.494247436523438, 15.85493278503418, 137.63223266601562, 63.81565856933594, -16.05797576904297, 16.309276580810547, 132.72142028808594, 1.1819953918457031, 171.7539825439453, 508.3087158203125, 20.559722900390625, 333.649169921875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000176.npy"}
{"epoch": 0.25844346549192365, "step": 177, "batch_size": 64, "mean": 89.99525451660156, "std": 119.0173110961914, "min": -108.86076354980469, "p10": -24.124926185607908, "median": 62.730770111083984, "p90": 243.24726562500007, "max": 581.483154296875, "pos_frac": 0.75, "sample": [166.95208740234375, 53.1961669921875, 225.8658905029297, 125.09679412841797, 64.64678955078125, 126.58338928222656, -108.86076354980469, 88.39323425292969, 201.94412231445312, 170.23153686523438, 254.9230194091797, 41.795318603515625, 80.3498764038086, 261.10028076171875, 129.64488220214844, 85.85707092285156, 21.03968048095703, 360.1422424316406, 29.81891632080078, 168.95098876953125, 57.00224304199219, 170.509521484375, -76.14297485351562, 146.9613494873047, -3.338308334350586, 43.99848937988281, 207.16091918945312, -21.227157592773438, -3.4544830322265625, 4.7538299560546875, 60.81475067138672, 283.99871826171875, 106.14344787597656, -20.80413818359375, 16.656768798828125, 80.99031066894531, -19.107376098632812, 250.69642639160156, 0.015817642211914062, -28.552183151245117, -79.83790588378906, 155.55270385742188, -4.696500778198242, 51.52680969238281, -84.68707275390625, 152.35609436035156, 129.6723175048828, 16.098461151123047, -10.192100524902344, -4.4330902099609375, 200.6802520751953, 10.927186965942383, 75.76957702636719, -14.881721496582031, 191.7215576171875, 33.89297103881836, 39.89051055908203, -25.3668270111084, 157.20840454101562, 46.760398864746094, 581.483154296875, 285.72021484375, 77.76061248779297, -27.977462768554688], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000177.npy"}
{"epoch": 0.2599118942731278, "step": 178, "batch_size": 64, "mean": 114.02528381347656, "std": 143.0945587158203, "min": -146.0558319091797, "p10": -34.021150970458976, "median": 98.60264587402344, "p90": 322.1253692626954, "max": 565.0966796875, "pos_frac": 0.765625, "sample": [45.11503219604492, 22.65087890625, 160.00372314453125, -9.309478759765625, 136.46731567382812, 90.70905303955078, 111.1912612915039, 38.73612976074219, 21.53420639038086, 295.6988525390625, -0.6090927124023438, 113.06657409667969, 285.4479064941406, 110.9757080078125, 50.05956268310547, 157.8271484375, 403.99578857421875, 118.16549682617188, 565.0966796875, -53.32777404785156, 204.92430114746094, 59.76055145263672, 9.612945556640625, 58.467628479003906, -10.897485733032227, 176.7086181640625, 377.5810241699219, -49.045631408691406, 64.48260498046875, -5.344520568847656, -79.39997863769531, 105.14508056640625, 220.7196044921875, 21.328392028808594, 125.3318099975586, 187.72239685058594, 136.09121704101562, -93.12519836425781, 70.03447723388672, 256.8778991699219, 32.11262893676758, 116.62677001953125, -146.0558319091797, 447.6101989746094, -7.923585891723633, 159.72225952148438, 117.510498046875, 141.73019409179688, -38.592491149902344, 457.8758239746094, 37.59282684326172, 140.33169555664062, 333.4510192871094, -0.7512073516845703, 12.612548828125, 219.2868194580078, 92.06021118164062, -23.354690551757812, 59.39216613769531, 222.49169921875, 341.0816650390625, -88.94572448730469, 182.57870483398438, -11.296531677246094], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000178.npy"}
{"epoch": 0.26138032305433184, "step": 179, "batch_size": 64, "mean": 81.88909149169922, "std": 139.83355712890625, "min": -271.30279541015625, "p10": -82.27136993408203, "median": 78.44160842895508, "p90": 299.0905151367188, "max": 385.5741882324219, "pos_frac": 0.703125, "sample": [121.86214447021484, 161.09727478027344, 179.78182983398438, 385.5741882324219, 67.9586181640625, -95.65603637695312, 182.07191467285156, 31.048458099365234, -8.021049499511719, 14.325811386108398, 98.50955200195312, 61.622459411621094, 47.615447998046875, 90.05115509033203, -23.76984405517578, 100.50856018066406, -71.71316528320312, 315.73748779296875, -45.45127487182617, -50.421539306640625, 75.69943237304688, -85.28018188476562, 355.9463195800781, 25.46210289001465, 231.8518829345703, 128.08831787109375, 161.26681518554688, 308.8916015625, 166.35391235351562, 40.177574157714844, 78.65949249267578, 287.8092956542969, 339.2161865234375, 96.89900207519531, 81.90465545654297, 130.28662109375, -36.04352569580078, -14.269916534423828, 0.39365386962890625, 86.86151885986328, 88.37602996826172, -36.89185333251953, 78.22372436523438, -97.12066650390625, 134.59036254882812, 340.6177673339844, -93.37187957763672, 250.59324645996094, 114.02936553955078, -130.17919921875, -51.44158172607422, 238.6372833251953, 24.831497192382812, 247.02508544921875, 303.9253234863281, 223.2139129638672, -151.36065673828125, 157.30117797851562, -60.783363342285156, -75.25080871582031, -271.30279541015625, 20.608551025390625, 25.36070442199707, -61.63629913330078], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000179.npy"}
{"epoch": 0.26284875183553597, "step": 180, "batch_size": 64, "mean": 104.31163024902344, "std": 123.97949981689453, "min": -203.05198669433594, "p10": -30.609467697143547, "median": 87.42215728759766, "p90": 260.5847320556641, "max": 495.5740966796875, "pos_frac": 0.828125, "sample": [100.9349594116211, 164.70953369140625, -45.59461975097656, 3.2288246154785156, -67.67667388916016, 495.5740966796875, -23.946834564208984, 68.38510131835938, 104.14195251464844, 66.405517578125, 6.347270965576172, 227.01547241210938, -203.05198669433594, 146.53662109375, 180.00770568847656, 128.48736572265625, 85.13753509521484, 82.28952026367188, 102.92853546142578, 63.92829895019531, 169.0858154296875, -134.50393676757812, 54.31813430786133, 295.13494873046875, 76.24698638916016, 226.31362915039062, 156.27001953125, 5.225517272949219, 37.9925537109375, 56.57088088989258, 277.1527099609375, 3.5086498260498047, 248.99346923828125, 174.032470703125, 83.20577239990234, -5.505208969116211, 88.1570053100586, 203.70590209960938, -23.26641845703125, 185.96725463867188, 86.68730926513672, 15.543952941894531, 251.06900024414062, 69.03396606445312, -125.72764587402344, 82.31118774414062, 3.794769287109375, 264.66290283203125, 238.35621643066406, 89.46631622314453, 137.45545959472656, 249.23788452148438, -33.464881896972656, 338.1416015625, 91.02729034423828, -23.920242309570312, 191.53683471679688, 155.2414093017578, 29.431114196777344, -70.5704574584961, 268.5480651855469, 284.847412109375, 29.927078247070312, 188.91387939453125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000180.npy"}
{"epoch": 0.2643171806167401, "step": 181, "batch_size": 64, "mean": 65.1822509765625, "std": 114.9933853149414, "min": -200.1212921142578, "p10": -55.999814605712885, "median": 48.8984432220459, "p90": 214.63294982910162, "max": 381.36651611328125, "pos_frac": 0.765625, "sample": [129.6314239501953, -53.681785583496094, 15.88817024230957, 62.11830520629883, 111.0376205444336, -3.8597145080566406, 33.70547103881836, 41.490028381347656, 81.91956329345703, 122.24031066894531, -0.9429893493652344, 17.010988235473633, 4.762746810913086, 13.835227966308594, -153.11032104492188, 222.31869506835938, 112.36799621582031, -25.43651580810547, 79.36505126953125, -11.082935333251953, 50.74001693725586, -56.993255615234375, -17.681793212890625, 231.71278381347656, -78.46420288085938, 83.48719787597656, 125.68203735351562, 280.5992431640625, 68.06587982177734, 51.573822021484375, 132.31759643554688, 79.9850845336914, -200.1212921142578, 47.05686950683594, 23.268020629882812, 80.65591430664062, 13.087448120117188, 183.67359924316406, 138.00669860839844, -61.28129577636719, 23.59326934814453, -196.47515869140625, 219.32969665527344, -18.349308013916016, 351.4163818359375, 163.172607421875, -160.2332305908203, 58.62005615234375, -30.189605712890625, 132.04202270507812, 203.6738739013672, 381.36651611328125, 148.75128173828125, 31.347942352294922, 132.01809692382812, 5.345745086669922, 4.819263458251953, 1.8031082153320312, 294.8857421875, 45.36024856567383, 29.64862823486328, 181.84414672851562, 149.06094360351562, 43.86420440673828], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000181.npy"}
{"epoch": 0.2657856093979442, "step": 182, "batch_size": 64, "mean": 111.9610366821289, "std": 132.792236328125, "min": -231.8861083984375, "p10": -50.718293380737286, "median": 109.52497863769531, "p90": 274.03650817871096, "max": 375.3179626464844, "pos_frac": 0.828125, "sample": [290.17724609375, 79.82762145996094, 312.5550537109375, 190.94577026367188, 84.90289306640625, 221.63499450683594, 234.79873657226562, 270.95501708984375, 343.69207763671875, 73.29945373535156, 133.4269256591797, 47.890926361083984, 29.01371192932129, -17.525035858154297, 258.25482177734375, -231.8861083984375, 243.26361083984375, 62.27825927734375, 5.115283966064453, 46.82952880859375, 370.37152099609375, -2.8124847412109375, 257.3255920410156, 119.85307312011719, 146.63278198242188, -58.263038635253906, 241.57449340820312, 99.19688415527344, 17.722652435302734, 69.72064971923828, 150.47833251953125, 260.1484680175781, -101.19678497314453, -119.66014862060547, 161.92730712890625, 233.84378051757812, 46.88954162597656, 147.85226440429688, 208.58047485351562, 234.99322509765625, -67.33861541748047, 75.69780731201172, 33.564361572265625, 128.3806915283203, 153.0204315185547, 179.37399291992188, 181.03582763671875, 122.14348602294922, -121.77346801757812, 6.265037536621094, 375.3179626464844, 145.11868286132812, 49.475799560546875, 1.5198993682861328, -30.572036743164062, 8.412700653076172, -127.6257553100586, 72.9740982055664, 275.3571472167969, 20.579322814941406, 343.089111328125, -33.113887786865234, 50.144561767578125, 159.8296356201172], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000182.npy"}
{"epoch": 0.26725403817914833, "step": 183, "batch_size": 64, "mean": 60.8377685546875, "std": 107.99752044677734, "min": -159.3704376220703, "p10": -72.80771484374999, "median": 58.51554870605469, "p90": 187.50315246582036, "max": 351.4654541015625, "pos_frac": 0.765625, "sample": [20.838424682617188, 70.16581726074219, -4.363203048706055, 62.239097595214844, 13.405086517333984, 166.68850708007812, 148.08106994628906, 135.30889892578125, 47.31536865234375, 50.71098327636719, -73.93556213378906, 142.34881591796875, 60.202640533447266, 18.76011085510254, 27.012603759765625, 85.59376525878906, 331.3614501953125, 245.12860107421875, 141.2846221923828, 31.26307487487793, 66.63196563720703, 113.24371337890625, 290.22882080078125, 102.3140869140625, -11.301673889160156, 4.9474029541015625, 72.47329711914062, 56.82845687866211, 3.3930816650390625, -159.3704376220703, 169.03378295898438, 80.97522735595703, -52.453941345214844, 13.626953125, 173.25302124023438, 61.19737243652344, 44.342342376708984, 101.09481048583984, 135.616455078125, 232.98016357421875, -123.82461547851562, -41.25395965576172, 351.4654541015625, -70.17607116699219, 31.427093505859375, -37.71031188964844, 79.22148895263672, -101.43154907226562, -108.97209930419922, -104.12286376953125, 163.82623291015625, 110.51336669921875, 198.92613220214844, 193.6103515625, 81.29405212402344, 54.563636779785156, 26.440269470214844, -66.86418914794922, 137.73190307617188, 84.97071838378906, -146.01470947265625, -62.04982376098633, 19.315872192382812, 4.26568603515625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000183.npy"}
{"epoch": 0.2687224669603524, "step": 184, "batch_size": 64, "mean": 63.524295806884766, "std": 110.95415496826172, "min": -151.66934204101562, "p10": -48.042992401123044, "median": 32.04216957092285, "p90": 208.4887603759766, "max": 338.3529052734375, "pos_frac": 0.75, "sample": [27.008453369140625, -103.92778778076172, 66.63428497314453, 78.65164947509766, 187.44578552246094, 180.60247802734375, 7.3966064453125, 12.303268432617188, 158.18893432617188, -17.262863159179688, 316.621826171875, 338.3529052734375, 61.55192565917969, 210.66854858398438, -44.45923614501953, 191.87579345703125, 3.40777587890625, 215.22988891601562, 97.16698455810547, 266.92535400390625, -24.803192138671875, 305.1112060546875, -85.944091796875, 16.716827392578125, 36.194217681884766, 36.00763702392578, 41.782569885253906, 8.270500183105469, 89.27479553222656, 3.163663864135742, 40.70182800292969, 98.9105224609375, 142.31080627441406, 196.9093780517578, -91.86392211914062, 203.402587890625, 15.78082275390625, -20.860923767089844, -36.14018630981445, 25.778797149658203, 72.92382049560547, -19.62816619873047, 30.598018646240234, 7.110536575317383, 17.07689666748047, 197.8582000732422, 69.13825988769531, 54.43452835083008, -49.578887939453125, -130.8525848388672, -50.697200775146484, 26.0277156829834, -4.589801788330078, 3.65533447265625, -151.66934204101562, 27.408432006835938, -39.7669677734375, 177.19528198242188, 33.48632049560547, 103.61792755126953, 9.382499694824219, -1.954132080078125, 96.90956115722656, 332.3822326660156], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000184.npy"}
{"epoch": 0.2701908957415565, "step": 185, "batch_size": 64, "mean": 50.99299621582031, "std": 119.45226287841797, "min": -189.21121215820312, "p10": -63.58280448913573, "median": 26.959014892578125, "p90": 197.87610168457036, "max": 425.9419860839844, "pos_frac": 0.609375, "sample": [-161.41021728515625, -66.8831787109375, -12.145570755004883, 88.05026245117188, 72.56881713867188, 18.35088539123535, -5.273954391479492, 74.46534729003906, 401.8919982910156, -1.1157150268554688, 16.686763763427734, 110.26509857177734, 107.72695922851562, -5.846927642822266, 255.95114135742188, 36.158843994140625, -11.327009201049805, 34.83660125732422, 137.0773468017578, -93.35726928710938, -9.921371459960938, 35.0275993347168, 25.422954559326172, 71.76722717285156, 78.67765808105469, -6.105033874511719, -113.0853271484375, 19.792945861816406, 83.65143585205078, -130.33798217773438, 35.343414306640625, 17.547130584716797, 95.4985122680664, 95.22523498535156, 27.8756103515625, 356.15789794921875, -51.16105651855469, 33.32935333251953, 185.52691650390625, -43.13862991333008, 31.70125961303711, 4.202116012573242, -18.527069091796875, -11.442367553710938, -22.13640022277832, -2.0257511138916016, 321.19659423828125, 138.4529266357422, 146.4986114501953, -0.22583580017089844, 425.9419860839844, -28.28020477294922, 26.04241943359375, 64.86305236816406, -13.595245361328125, 203.16860961914062, 40.74298095703125, 126.63896179199219, -189.21121215820312, 257.1491394042969, -69.8344955444336, 93.75580596923828, -9.406936645507812, -55.88193130493164], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000185.npy"}
{"epoch": 0.27165932452276065, "step": 186, "batch_size": 64, "mean": 96.39396667480469, "std": 116.72457885742188, "min": -189.321533203125, "p10": -21.847058105468747, "median": 74.23047256469727, "p90": 233.269515991211, "max": 445.7630615234375, "pos_frac": 0.84375, "sample": [-189.321533203125, 129.07046508789062, 313.5786437988281, 149.23863220214844, 150.89581298828125, 8.06640625, 73.54074096679688, 44.16193771362305, 213.99850463867188, 150.76351928710938, 66.667236328125, 218.8695068359375, 1.8561935424804688, 83.51905822753906, 39.65473556518555, -16.990177154541016, 92.66697692871094, -114.86746215820312, 41.2904167175293, 42.7296142578125, 303.4456481933594, 45.0583381652832, 0.32865142822265625, 175.98773193359375, 42.011451721191406, 73.99000549316406, 101.274658203125, 148.13864135742188, 318.936767578125, 102.13401794433594, 176.37554931640625, -31.217927932739258, 148.16696166992188, 69.44146728515625, 31.59916114807129, 123.92153930664062, 395.23272705078125, 239.44094848632812, 45.18461990356445, 8.117374420166016, 154.97720336914062, 32.455291748046875, 74.47093963623047, 63.0849609375, -66.1461181640625, 395.23675537109375, -20.073089599609375, 70.01477813720703, -22.607330322265625, -40.47163391113281, 445.7630615234375, 157.33770751953125, 67.151123046875, 146.47781372070312, 5.7545166015625, 141.29879760742188, 93.01618957519531, -27.42107391357422, 3.1548690795898438, 115.58720397949219, 141.1893310546875, 131.22828674316406, -2.979217529296875, 93.75601196289062], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000186.npy"}
{"epoch": 0.27312775330396477, "step": 187, "batch_size": 64, "mean": 79.68223571777344, "std": 106.41858673095703, "min": -150.66989135742188, "p10": -35.552785110473636, "median": 61.99078559875488, "p90": 213.4085235595703, "max": 385.13739013671875, "pos_frac": 0.78125, "sample": [-150.66989135742188, 206.85769653320312, -23.6455135345459, 60.385623931884766, -82.61602783203125, 230.77816772460938, 83.95901489257812, 159.969970703125, 54.667625427246094, 42.21018600463867, 72.76007080078125, 66.744140625, 332.86639404296875, 78.4155502319336, 119.0115966796875, 203.79934692382812, 163.3640899658203, 63.595947265625, 111.84970092773438, -41.49467849731445, 57.59955596923828, 32.813812255859375, 385.13739013671875, -35.708526611328125, 140.52232360839844, 56.23899841308594, 237.13577270507812, -33.30550765991211, 189.0817108154297, 39.355010986328125, -43.81048583984375, 83.5489730834961, 19.871551513671875, -60.72196960449219, -35.189388275146484, 161.41860961914062, -34.74121856689453, 9.105352401733398, -10.578773498535156, 213.06832885742188, 147.67222595214844, 345.49945068359375, 94.36437225341797, 41.68896484375, 64.31036376953125, 33.87242126464844, 52.06703567504883, 42.90319061279297, -125.77933502197266, 38.148292541503906, 102.72876739501953, -7.431251525878906, 155.7565155029297, 100.12913513183594, 75.85894012451172, 190.1470489501953, 87.97611999511719, 2.1261520385742188, 58.170501708984375, 231.42698669433594, 213.5543212890625, 36.57494354248047, -19.991315841674805, 14.23861312866211], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000187.npy"}
{"epoch": 0.2745961820851689, "step": 188, "batch_size": 64, "mean": 97.3951187133789, "std": 119.21813201904297, "min": -131.7470703125, "p10": -18.19176597595214, "median": 71.20540618896484, "p90": 282.8461944580078, "max": 486.06854248046875, "pos_frac": 0.828125, "sample": [307.82427978515625, 121.98509979248047, 245.2884979248047, 62.45143508911133, 73.5965805053711, 1.8631744384765625, -30.40594482421875, -122.38712310791016, 288.3460693359375, 78.60145568847656, 51.49147033691406, -23.54534149169922, 117.12584686279297, 75.69335174560547, 281.96002197265625, 244.73597717285156, 92.88297271728516, -21.13152313232422, 58.902557373046875, 9.349998474121094, 11.430099487304688, 27.72698211669922, 42.97601318359375, 148.0587158203125, 68.31790924072266, 30.14232635498047, 14.494552612304688, -5.865917205810547, 322.3149108886719, 45.66680908203125, 366.58349609375, -131.7470703125, 283.2259826660156, 110.62953186035156, 147.38796997070312, 113.20748901367188, 63.3072509765625, 189.30126953125, -10.643373489379883, 189.061767578125, 168.7155303955078, 26.978195190429688, -87.01498413085938, -25.514822006225586, 189.64671325683594, 40.070030212402344, 215.98263549804688, 47.39894485473633, 3.9948272705078125, 326.90753173828125, 86.01050567626953, 164.95396423339844, 54.47157669067383, -10.998275756835938, 157.8731689453125, 79.45523071289062, 102.05825805664062, 486.06854248046875, 95.30693054199219, 68.8142318725586, 75.98089599609375, -11.332332611083984, 19.956153869628906, 17.29851722717285], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000188.npy"}
{"epoch": 0.27606461086637296, "step": 189, "batch_size": 64, "mean": 80.23283386230469, "std": 132.10853576660156, "min": -178.82855224609375, "p10": -62.069775390625, "median": 57.26345252990723, "p90": 194.68218383789065, "max": 561.529052734375, "pos_frac": 0.8125, "sample": [40.97154235839844, 171.9915313720703, -113.76881408691406, 346.01806640625, -64.52130126953125, -56.34954833984375, 44.824745178222656, 4.505455017089844, 0.46870994567871094, 40.168121337890625, 150.35858154296875, 154.908447265625, 144.76791381835938, 0.04266357421875, 162.73272705078125, 46.566925048828125, 185.898681640625, 43.693809509277344, -164.14459228515625, 5.616416931152344, -80.58441925048828, 31.840784072875977, 78.20927429199219, 189.08453369140625, 149.31137084960938, 78.64962768554688, -44.653106689453125, 57.30400466918945, 33.554176330566406, 186.39047241210938, 3.0267066955566406, 20.831350326538086, 95.48046875, 472.2060546875, 57.222900390625, -21.791526794433594, 24.34456443786621, 226.0902557373047, 90.31373596191406, 14.539802551269531, -162.7669219970703, 118.2431411743164, -51.6072998046875, 118.38797760009766, 60.701629638671875, 331.0030517578125, 178.96812438964844, 561.529052734375, -17.671852111816406, 134.5627899169922, 169.5933837890625, 29.888416290283203, 79.58439636230469, 150.33580017089844, 106.57414245605469, 197.0811767578125, 243.70175170898438, 91.37541198730469, 0.6879043579101562, -178.82855224609375, 54.38365173339844, -73.06288146972656, 26.975540161132812, 159.14044189453125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000189.npy"}
{"epoch": 0.2775330396475771, "step": 190, "batch_size": 64, "mean": 94.80111694335938, "std": 138.2601776123047, "min": -207.3631134033203, "p10": -60.5732536315918, "median": 67.73681259155273, "p90": 301.12537841796876, "max": 420.2190246582031, "pos_frac": 0.796875, "sample": [-3.842041015625, 112.8069839477539, 186.35980224609375, 157.1011962890625, 16.983646392822266, -81.75389099121094, 248.53903198242188, 41.62665557861328, 12.61545181274414, 357.37451171875, 75.3575439453125, 68.45857238769531, 132.9139862060547, 21.215614318847656, 368.32513427734375, 142.366455078125, 104.414794921875, 85.02821350097656, 194.45736694335938, 26.594512939453125, 2.8148860931396484, 264.7259216308594, -207.3631134033203, 67.01505279541016, 259.23101806640625, -22.638259887695312, 60.9156494140625, 319.513671875, 160.52906799316406, 55.38037109375, -18.037704467773438, 303.107421875, 20.74852752685547, -2.2234058380126953, 124.35257720947266, 77.94918823242188, -95.2228775024414, -57.980377197265625, -151.04876708984375, 17.472152709960938, 109.26757049560547, -61.684486389160156, 112.24181365966797, 50.59974670410156, -27.944116592407227, -87.45281982421875, 111.8721923828125, 296.5006103515625, 41.025299072265625, 55.608070373535156, -191.59327697753906, 382.57861328125, 121.20884704589844, 26.35025978088379, 203.40882873535156, 106.18988800048828, 65.9094467163086, 390.78118896484375, 5.211030960083008, 420.2190246582031, 192.38223266601562, 218.83543395996094, 39.3706169128418, 40.20074462890625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000190.npy"}
{"epoch": 0.2790014684287812, "step": 191, "batch_size": 64, "mean": 136.34432983398438, "std": 149.69845581054688, "min": -140.9805908203125, "p10": -5.916384124755858, "median": 113.79074096679688, "p90": 320.24621582031256, "max": 568.1961669921875, "pos_frac": 0.859375, "sample": [472.7754211425781, 109.97396850585938, 568.1961669921875, 31.997222900390625, 292.82379150390625, 68.79499816894531, 140.75662231445312, 262.7707824707031, 15.096372604370117, 359.13568115234375, 99.80610656738281, 163.55258178710938, -6.712181091308594, 262.1221008300781, 280.30572509765625, 38.597389221191406, 36.91893768310547, 79.0936050415039, 27.946762084960938, 76.73340606689453, 257.4605712890625, 55.209930419921875, 42.53955078125, 293.73004150390625, 123.76331329345703, 11.144760131835938, 8.53233528137207, 547.4090576171875, 176.74754333496094, -18.768898010253906, 223.30078125, -75.64430236816406, 17.008934020996094, -48.037967681884766, 139.18568420410156, 174.64892578125, 181.71563720703125, 146.95654296875, 324.43658447265625, 30.438596725463867, 8.153656005859375, 243.52658081054688, 227.03369140625, -140.9805908203125, 66.52204895019531, 497.3912048339844, 38.378021240234375, -53.65385437011719, 117.60751342773438, 100.42994689941406, 181.60643005371094, 310.46868896484375, 29.301856994628906, -2.0011940002441406, 125.44473266601562, 170.02978515625, -68.5846939086914, 120.68927001953125, -4.0595245361328125, 121.12627410888672, 357.25225830078125, 38.44048309326172, 85.12922668457031, 164.32241821289062], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000191.npy"}
{"epoch": 0.28046989720998533, "step": 192, "batch_size": 64, "mean": 91.00912475585938, "std": 144.7425079345703, "min": -250.23117065429688, "p10": -49.317901611328125, "median": 64.31451034545898, "p90": 311.1256042480469, "max": 528.5696411132812, "pos_frac": 0.734375, "sample": [-46.347633361816406, -140.8507843017578, 43.40258026123047, 164.82469177246094, 138.05213928222656, -30.225975036621094, -50.59087371826172, -2.8756237030029297, 32.097747802734375, -250.23117065429688, 25.3446044921875, 96.6141128540039, -37.69114685058594, 223.16624450683594, 17.563919067382812, 38.95057678222656, -66.50999450683594, 101.75804138183594, -46.1510009765625, 172.84291076660156, -115.43494415283203, 406.2932434082031, -40.629268646240234, 74.80570983886719, -17.262496948242188, 314.32659912109375, 350.7130126953125, 374.5386047363281, 153.3441619873047, 356.1666259765625, 93.75291442871094, 78.14540100097656, 528.5696411132812, 116.6383056640625, 174.43226623535156, 27.19407081604004, 119.99655151367188, 212.6453399658203, 267.7253112792969, 61.21540451049805, 49.79545593261719, 303.6566162109375, 8.507684707641602, 39.94993591308594, 132.53341674804688, 59.209877014160156, 62.089195251464844, -166.93055725097656, 198.00747680664062, 62.80670928955078, 83.89974975585938, -10.25115966796875, -36.340293884277344, -60.579994201660156, 12.538553237915039, 225.81024169921875, 76.46907043457031, -28.178375244140625, 38.583099365234375, 206.82708740234375, 135.970947265625, 113.66265869140625, 65.82231140136719, 330.4043273925781], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000192.npy"}
{"epoch": 0.28193832599118945, "step": 193, "batch_size": 64, "mean": 53.444305419921875, "std": 138.79598999023438, "min": -410.51190185546875, "p10": -79.66521148681639, "median": 52.623992919921875, "p90": 221.21300811767583, "max": 362.22247314453125, "pos_frac": 0.671875, "sample": [113.47634887695312, -20.922706604003906, 27.65215301513672, 181.34765625, -410.51190185546875, 102.7469711303711, 67.5392837524414, 57.70734405517578, -401.69293212890625, 304.79010009765625, 66.3711929321289, -13.216436386108398, 190.69528198242188, -36.63485336303711, -19.410308837890625, 94.81340026855469, 99.47679138183594, 125.57133483886719, -1.6657638549804688, 226.660400390625, 3.1863441467285156, -16.25518035888672, 13.586315155029297, 156.31491088867188, 26.249570846557617, 125.08546447753906, -55.53590774536133, -32.40009307861328, 7.1793975830078125, -56.922882080078125, -24.399581909179688, 208.50242614746094, -85.59878540039062, -89.37152862548828, 249.33966064453125, -126.9466781616211, 175.97555541992188, -89.67586517333984, 12.128190994262695, 47.54064178466797, 9.817756652832031, 139.23333740234375, 266.4005126953125, 164.63345336914062, 112.2418212890625, -134.62193298339844, 315.411865234375, 84.90316009521484, 123.00981903076172, 123.59960174560547, -60.59857177734375, 91.44036102294922, 14.605152130126953, -65.82020568847656, -38.89497375488281, 141.32757568359375, 362.22247314453125, 132.15895080566406, -62.10539627075195, 23.269317626953125, 278.8957214355469, 76.01685333251953, 16.899368286132812, 103.61438751220703], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000193.npy"}
{"epoch": 0.2834067547723935, "step": 194, "batch_size": 64, "mean": 86.15937805175781, "std": 98.57442474365234, "min": -98.83485412597656, "p10": -48.97726287841796, "median": 69.02230834960938, "p90": 237.95194244384766, "max": 296.5897521972656, "pos_frac": 0.8125, "sample": [138.18539428710938, 19.781578063964844, 32.73064422607422, 8.570510864257812, 118.72767639160156, 255.33291625976562, 103.3292236328125, 165.05860900878906, -22.56683349609375, 19.554290771484375, 30.60264015197754, 250.32666015625, -68.65216064453125, -52.0875244140625, 43.83448028564453, 60.40458679199219, 206.68704223632812, 54.578704833984375, 67.26942443847656, 131.41500854492188, 36.08846664428711, 78.15281677246094, 16.921039581298828, 7.295322418212891, 117.4696273803711, -78.70106506347656, -1.8282337188720703, 230.49710083007812, 66.25831604003906, 83.40437316894531, 246.7957000732422, 132.3272247314453, 238.79031372070312, 169.7088623046875, 29.575393676757812, 278.80419921875, 66.6792221069336, 27.737131118774414, 143.58242797851562, 47.86991882324219, 183.87156677246094, -76.75247192382812, 42.46290588378906, -8.272102355957031, 103.44319152832031, 162.44778442382812, -2.3151283264160156, -98.83485412597656, 37.657203674316406, -55.2822265625, 110.641357421875, -62.004356384277344, 62.03065490722656, 235.99574279785156, 76.02053833007812, 296.5897521972656, 131.6566925048828, 70.77519226074219, 283.36114501953125, 92.0648193359375, 172.63027954101562, 179.11264038085938, 118.10877990722656, -41.71998596191406], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000194.npy"}
{"epoch": 0.28487518355359764, "step": 195, "batch_size": 64, "mean": 93.73036193847656, "std": 116.96430969238281, "min": -129.95333862304688, "p10": -51.056222915649414, "median": 77.0123176574707, "p90": 236.19914093017582, "max": 394.5634765625, "pos_frac": 0.78125, "sample": [82.81050109863281, -23.245025634765625, 219.20155334472656, 51.554927825927734, 70.47824096679688, 394.5634765625, 76.03339385986328, 331.40435791015625, 212.8143310546875, -7.951118469238281, -6.018280029296875, 48.28672790527344, -0.7720108032226562, -97.41249084472656, 240.45037841796875, 114.87844848632812, 58.36872863769531, 226.2795867919922, -33.876625061035156, 337.34771728515625, -50.33640670776367, 109.24606323242188, -99.47454833984375, 38.0162353515625, 102.72634887695312, 106.45618438720703, 220.45306396484375, 301.416748046875, 145.9394073486328, 127.3164291381836, 147.8134765625, -80.69478607177734, 73.74171447753906, 181.11209106445312, 14.988983154296875, 148.91824340820312, -28.85283088684082, 10.253562927246094, 172.28164672851562, 211.59544372558594, -129.95333862304688, 250.32546997070312, 44.958621978759766, 73.23204803466797, 385.5281066894531, 138.37762451171875, 80.28632354736328, 60.994171142578125, -55.90766143798828, 14.272092819213867, 161.3037567138672, -64.72259521484375, 218.15228271484375, 106.73751831054688, 28.157577514648438, 77.99124145507812, 34.230125427246094, 58.7060546875, 18.41823387145996, 90.7251205444336, -51.364715576171875, 53.2784538269043, 147.0260772705078, 109.87649536132812], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000195.npy"}
{"epoch": 0.28634361233480177, "step": 196, "batch_size": 64, "mean": 105.5745849609375, "std": 142.866455078125, "min": -211.85574340820312, "p10": -49.74595336914062, "median": 78.3057632446289, "p90": 303.9257293701172, "max": 450.68719482421875, "pos_frac": 0.765625, "sample": [192.503173828125, 196.78895568847656, 305.3267517089844, 214.7422332763672, -49.627723693847656, 293.81243896484375, 300.65667724609375, 433.27392578125, 227.4837646484375, 71.74040222167969, 6.688343048095703, 47.72914123535156, 289.318603515625, 194.97024536132812, 48.639801025390625, 10.362932205200195, -20.41855239868164, 85.99214172363281, -7.8324432373046875, 47.157470703125, 429.222900390625, -23.841289520263672, 60.70606994628906, 38.182743072509766, 131.25289916992188, 118.6127700805664, 122.7392578125, 140.4193115234375, 47.87200164794922, -10.554250717163086, 113.61283874511719, 76.49095153808594, 433.32708740234375, 450.68719482421875, -17.974929809570312, 178.24925231933594, 84.72294616699219, 58.87272644042969, 108.97093200683594, 80.12057495117188, 63.22522735595703, 57.448028564453125, 224.04681396484375, -2.9979686737060547, 141.10665893554688, 32.868778228759766, 83.56986236572266, -153.1088104248047, 64.49137115478516, -211.85574340820312, -73.9920654296875, 152.48355102539062, 88.60359191894531, -63.4323616027832, 320.4356689453125, -49.79662322998047, -130.62527465820312, 70.09516906738281, -0.8719482421875, 128.34698486328125, 259.5794677734375, 315.8297119140625, -75.20120239257812, 5.523876190185547], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000196.npy"}
{"epoch": 0.2878120411160059, "step": 197, "batch_size": 64, "mean": 81.59483337402344, "std": 113.4122314453125, "min": -132.0366973876953, "p10": -35.535371398925776, "median": 55.823726654052734, "p90": 235.47472381591797, "max": 402.5718078613281, "pos_frac": 0.796875, "sample": [56.75244140625, 330.0973815917969, 138.79685974121094, -19.888282775878906, 165.00863647460938, 8.275474548339844, 23.949386596679688, -64.35331726074219, 63.664207458496094, 402.5718078613281, 358.712646484375, 236.29066467285156, 67.72643280029297, 123.40673828125, -100.05284881591797, -1.1440086364746094, 127.42290496826172, 277.5804443359375, 28.308494567871094, 50.75090408325195, 102.95098876953125, 209.3842315673828, 39.21592330932617, -1.33123779296875, 54.89501190185547, 7.163825988769531, -101.95319366455078, 129.67654418945312, 79.98912811279297, 96.34121704101562, 262.7475280761719, 146.32467651367188, 59.970237731933594, 233.57086181640625, 1.72320556640625, 190.90110778808594, -37.20567321777344, 1.4847564697265625, -72.52200317382812, -132.0366973876953, 32.791839599609375, 117.8260726928711, -0.3157005310058594, 6.347160339355469, 23.915632247924805, 79.46551513671875, 302.2030029296875, 200.87481689453125, 20.391250610351562, 185.01478576660156, -13.61741828918457, 52.1843147277832, 92.70186614990234, 23.87029266357422, 18.687828063964844, 109.02498626708984, 169.51902770996094, 73.4808120727539, 202.20986938476562, -31.63800048828125, -74.654052734375, 16.26653289794922, 21.14752197265625, 49.203826904296875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000197.npy"}
{"epoch": 0.28928046989721, "step": 198, "batch_size": 64, "mean": 104.55337524414062, "std": 143.56292724609375, "min": -229.1312255859375, "p10": -28.43513584136963, "median": 85.63031768798828, "p90": 233.18683929443364, "max": 590.418701171875, "pos_frac": 0.84375, "sample": [-127.77547454833984, 103.93980407714844, -38.01964569091797, 525.537109375, 124.77952575683594, 200.0255126953125, 324.8471984863281, 55.39840316772461, 148.15953063964844, 52.135215759277344, 118.83865356445312, 164.13107299804688, -143.25753784179688, 9.920066833496094, 4.0755615234375, 236.5219268798828, 145.0347900390625, 108.6068115234375, 80.19073486328125, 84.4427261352539, -28.929128646850586, -27.282485961914062, 34.885292053222656, 159.90550231933594, 9.418581008911133, 70.9260025024414, -70.5274887084961, 201.18374633789062, 85.58335876464844, 82.71772766113281, 88.47733306884766, 120.70978546142578, 135.27423095703125, 76.74815368652344, 129.83274841308594, 13.48806381225586, 77.70329284667969, 157.94558715820312, 154.71511840820312, 225.40496826171875, -229.1312255859375, 547.2842407226562, 95.70076751708984, 62.89978790283203, 2.048603057861328, 9.260543823242188, 243.01815795898438, -40.26210021972656, 3.399921417236328, 72.53750610351562, 37.11436080932617, 89.35284423828125, 85.67727661132812, 136.39407348632812, -6.611490249633789, 143.38165283203125, 153.46189880371094, 208.0081787109375, 16.37420654296875, 417.8076171875, -5.34759521484375, 590.418701171875, 44.117881774902344, 138.79750061035156], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000198.npy"}
{"epoch": 0.2907488986784141, "step": 199, "batch_size": 64, "mean": 104.07836151123047, "std": 141.8766326904297, "min": -167.77215576171875, "p10": -62.678912353515614, "median": 85.94961166381836, "p90": 282.65552062988286, "max": 468.70172119140625, "pos_frac": 0.796875, "sample": [208.63421630859375, 166.2356719970703, -135.76473999023438, -8.278457641601562, 272.5226745605469, 78.28177642822266, -67.12432861328125, 201.5992889404297, -167.77215576171875, 122.54918670654297, 4.201637268066406, -80.63113403320312, -165.00332641601562, 220.36424255371094, 201.8704833984375, 63.201133728027344, 11.666618347167969, 138.89370727539062, 394.58416748046875, 39.02049255371094, 91.22744750976562, 167.30523681640625, 387.444580078125, 301.68609619140625, -92.69430541992188, 38.84013366699219, -46.499794006347656, 120.81746673583984, 207.69808959960938, 24.839889526367188, 37.12372589111328, 253.38226318359375, 468.70172119140625, 78.79832458496094, 70.9723892211914, 207.16366577148438, -20.600997924804688, 115.24598693847656, 23.152862548828125, 93.05612182617188, 64.86892700195312, 276.0636901855469, 125.7083969116211, 35.6049919128418, 285.4805908203125, 122.29479217529297, 73.89805603027344, -52.3062744140625, -41.909488677978516, -164.50833129882812, 272.35797119140625, 81.47698211669922, 103.88306427001953, 91.71940612792969, 349.8515625, 179.6419219970703, 5.308000564575195, 85.17093658447266, 57.10820007324219, 86.72828674316406, 42.76029968261719, 145.12130737304688, 426.74481201171875, -18.764724731445312], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000199.npy"}
{"epoch": 0.2922173274596182, "step": 200, "batch_size": 64, "mean": 103.42639923095703, "std": 156.4627227783203, "min": -307.46612548828125, "p10": -45.82288856506346, "median": 82.35055923461914, "p90": 331.60117797851564, "max": 532.32373046875, "pos_frac": 0.78125, "sample": [234.88046264648438, 107.35621643066406, 58.77616500854492, 410.8849792480469, 69.91737365722656, 157.68447875976562, 16.284576416015625, -0.1319427490234375, 406.54144287109375, -83.59220886230469, 319.7857360839844, 239.64483642578125, 98.94709014892578, 4.130748748779297, 78.98391723632812, 325.3357849121094, 209.08523559570312, 379.32318115234375, 0.65740966796875, 334.2863464355469, 67.43150329589844, 0.6921138763427734, 160.19430541992188, 221.7256317138672, -26.69723892211914, 74.63540649414062, 199.30374145507812, -194.70941162109375, 532.32373046875, 90.0373306274414, 104.4591064453125, -30.175189971923828, -1.6668243408203125, 74.438720703125, 53.19438934326172, -26.771635055541992, -307.46612548828125, 3.82666015625, 101.34095764160156, 341.3061218261719, 3.727712631225586, 86.6574935913086, 59.7061767578125, -4.760841369628906, 1.8956565856933594, -159.37338256835938, 85.71720123291016, -52.52904510498047, 294.5806579589844, -25.044057846069336, 2.959014892578125, 408.58685302734375, 45.73884582519531, 21.532936096191406, 101.29306030273438, 256.388916015625, 175.79136657714844, 164.90670776367188, 100.42459106445312, 172.95973205566406, -121.79981994628906, 142.47000122070312, -69.55133056640625, 120.80570220947266], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000200.npy"}
{"epoch": 0.2936857562408223, "step": 201, "batch_size": 64, "mean": 102.65122985839844, "std": 146.82598876953125, "min": -292.3990173339844, "p10": -45.36436614990233, "median": 80.95663452148438, "p90": 291.10467529296886, "max": 497.54534912109375, "pos_frac": 0.8125, "sample": [268.62213134765625, 17.530935287475586, 212.91775512695312, -231.00814819335938, 53.28941345214844, -87.37506103515625, 8.278564453125, 15.074041366577148, -29.558151245117188, 83.7086181640625, 1.67803955078125, 26.4461669921875, 36.508209228515625, -32.42909240722656, 467.5879211425781, 106.9178466796875, 59.2030029296875, 120.57908630371094, 23.660682678222656, 237.40090942382812, 358.06829833984375, 134.265380859375, 4.4430084228515625, 98.24183654785156, 215.31964111328125, 256.00347900390625, -79.42056274414062, 226.51268005371094, 194.17703247070312, -14.593626022338867, 77.06558227539062, 122.95301818847656, 123.78409576416016, 122.62486267089844, -33.144561767578125, -67.87248229980469, 173.68807983398438, 66.49637603759766, 48.615020751953125, 497.54534912109375, 106.0458984375, -28.649436950683594, 168.79937744140625, 78.20465087890625, 44.21204376220703, 169.71250915527344, 152.87432861328125, 247.1263885498047, 391.9196472167969, 65.09053039550781, 84.43268585205078, -68.96925354003906, 145.45382690429688, 69.94806671142578, -292.3990173339844, 300.74005126953125, 416.6422424316406, 141.50552368164062, 11.038507461547852, -50.60142517089844, 303.0257873535156, 51.968101501464844, 14.79931640625, 162.95281982421875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000201.npy"}
{"epoch": 0.29515418502202645, "step": 202, "batch_size": 64, "mean": 71.60740661621094, "std": 144.4905242919922, "min": -221.72317504882812, "p10": -100.02004165649412, "median": 61.310585021972656, "p90": 256.50519104003905, "max": 540.5421752929688, "pos_frac": 0.703125, "sample": [540.5421752929688, -54.70364761352539, 91.66838836669922, 313.789794921875, 3.343698501586914, -51.91736602783203, 227.50039672851562, -50.818267822265625, 124.75125122070312, 95.51749420166016, 53.73599624633789, -197.28248596191406, 174.91851806640625, 245.955078125, 211.71051025390625, 43.88929748535156, -200.78659057617188, 47.4708251953125, 106.45796203613281, 342.81842041015625, 119.41301727294922, 285.092041015625, -139.69631958007812, 193.80535888671875, 24.83751678466797, 29.20599365234375, 392.5770263671875, -15.96449089050293, 104.84242248535156, 140.03610229492188, 100.13514709472656, 128.0525360107422, 257.0137939453125, -34.46490478515625, -41.511688232421875, -125.8480224609375, 82.57328796386719, 60.962860107421875, 2.2123947143554688, 230.736572265625, 102.34526062011719, -9.597091674804688, 21.60129165649414, 255.31845092773438, 149.78851318359375, 113.03111267089844, 85.79134368896484, -221.72317504882812, 61.65830993652344, 91.728515625, 14.729816436767578, 45.652427673339844, -53.89616394042969, -70.24605560302734, 39.6790771484375, -112.63229370117188, 82.43567657470703, -70.5914535522461, -47.878665924072266, 55.71381378173828, 257.1009216308594, -138.02313232421875, 89.75640869140625, -21.441593170166016], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000202.npy"}
{"epoch": 0.2966226138032305, "step": 203, "batch_size": 64, "mean": 89.3485336303711, "std": 124.5948257446289, "min": -189.55157470703125, "p10": -64.25613861083983, "median": 76.18901824951172, "p90": 255.9620010375977, "max": 445.27630615234375, "pos_frac": 0.78125, "sample": [12.866291046142578, 135.4822540283203, 155.62481689453125, 69.72620391845703, -30.17544174194336, 286.7605895996094, -72.54725646972656, 136.34176635742188, 26.941877365112305, 100.994384765625, -2.3450775146484375, 103.5318374633789, 110.3038558959961, 165.71902465820312, -105.7833251953125, 198.98988342285156, 124.90994262695312, -23.055557250976562, 186.4254913330078, 131.65643310546875, 274.02288818359375, 430.3377685546875, -68.33975219726562, 112.7725830078125, 137.75527954101562, 39.623130798339844, 445.27630615234375, 13.238494873046875, -107.51873779296875, 141.6583709716797, 2.1482467651367188, 168.09024047851562, 195.66265869140625, 265.8489685058594, 18.61785888671875, 184.31959533691406, 77.721435546875, 107.2537612915039, 74.65660095214844, 131.50286865234375, 51.582855224609375, 65.57416534423828, 42.533241271972656, 242.7274169921875, -53.367149353027344, -189.55157470703125, -54.72770690917969, 69.09385681152344, -33.716285705566406, 8.127689361572266, 169.67979431152344, 5.229637145996094, 19.740459442138672, 46.502784729003906, -75.39523315429688, -76.458984375, 143.7451171875, 88.50601196289062, 261.2667541503906, 336.7688903808594, -3.888460159301758, 243.58424377441406, 30.69902992248535, 23.03290557861328], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000203.npy"}
{"epoch": 0.29809104258443464, "step": 204, "batch_size": 64, "mean": 82.42413330078125, "std": 147.43942260742188, "min": -230.768798828125, "p10": -74.20380477905272, "median": 71.08407211303711, "p90": 311.3234161376954, "max": 476.8958740234375, "pos_frac": 0.703125, "sample": [223.9494171142578, 77.45420837402344, 43.27314376831055, 41.58503723144531, 85.4899673461914, -9.418441772460938, 211.03570556640625, -110.61566925048828, 83.65319061279297, 17.626611709594727, -40.328975677490234, 29.96217155456543, 13.326332092285156, 86.42716979980469, 55.471435546875, -44.284812927246094, -230.768798828125, 192.17044067382812, 50.227874755859375, 12.767976760864258, 295.67230224609375, -146.67608642578125, -23.911209106445312, -7.977481842041016, 77.17483520507812, -13.895523071289062, 73.30853271484375, 152.1611785888672, 148.2687530517578, -84.08161926269531, 404.2466125488281, -20.972511291503906, 74.68659973144531, 32.365211486816406, -48.56431579589844, 141.47164916992188, 48.51356506347656, 166.47683715820312, 442.73248291015625, -201.40834045410156, 349.927734375, 161.93972778320312, 164.77804565429688, 73.90919494628906, 103.09197998046875, -8.806291580200195, 43.92036437988281, 224.3539581298828, 318.0310363769531, -8.430267333984375, 84.11868286132812, 148.897705078125, -116.78436279296875, 68.85961151123047, 476.8958740234375, 372.9793701171875, -40.139984130859375, 148.28897094726562, 24.5280704498291, 363.3220520019531, -124.09024047851562, 83.9365234375, 114.17683410644531, -51.15557098388672], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000204.npy"}
{"epoch": 0.29955947136563876, "step": 205, "batch_size": 64, "mean": 107.63642883300781, "std": 142.58949279785156, "min": -264.07891845703125, "p10": -72.64306106567382, "median": 111.06857681274414, "p90": 278.00032043457037, "max": 399.3807067871094, "pos_frac": 0.78125, "sample": [37.022315979003906, 137.3988037109375, 219.4447021484375, 377.1792297363281, 123.94800567626953, 7.114648818969727, -8.606496810913086, 23.50372314453125, 11.428054809570312, 241.8387451171875, 254.51890563964844, -86.30804443359375, -104.09193420410156, 93.01496124267578, -18.124130249023438, 83.70986938476562, 14.883275985717773, 383.2620544433594, 184.3834991455078, 29.201000213623047, 105.84497833251953, 98.86434936523438, -153.13235473632812, 171.16453552246094, 300.26177978515625, 282.1990661621094, 192.4254150390625, -6.935489654541016, -73.5541000366211, 205.1864013671875, -48.6243896484375, 384.0495300292969, 122.60110473632812, 83.38736724853516, 34.95496368408203, 118.40188598632812, 399.3807067871094, 249.34259033203125, 115.95317077636719, 132.62693786621094, -213.44992065429688, 153.76023864746094, 119.18463897705078, 237.8230743408203, 146.46572875976562, -70.51730346679688, 144.49977111816406, 86.18829345703125, 268.2032470703125, 76.70519256591797, 252.14613342285156, 196.09620666503906, 174.4848175048828, 73.28958892822266, 154.9130401611328, 367.5386047363281, 106.1839828491211, -10.842292785644531, 18.768081665039062, -264.07891845703125, -78.10540771484375, 219.65829467773438, -34.235687255859375, 44.9324951171875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000205.npy"}
{"epoch": 0.3010279001468429, "step": 206, "batch_size": 64, "mean": 117.94447326660156, "std": 127.91874694824219, "min": -245.01885986328125, "p10": -36.655757904052706, "median": 105.92851257324219, "p90": 290.30573272705095, "max": 398.985595703125, "pos_frac": 0.828125, "sample": [53.737030029296875, 184.20086669921875, 88.25407409667969, 121.425048828125, 398.985595703125, 307.0252685546875, 81.86453247070312, 46.056182861328125, 219.2250213623047, 251.7147979736328, 92.14036560058594, 136.7417755126953, 17.577978134155273, 104.0893783569336, -245.01885986328125, 60.901248931884766, 78.98872375488281, 175.8027801513672, 60.11248016357422, -8.498029708862305, 214.42352294921875, -47.532691955566406, 236.11114501953125, 352.7777099609375, 127.83290100097656, 190.23297119140625, 29.223358154296875, 210.36741638183594, -89.92486572265625, -106.87615203857422, 199.45033264160156, 305.96148681640625, 213.87954711914062, 229.82899475097656, 13.942750930786133, 247.187744140625, 71.3212661743164, -63.37148666381836, 160.75521850585938, -143.45516967773438, 46.414817810058594, 216.29574584960938, -2.468486785888672, 155.77682495117188, 75.3073501586914, -11.2762451171875, 116.1360092163086, 84.09092712402344, 197.26321411132812, 93.25895690917969, -60.63563537597656, 1.4711532592773438, 107.76764678955078, 184.52798461914062, 371.826904296875, 254.49412536621094, -3.594369888305664, 130.11093139648438, 102.26173400878906, 317.0445861816406, 66.1377182006836, 19.159435272216797, 305.653564453125, 203.95907592773438], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000206.npy"}
{"epoch": 0.302496328928047, "step": 207, "batch_size": 64, "mean": 135.8834686279297, "std": 156.7523193359375, "min": -129.85247802734375, "p10": -26.39290542602539, "median": 93.11896514892578, "p90": 306.55677795410156, "max": 572.9993286132812, "pos_frac": 0.84375, "sample": [23.995479583740234, -129.85247802734375, 65.16304779052734, 103.55900573730469, 182.67523193359375, 38.336524963378906, 308.9212646484375, -24.194046020507812, -7.8714141845703125, 10.501678466796875, 82.46575927734375, 74.4867935180664, 301.0396423339844, -9.553359985351562, 133.24526977539062, 168.26406860351562, 51.274200439453125, 103.31829833984375, 34.12985610961914, 38.61035919189453, 52.21688461303711, 150.69635009765625, 32.07231903076172, 244.3083953857422, 32.05970764160156, -56.149688720703125, 301.0274658203125, 63.46040344238281, 35.462623596191406, -48.4623908996582, 288.0262145996094, 284.48046875, -63.119049072265625, 0.8257675170898438, 34.660980224609375, 257.286376953125, 36.193603515625, 233.99391174316406, 144.3809051513672, 249.68702697753906, 259.1641845703125, 132.09088134765625, 101.24253845214844, 142.25550842285156, 256.5520324707031, 247.12347412109375, 92.994140625, 382.4942626953125, 522.0948486328125, 276.806884765625, 48.09124755859375, 444.88104248046875, -48.68464660644531, 48.759254455566406, 572.9993286132812, 42.802459716796875, 99.65692138671875, 90.55023193359375, 93.24378967285156, 568.6925659179688, -92.08405303955078, 189.08340454101562, 431.4431457519531, -27.33527374267578], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000207.npy"}
{"epoch": 0.3039647577092511, "step": 208, "batch_size": 64, "mean": 102.14736938476562, "std": 155.59530639648438, "min": -337.81317138671875, "p10": -45.77013854980467, "median": 77.45724487304688, "p90": 323.0085479736329, "max": 432.59112548828125, "pos_frac": 0.796875, "sample": [-221.51060485839844, 142.6525115966797, -154.8674774169922, 276.40142822265625, 19.316078186035156, -16.603153228759766, 377.400634765625, 21.370878219604492, 116.8762435913086, 250.787353515625, 232.59071350097656, 63.984169006347656, 74.94580078125, 168.83934020996094, 186.17291259765625, 59.398277282714844, 432.59112548828125, 16.4156494140625, 393.0145263671875, 122.98255920410156, 5.308929443359375, 3.8564529418945312, 240.03585815429688, 171.47030639648438, 68.802490234375, 24.025047302246094, 49.23724365234375, 9.501617431640625, -179.64671325683594, 273.3638000488281, -134.9329071044922, -6.910449981689453, 51.942752838134766, 108.09134674072266, 134.97702026367188, 294.32275390625, 253.70437622070312, 40.37607955932617, -28.03765106201172, 237.88119506835938, -52.434783935546875, 62.20838928222656, 27.326797485351562, 341.1540832519531, 125.15005493164062, 343.53302001953125, 103.06836700439453, -337.81317138671875, -29.83710479736328, 157.34078979492188, 14.957427978515625, 102.53791809082031, 335.3024597167969, 107.21946716308594, 54.55348205566406, -127.37167358398438, 25.047210693359375, 389.5757751464844, 185.95523071289062, 278.59381103515625, 208.4854736328125, 79.96868896484375, -6.9990081787109375, -30.21929931640625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000208.npy"}
{"epoch": 0.3054331864904552, "step": 209, "batch_size": 64, "mean": 91.54013061523438, "std": 146.53550720214844, "min": -329.4758605957031, "p10": -77.01113586425781, "median": 82.95284652709961, "p90": 315.15308532714846, "max": 381.6580505371094, "pos_frac": 0.71875, "sample": [323.18084716796875, 73.50103759765625, 163.11013793945312, -40.56304168701172, 363.380615234375, 381.6580505371094, 314.6083679199219, 115.68855285644531, 131.04949951171875, 100.98184204101562, -58.32961654663086, 96.78125762939453, 156.46615600585938, 81.61421966552734, 166.10426330566406, 132.4703369140625, 161.9613494873047, -156.21710205078125, -72.0814208984375, 59.777122497558594, 242.86968994140625, 219.669677734375, -2.1268539428710938, -89.37771606445312, 53.71479797363281, -42.67008972167969, -23.047632217407227, 4.430625915527344, 92.86517333984375, 157.0272674560547, 113.58334350585938, 84.29147338867188, -111.1394271850586, -26.80429458618164, 66.90479278564453, 61.7352294921875, -79.12387084960938, -34.07455062866211, 364.92547607421875, 285.02978515625, 45.0810546875, 375.86993408203125, 5.0179901123046875, -34.27782440185547, -30.117843627929688, 43.540504455566406, 56.06898498535156, 74.17144775390625, 242.3254852294922, 139.1309356689453, 133.20956420898438, -33.16747283935547, -141.81167602539062, -329.4758605957031, 177.09951782226562, 85.59854125976562, 45.580570220947266, 52.80131530761719, 141.6097412109375, -147.18370056152344, 327.192138671875, 315.38653564453125, 224.58877563476562, 256.5044860839844], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000209.npy"}
{"epoch": 0.3069016152716593, "step": 210, "batch_size": 64, "mean": 107.78253173828125, "std": 143.84039306640625, "min": -231.87930297851562, "p10": -25.594954681396484, "median": 79.61280059814453, "p90": 305.71020507812506, "max": 478.58038330078125, "pos_frac": 0.765625, "sample": [-14.65576171875, 21.08316993713379, 130.4479522705078, 69.2001953125, -13.150894165039062, -71.840576171875, 187.27890014648438, -9.39265251159668, 125.44154357910156, -41.575340270996094, 181.1988525390625, 16.79912567138672, 27.29616928100586, 110.69994354248047, 269.50079345703125, 149.14315795898438, -12.823577880859375, 73.61985778808594, 204.09768676757812, 21.237857818603516, 283.09979248046875, 100.88004302978516, 33.82637023925781, 228.53689575195312, 256.316650390625, 3.4992809295654297, 5.34149169921875, 312.22381591796875, 195.84039306640625, 353.2586364746094, 120.3995590209961, 241.8868865966797, 114.2034683227539, 282.91864013671875, -129.1452178955078, -25.905105590820312, -79.22671508789062, -3.6355438232421875, 36.136871337890625, 11.549304962158203, 159.36911010742188, 163.90475463867188, 342.16973876953125, 111.98332214355469, 7.31193733215332, -1.230804443359375, 422.7432556152344, -10.313522338867188, 128.66783142089844, 147.48345947265625, 290.51177978515625, 478.58038330078125, 286.9076232910156, 50.329689025878906, -54.288414001464844, 85.60574340820312, -24.87126922607422, 2.683452606201172, 13.340538024902344, 353.40625, 5.218505859375, 17.68389129638672, -231.87930297851562, 387.1514892578125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000210.npy"}
{"epoch": 0.30837004405286345, "step": 211, "batch_size": 64, "mean": 91.54325103759766, "std": 162.15272521972656, "min": -216.43798828125, "p10": -58.783681488037104, "median": 65.3193473815918, "p90": 258.9692626953126, "max": 568.314697265625, "pos_frac": 0.71875, "sample": [123.7894515991211, -56.628326416015625, 236.03253173828125, 64.31893157958984, 69.86221313476562, 158.79173278808594, -46.33305358886719, 115.44544219970703, 513.3870239257812, -216.43798828125, -30.24309539794922, 64.7892074584961, 29.040485382080078, 10.03924560546875, 71.59524536132812, 221.58082580566406, 147.44581604003906, 195.12591552734375, 154.81326293945312, -39.35951232910156, -3.1492538452148438, -50.230499267578125, 190.21820068359375, 107.86463165283203, -59.70740509033203, 18.721847534179688, -43.74359893798828, 507.8653869628906, 44.08224868774414, 568.314697265625, 469.482666015625, -37.91193389892578, -119.18974304199219, 132.93199157714844, 246.04306030273438, 311.71173095703125, -144.25765991210938, 139.24978637695312, 40.85772705078125, -1.743316650390625, 157.5533905029297, 408.743408203125, -23.80023956298828, 73.21704864501953, 13.19141960144043, 52.24958038330078, 70.79408264160156, -93.41975402832031, 7.295917510986328, 213.93655395507812, -154.65773010253906, 65.8494873046875, 201.53297424316406, 22.048187255859375, 149.88394165039062, 264.5090637207031, 90.88872528076172, 241.5712890625, 20.738525390625, 130.7891082763672, 38.48744583129883, 16.887481689453125, -17.775100708007812, -196.21258544921875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000211.npy"}
{"epoch": 0.30983847283406757, "step": 212, "batch_size": 64, "mean": 132.50680541992188, "std": 170.44638061523438, "min": -255.48622131347656, "p10": -61.92900276184082, "median": 130.2234992980957, "p90": 317.31414489746095, "max": 592.3057861328125, "pos_frac": 0.75, "sample": [-255.48622131347656, -87.22806549072266, 318.9256286621094, 267.06072998046875, 55.74657440185547, 240.99127197265625, 112.3875732421875, -62.831295013427734, -33.98432159423828, -1.4462814331054688, 227.09449768066406, 176.08908081054688, -196.1791534423828, -36.959747314453125, -21.496028900146484, 6.745552062988281, 331.18389892578125, 217.34783935546875, 430.58795166015625, 251.6942596435547, 116.72329711914062, -109.78687286376953, 31.73117446899414, 97.57365417480469, 140.0963134765625, 174.55966186523438, 220.81268310546875, -78.41500854492188, 313.55401611328125, -14.058389663696289, 124.03752899169922, 216.1746063232422, 592.3057861328125, 190.05690002441406, 54.31036376953125, -231.52284240722656, 301.1519775390625, -59.82365417480469, 127.94709014892578, 552.8311157226562, 285.5797424316406, 132.49990844726562, 37.98023223876953, 235.177978515625, 16.182952880859375, 124.096435546875, -28.610050201416016, 54.36159896850586, 301.14935302734375, 53.89108657836914, 145.19357299804688, 369.89166259765625, 379.33575439453125, 275.14642333984375, 311.6306457519531, -8.664134979248047, 225.17539978027344, 64.10250854492188, 155.82177734375, 142.0766143798828, 69.36833953857422, 295.9276428222656, -34.904022216796875, 177.52081298828125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000212.npy"}
{"epoch": 0.31130690161527164, "step": 213, "batch_size": 64, "mean": 126.71992492675781, "std": 155.74801635742188, "min": -206.3963623046875, "p10": -67.17323608398436, "median": 107.84010696411133, "p90": 344.79062194824223, "max": 447.5928039550781, "pos_frac": 0.78125, "sample": [283.3516845703125, -91.72689819335938, 31.77922821044922, -105.63555145263672, 213.4770965576172, 15.739730834960938, 56.05200958251953, 98.36466979980469, -206.3963623046875, -81.46707153320312, 10.338842391967773, 324.90020751953125, 366.9107971191406, -40.524192810058594, 78.00859832763672, 171.826904296875, -8.718114852905273, 64.13648986816406, 175.32322692871094, 322.46673583984375, 427.36431884765625, 319.0301818847656, 52.40833282470703, 415.9836730957031, 185.1082763671875, 280.68365478515625, 376.4698486328125, -16.893802642822266, -9.432411193847656, -3.531322479248047, 184.0911865234375, 123.5330581665039, 207.80084228515625, 117.31554412841797, -51.693939208984375, 11.061492919921875, 160.13186645507812, 338.14495849609375, 289.7763366699219, 138.4757080078125, -10.866939544677734, 193.37120056152344, 347.6387634277344, -73.80722045898438, 222.03517150878906, 61.031005859375, 182.29946899414062, 447.5928039550781, -143.4974365234375, 202.88888549804688, 253.30569458007812, 307.43988037109375, 358.9293212890625, 41.3531379699707, 13.268447875976562, 10.400405883789062, -129.76324462890625, 94.85263061523438, 93.3882827758789, 63.009437561035156, 21.109031677246094, 29.383453369140625, 171.3324432373047, 129.34490966796875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000213.npy"}
{"epoch": 0.31277533039647576, "step": 214, "batch_size": 64, "mean": 112.87818145751953, "std": 148.93080139160156, "min": -186.34564208984375, "p10": -45.725853729248044, "median": 84.60902786254883, "p90": 294.9085571289063, "max": 507.98028564453125, "pos_frac": 0.78125, "sample": [51.19098663330078, 396.8843688964844, 21.297237396240234, 190.7293701171875, -23.02754783630371, -8.45145034790039, -86.25004577636719, -186.34564208984375, 272.3131103515625, -81.73648071289062, 160.30809020996094, 40.99897003173828, 98.9674301147461, -0.776214599609375, 26.291671752929688, 113.5869140625, 231.36624145507812, -28.930437088012695, 15.255050659179688, 192.0999755859375, 68.296142578125, 71.07662200927734, 103.97896575927734, 274.1802062988281, 275.69140625, 75.43211364746094, 5.994239807128906, 1.4481868743896484, 127.6810302734375, -44.48682403564453, 93.38169860839844, 31.363197326660156, 10.837333679199219, 345.86566162109375, -74.40656280517578, 502.0473937988281, 339.8128356933594, 5.4247283935546875, 25.146377563476562, 409.8414611816406, 226.48924255371094, 207.751708984375, 263.59552001953125, 297.0558166503906, -142.7261505126953, 507.98028564453125, 24.8785457611084, 193.96170043945312, 219.91171264648438, 105.77690887451172, -53.282859802246094, 184.02967834472656, -40.81023406982422, 116.6573486328125, -46.256866455078125, 230.42074584960938, 31.931568145751953, 289.8982849121094, 75.83635711669922, 170.22860717773438, 123.86007690429688, 166.55213928222656, -43.93315124511719, 70.01895141601562], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000214.npy"}
{"epoch": 0.3142437591776799, "step": 215, "batch_size": 64, "mean": 119.77651977539062, "std": 140.6836395263672, "min": -147.9462432861328, "p10": -67.3736099243164, "median": 100.90655899047852, "p90": 303.9629272460938, "max": 516.3278198242188, "pos_frac": 0.828125, "sample": [70.69482421875, 340.7974548339844, 120.8011474609375, 263.588623046875, -65.1783447265625, -83.99679565429688, 43.90532684326172, 118.0261459350586, 134.5604705810547, 100.7534408569336, 145.19252014160156, 45.07206726074219, -0.11704063415527344, -35.85509490966797, 428.00189208984375, 261.1252746582031, 251.59423828125, 300.2886962890625, 516.3278198242188, 156.806396484375, 101.05967712402344, 423.65460205078125, 122.7305679321289, -79.85380554199219, 73.08589172363281, 197.11288452148438, -68.31443786621094, 67.62567901611328, 184.19606018066406, 247.95013427734375, 33.292076110839844, 84.18385314941406, 49.348236083984375, -1.083120346069336, 157.0243377685547, 88.21131896972656, -132.83050537109375, 142.9355926513672, 195.39120483398438, 265.1974792480469, 341.2580261230469, 94.40210723876953, 202.82740783691406, 3.145965576171875, 103.50057983398438, 50.75367736816406, 43.70502471923828, -78.57075500488281, 56.84614181518555, 235.96005249023438, -147.9462432861328, 135.1861572265625, 427.662353515625, 31.17040252685547, 61.16429138183594, 92.261962890625, 305.53759765625, 11.732833862304688, 200.7524871826172, 40.11151885986328, 106.87875366210938, 60.979759216308594, -89.14616394042969, 112.21646881103516], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000215.npy"}
{"epoch": 0.315712187958884, "step": 216, "batch_size": 64, "mean": 117.78302001953125, "std": 168.61341857910156, "min": -277.1417236328125, "p10": -71.50003280639649, "median": 90.3866195678711, "p90": 354.4610992431641, "max": 542.0528564453125, "pos_frac": 0.75, "sample": [218.27560424804688, 407.83819580078125, -74.56194305419922, 148.56951904296875, 210.73837280273438, 38.68901062011719, -183.80894470214844, 151.40667724609375, 211.53350830078125, 2.8559646606445312, 542.0528564453125, 94.90013122558594, 106.86209106445312, -72.0047607421875, -26.864826202392578, -17.141143798828125, -89.120361328125, 218.62542724609375, 37.4261589050293, -178.24935913085938, 4.4962158203125, 327.27252197265625, -14.796722412109375, -24.95263671875, 85.87310791015625, 152.74232482910156, 53.510040283203125, 195.6692657470703, 112.68707275390625, -20.865144729614258, 115.961669921875, 201.23257446289062, 23.473434448242188, -44.632598876953125, 402.4170837402344, 355.48455810546875, -59.30943298339844, 288.0359191894531, 41.90516662597656, 66.5193099975586, 51.01685333251953, 262.8636169433594, 10.341318130493164, -277.1417236328125, 73.223876953125, 189.03564453125, 230.21640014648438, 396.20947265625, 48.25175476074219, -70.32233428955078, 275.53643798828125, 48.3895263671875, 124.55033111572266, 171.31448364257812, -34.88361358642578, 344.7081298828125, 203.90985107421875, 342.41497802734375, 435.39935302734375, 386.0906982421875, 18.67913818359375, -78.60227966308594, 24.09198760986328, 352.0730285644531], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000216.npy"}
{"epoch": 0.31718061674008813, "step": 217, "batch_size": 64, "mean": 137.91058349609375, "std": 156.93167114257812, "min": -180.24644470214844, "p10": -27.592856597900372, "median": 99.55279159545898, "p90": 365.9270385742188, "max": 541.899169921875, "pos_frac": 0.84375, "sample": [156.417236328125, 137.77821350097656, 97.79817962646484, 142.73011779785156, 60.68334197998047, 62.89746856689453, 59.907691955566406, 235.1119842529297, 68.79576873779297, 91.11749267578125, 52.411651611328125, 371.38134765625, 147.71054077148438, 353.2003173828125, -118.20142364501953, -0.9731826782226562, 34.83906555175781, -2.74908447265625, 202.22259521484375, 541.899169921875, 7.629375457763672, 66.30915069580078, 272.668701171875, 430.1499938964844, 108.42852020263672, 207.34329223632812, -38.47022247314453, 75.09255981445312, -180.24644470214844, 103.46728515625, 486.3850402832031, 199.4091796875, 4.731657028198242, 71.83164978027344, 260.0572509765625, 1.7661590576171875, 237.75640869140625, 272.9080505371094, -10.796493530273438, 506.1669921875, 439.02752685546875, -98.27436065673828, 321.947509765625, 273.78936767578125, 142.2982940673828, 480.7908630371094, 15.773361206054688, 101.30740356445312, 75.62328338623047, 65.08401489257812, 67.07957458496094, 169.21530151367188, 255.70046997070312, 5.423906326293945, 148.33384704589844, 71.12562561035156, -38.92473602294922, -34.791297912597656, 32.43207550048828, -40.78611755371094, 137.0716094970703, 221.01707458496094, 214.39967346191406, 24.04620933532715], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000217.npy"}
{"epoch": 0.3186490455212922, "step": 218, "batch_size": 64, "mean": 106.35857391357422, "std": 160.5690460205078, "min": -251.99954223632812, "p10": -79.14162521362304, "median": 90.34661102294922, "p90": 319.0970703125, "max": 583.5761108398438, "pos_frac": 0.75, "sample": [89.16477966308594, 61.363075256347656, 183.89215087890625, 244.9001922607422, 149.04653930664062, 27.35702896118164, 344.8136901855469, 68.90756225585938, 12.994955062866211, 62.04676055908203, 167.83555603027344, 56.968505859375, -22.07213592529297, 125.49333190917969, 435.7431640625, 180.0603790283203, 122.03009033203125, -18.7208251953125, 135.96380615234375, 183.31040954589844, 42.5606689453125, 54.20153045654297, 1.0635948181152344, 232.44338989257812, -80.59169006347656, -251.99954223632812, 316.4117431640625, 100.89445495605469, 188.96875, 189.83108520507812, 33.26756286621094, 421.0712890625, 428.70361328125, 219.91522216796875, 200.626220703125, 64.6864013671875, -20.073720932006836, -20.67930793762207, 95.40201568603516, -36.608341217041016, -30.659225463867188, -94.91180419921875, 143.36044311523438, -112.677001953125, -73.97399139404297, -50.55097198486328, 583.5761108398438, 320.2479248046875, 196.7208709716797, -75.75814056396484, -134.5169677734375, 236.3192596435547, 444.25140380859375, 11.920206069946289, 47.50343704223633, 110.46728515625, 158.6813507080078, 91.5284423828125, 39.43769836425781, -147.252685546875, 75.76294708251953, -129.36231994628906, 295.906982421875, 109.73297119140625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000218.npy"}
{"epoch": 0.3201174743024963, "step": 219, "batch_size": 64, "mean": 93.80690002441406, "std": 175.19166564941406, "min": -414.2403259277344, "p10": -99.40458602905272, "median": 73.01972198486328, "p90": 332.395687866211, "max": 482.9246826171875, "pos_frac": 0.765625, "sample": [455.25433349609375, 159.79205322265625, 5.183858871459961, 315.10894775390625, 3.1128406524658203, 466.278564453125, -414.2403259277344, 17.42861557006836, 35.69866943359375, 300.6694030761719, -9.16253662109375, 103.39359283447266, 209.76397705078125, 457.64215087890625, 482.9246826171875, 155.66488647460938, 62.64604949951172, 272.9610900878906, 116.682373046875, 111.49139404296875, 269.4841003417969, 34.53788757324219, -125.19812774658203, 7.5866241455078125, -68.4062271118164, -62.20983123779297, 339.8042907714844, -4.593250274658203, 357.0217590332031, 387.337646484375, 38.615264892578125, 308.95782470703125, -0.5721282958984375, 55.17034912109375, 20.256759643554688, 173.52944946289062, 112.65413665771484, 20.44689178466797, 109.74577331542969, 17.4228515625, 307.3238220214844, 139.04818725585938, 96.9884033203125, -177.3638916015625, 73.48280334472656, 154.35882568359375, -182.0027618408203, 6.188371658325195, 16.734333038330078, 229.8574981689453, 29.656768798828125, -102.73201751708984, 1.6338882446289062, -59.16831588745117, -229.24220275878906, 85.5306167602539, 170.82162475585938, 84.35057067871094, -91.64057922363281, -37.49143981933594, 144.9881591796875, 72.556640625, 93.07616424560547, -123.20073699951172], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000219.npy"}
{"epoch": 0.32158590308370044, "step": 220, "batch_size": 64, "mean": 78.8044662475586, "std": 123.3990478515625, "min": -235.92947387695312, "p10": -68.47479782104492, "median": 74.85738372802734, "p90": 198.934521484375, "max": 439.0994567871094, "pos_frac": 0.75, "sample": [204.51654052734375, 49.37261962890625, 64.04432678222656, -32.178428649902344, 44.552268981933594, 316.121337890625, 343.0736389160156, 6.035778045654297, 143.6045379638672, 153.24937438964844, 121.22870635986328, 249.38037109375, 140.18002319335938, 143.087890625, -224.38131713867188, 103.92364501953125, 175.7410430908203, 36.302978515625, 131.83181762695312, 198.08340454101562, 72.88945007324219, 23.872690200805664, 4.256927490234375, -235.92947387695312, -71.5860366821289, 188.97007751464844, -25.956247329711914, 50.41644287109375, 199.29928588867188, 66.51788330078125, -61.215240478515625, 160.90257263183594, 126.92640686035156, -26.549240112304688, 224.48988342285156, 133.82176208496094, 439.0994567871094, 169.8726043701172, -4.483558654785156, 111.03072357177734, 17.056711196899414, 52.783634185791016, 0.30471229553222656, 110.84968566894531, -91.4053726196289, 166.2894287109375, 63.359222412109375, 131.0540008544922, -131.47413635253906, 76.8253173828125, 65.86528015136719, -1.4731063842773438, 146.11172485351562, -23.14396858215332, 31.59476089477539, 156.93223571777344, 161.62086486816406, -52.13878631591797, 179.08401489257812, 180.79690551757812, -16.668224334716797, 147.56539916992188, -103.734375, -138.98696899414062], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000220.npy"}
{"epoch": 0.32305433186490456, "step": 221, "batch_size": 64, "mean": 166.45452880859375, "std": 158.01333618164062, "min": -181.23724365234375, "p10": -29.948572921752927, "median": 185.2666778564453, "p90": 368.79209289550784, "max": 538.8380126953125, "pos_frac": 0.828125, "sample": [75.0886001586914, 107.96267700195312, 194.78822326660156, -4.8841400146484375, 98.18412780761719, 538.8380126953125, 18.205299377441406, 374.38140869140625, 226.11459350585938, 264.48193359375, 288.6365051269531, 18.527530670166016, 293.43145751953125, 230.41976928710938, 154.12158203125, 261.67388916015625, -3.73974609375, 438.8640441894531, -11.183839797973633, 92.40230560302734, 218.28872680664062, 298.03094482421875, 356.03265380859375, 453.26171875, 458.85760498046875, 233.52166748046875, 227.46664428710938, -181.23724365234375, 52.86549377441406, 66.5597152709961, 166.02041625976562, 194.95872497558594, -32.79133605957031, 63.828758239746094, 63.67063903808594, 176.67910766601562, 106.19812774658203, -133.09249877929688, 252.62335205078125, 367.38818359375, -43.139610290527344, 288.2615966796875, 227.94847106933594, 310.4220886230469, 120.69610595703125, 220.78036499023438, 32.62107849121094, 369.3937683105469, 317.2322692871094, 222.5904541015625, -28.53628158569336, -99.0315170288086, -30.55384063720703, 252.52413940429688, 49.21244812011719, -68.76136779785156, 29.0645694732666, 117.04134368896484, 49.13692092895508, 193.854248046875, 302.3905029296875, 91.10881042480469, 468.83837890625, 194.5488739013672], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000221.npy"}
{"epoch": 0.3245227606461087, "step": 222, "batch_size": 64, "mean": 125.51266479492188, "std": 157.9297332763672, "min": -205.81304931640625, "p10": -51.96143417358398, "median": 115.59663009643555, "p90": 363.49197387695324, "max": 531.4973754882812, "pos_frac": 0.78125, "sample": [-53.70178985595703, 437.7499084472656, -6.5364532470703125, -121.13656616210938, -18.345481872558594, -84.37783813476562, -13.124420166015625, 115.42887878417969, 7.635162353515625, 203.96542358398438, 103.49605560302734, 117.78047943115234, 422.01495361328125, 94.03096008300781, 121.1189956665039, 227.75656127929688, -205.81304931640625, 154.36758422851562, 257.0884704589844, 174.5612335205078, 157.40599060058594, 98.02667236328125, -190.9493408203125, 320.5381164550781, -120.0545425415039, 265.77032470703125, 490.82537841796875, 87.19354248046875, 30.518165588378906, 17.48468780517578, 152.26010131835938, 23.142501831054688, 185.1914520263672, -47.900604248046875, 194.9876251220703, 197.6537322998047, 44.009864807128906, 168.67520141601562, -9.623401641845703, 209.2762451171875, -28.50281524658203, 92.16181945800781, 84.74213409423828, 59.87202453613281, 329.8634338378906, 251.01837158203125, 115.7643814086914, 249.33395385742188, 146.9430389404297, 51.286598205566406, -27.170516967773438, 160.85653686523438, 379.551025390625, 61.74911117553711, 377.9042053222656, 175.11984252929688, 127.91395568847656, -107.07151794433594, 209.57000732421875, 21.21294403076172, 531.4973754882812, 55.38232421875, 405.43597412109375, 99.98560333251953], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000222.npy"}
{"epoch": 0.32599118942731276, "step": 223, "batch_size": 64, "mean": 107.08901977539062, "std": 174.6146697998047, "min": -318.8013610839844, "p10": -118.61705780029295, "median": 108.57197952270508, "p90": 309.4934814453125, "max": 600.4367065429688, "pos_frac": 0.75, "sample": [63.89247131347656, -66.41497039794922, 210.12954711914062, 137.03305053710938, 132.5582275390625, 377.72125244140625, 188.22650146484375, 104.04956817626953, 190.9393768310547, 153.37106323242188, -147.0413818359375, 54.195953369140625, 90.05616760253906, 234.04910278320312, 294.3536376953125, 303.09649658203125, 142.37176513671875, 87.75862121582031, 507.7491455078125, -18.550914764404297, 153.20077514648438, -60.638343811035156, 94.03565979003906, -51.81138229370117, -56.732948303222656, 291.7929382324219, -125.39967346191406, 357.1236877441406, 100.08580780029297, -124.8611831665039, 8.640960693359375, 19.962997436523438, 259.82733154296875, -318.8013610839844, 113.09439086914062, 192.101806640625, -185.45921325683594, 216.52236938476562, 71.06452941894531, 600.4367065429688, 249.61912536621094, -104.04743194580078, 155.35618591308594, 263.7308044433594, -192.60845947265625, -50.74516677856445, 281.5970153808594, 323.540771484375, 139.58642578125, -23.39287567138672, 50.64537811279297, 46.74549102783203, 58.31048583984375, 412.01953125, 24.665748596191406, 18.77318572998047, -13.650823593139648, 5.880727767944336, 146.28440856933594, 137.04458618164062, 114.17742156982422, 162.31427001953125, -258.1153259277344, 312.23504638671875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000223.npy"}
{"epoch": 0.3274596182085169, "step": 224, "batch_size": 64, "mean": 122.58447265625, "std": 180.65655517578125, "min": -352.183837890625, "p10": -67.00712509155274, "median": 66.29911041259766, "p90": 367.32133483886724, "max": 662.0756225585938, "pos_frac": 0.8125, "sample": [241.29957580566406, 3.097412109375, 139.1610870361328, 38.53070831298828, -80.44908142089844, 532.8287353515625, 238.22962951660156, 32.360130310058594, 65.66683197021484, -352.183837890625, 52.849891662597656, 199.52647399902344, 144.45372009277344, 156.56875610351562, 427.99822998046875, 93.78768920898438, 62.290489196777344, 231.62318420410156, 199.62490844726562, 662.0756225585938, 281.2614440917969, 118.50813293457031, 355.4670715332031, 237.96783447265625, -142.501220703125, -77.84161376953125, 269.3603210449219, 66.36494445800781, 221.07537841796875, -17.906112670898438, 25.104476928710938, 41.71746826171875, 37.353858947753906, 602.3603515625, 266.5359802246094, 240.41909790039062, 119.66922760009766, -68.61588287353516, 18.45045280456543, 30.876506805419922, 231.74685668945312, 31.4486083984375, 48.0615234375, -24.188148498535156, 102.39968872070312, -80.21613311767578, -61.47357177734375, 1.2209014892578125, -101.43684387207031, -1.1430397033691406, 178.12855529785156, 116.46955871582031, 372.4017333984375, 18.89592170715332, 8.55938720703125, 66.2332763671875, 4.870758056640625, 318.0808410644531, 37.76432800292969, -63.25335693359375, 82.73468780517578, 417.99957275390625, 411.0788269042969, 14.054195404052734], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000224.npy"}
{"epoch": 0.328928046989721, "step": 225, "batch_size": 64, "mean": 143.213134765625, "std": 177.7547607421875, "min": -244.872802734375, "p10": -86.73645706176758, "median": 150.50027465820312, "p90": 390.70139465332034, "max": 670.456787109375, "pos_frac": 0.734375, "sample": [241.75094604492188, 162.50607299804688, 403.7950439453125, 345.1116027832031, 50.329505920410156, 112.25138092041016, 36.803985595703125, -102.13580322265625, -135.48077392578125, 429.13531494140625, 385.6968688964844, 267.4292907714844, -141.4087371826172, -85.93944549560547, 289.570556640625, -27.571361541748047, -113.12877655029297, -22.57645606994629, -18.937450408935547, 58.14906311035156, 111.6953125, 143.72238159179688, 222.9296875, 182.18594360351562, -9.530168533325195, 158.11251831054688, 55.53759765625, 164.8310089111328, -23.543514251708984, 253.49896240234375, 304.3353271484375, 161.40933227539062, 217.6088409423828, 105.4266586303711, -26.82107925415039, 463.58013916015625, 438.26251220703125, -12.175453186035156, 240.7454833984375, 396.136962890625, 97.24650573730469, 319.9232177734375, 180.7147674560547, -34.07353973388672, 156.0142059326172, 133.40213012695312, 670.456787109375, 279.91717529296875, 152.108154296875, 77.18910217285156, 392.84619140625, 16.98687744140625, 230.5713348388672, -101.204345703125, -87.07803344726562, 375.1927490234375, 284.51806640625, 172.21292114257812, 178.8279266357422, -244.872802734375, 122.83858489990234, 148.89239501953125, -45.668617248535156, 5.38043212890625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000225.npy"}
{"epoch": 0.3303964757709251, "step": 226, "batch_size": 64, "mean": 116.85462951660156, "std": 168.7810516357422, "min": -211.2891387939453, "p10": -38.55992164611815, "median": 70.73094177246094, "p90": 329.4973480224609, "max": 621.9408569335938, "pos_frac": 0.765625, "sample": [621.9408569335938, -153.62030029296875, -45.03139877319336, 22.48895263671875, -13.198482513427734, 173.23963928222656, -4.74151611328125, 67.19624328613281, 175.71835327148438, -18.35787582397461, 116.9073257446289, 16.06884765625, -18.34137725830078, 39.966094970703125, -164.4822235107422, 106.39561462402344, -77.22795104980469, 330.205078125, 317.5128479003906, 37.735130310058594, 334.09869384765625, 327.8459777832031, 69.9970474243164, 125.16802978515625, 187.0522918701172, 295.828125, 30.475852966308594, -23.459808349609375, 125.60406494140625, 435.1475524902344, 298.75628662109375, -65.4305648803711, 187.9517822265625, 78.97301483154297, 54.042144775390625, 530.447021484375, 224.90145874023438, 513.1910400390625, 269.9289245605469, -10.520454406738281, -1.9998626708984375, 139.64044189453125, -211.2891387939453, 155.91976928710938, 41.78630447387695, 108.40574645996094, 15.360755920410156, 9.092132568359375, 9.430078506469727, 71.46483612060547, -5.789276123046875, 250.0124969482422, 109.23468017578125, 186.8406982421875, 66.0437240600586, 122.40489196777344, -94.8341064453125, 7.079408645629883, 505.49957275390625, 52.654075622558594, 38.67908477783203, 145.04588317871094, 25.734832763671875, 211.90658569335938], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000226.npy"}
{"epoch": 0.33186490455212925, "step": 227, "batch_size": 64, "mean": 143.24203491210938, "std": 170.77151489257812, "min": -367.3310546875, "p10": -24.455449485778793, "median": 115.45930480957031, "p90": 355.64946899414065, "max": 625.7612915039062, "pos_frac": 0.875, "sample": [308.1547546386719, 101.47787475585938, 134.93829345703125, 390.72955322265625, 227.30752563476562, 41.898681640625, 61.42835998535156, 109.43798065185547, 312.49603271484375, 3.6858673095703125, 110.97949981689453, 118.2532958984375, 182.36322021484375, 21.377805709838867, -31.255054473876953, 27.58733367919922, 166.47116088867188, 97.40516662597656, 201.4794158935547, 625.7612915039062, 153.2217254638672, 87.70001220703125, 311.08331298828125, 214.73158264160156, 60.24720001220703, 281.533447265625, 173.44903564453125, 248.640380859375, 106.2119140625, 97.23887634277344, 203.43165588378906, 54.53874206542969, 337.6263732910156, 5.622398376464844, -81.96977233886719, -107.92436218261719, 192.65036010742188, 216.71206665039062, 490.5748291015625, 117.62960815429688, 190.5015106201172, 423.51513671875, -107.55702209472656, 574.0459594726562, 9.366376876831055, 20.39702606201172, 187.86846923828125, 35.979469299316406, 216.48291015625, 36.37144470214844, 130.3993377685547, 355.91253662109375, 113.28900146484375, 10.073196411132812, 4.661561965942383, -49.503021240234375, 355.03564453125, -367.3310546875, -8.589704513549805, 79.6055908203125, -56.06354522705078, 25.283517837524414, 132.46139526367188, 480.3571472167969], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000227.npy"}
{"epoch": 0.3333333333333333, "step": 228, "batch_size": 64, "mean": 102.19867706298828, "std": 163.6630859375, "min": -259.48065185546875, "p10": -101.75337677001951, "median": 106.22762680053711, "p90": 316.8976745605469, "max": 495.34686279296875, "pos_frac": 0.734375, "sample": [-174.28610229492188, -235.90692138671875, -259.48065185546875, 229.78030395507812, 148.10760498046875, 318.6103515625, -191.55789184570312, 156.2767791748047, 203.13766479492188, -158.74241638183594, 19.596954345703125, 286.8226013183594, 495.34686279296875, 312.90142822265625, 426.0079345703125, 318.7088623046875, 363.60321044921875, 165.94969177246094, 123.67524719238281, -33.88053894042969, -41.55351257324219, 302.4798583984375, 108.39563751220703, 321.52935791015625, 177.83090209960938, 156.3037567138672, 126.68785095214844, 19.145896911621094, -90.73668670654297, 115.61686706542969, 40.58052062988281, 201.00314331054688, 2.7233428955078125, 194.59881591796875, 223.493896484375, 78.20252227783203, 124.3144302368164, 78.62008666992188, 31.400131225585938, 28.815826416015625, 84.86962127685547, 244.04263305664062, -82.04605102539062, -12.926681518554688, 104.05961608886719, 91.61287689208984, 436.0971374511719, 23.843795776367188, 87.31373596191406, 55.36945343017578, 265.6273193359375, -18.393314361572266, -23.55760955810547, 192.01480102539062, 228.8992462158203, 164.53970336914062, 204.4186553955078, 110.94734954833984, -106.47481536865234, -24.531524658203125, -20.01111602783203, 34.07244873046875, -161.71176147460938, -51.483558654785156], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000228.npy"}
{"epoch": 0.33480176211453744, "step": 229, "batch_size": 64, "mean": 111.81390380859375, "std": 160.08636474609375, "min": -376.68438720703125, "p10": -49.133943939208976, "median": 95.05244064331055, "p90": 344.1722442626953, "max": 539.343994140625, "pos_frac": 0.75, "sample": [-5.2833709716796875, 15.043432235717773, -111.71514892578125, 410.9207763671875, 48.15987014770508, -187.82403564453125, 113.50873565673828, 185.3707733154297, 85.54890441894531, 205.78732299804688, 258.3521423339844, 345.44219970703125, 103.84405517578125, 367.2754821777344, 88.73002624511719, 99.0852279663086, 221.20578002929688, 110.84862518310547, 54.13162612915039, 341.2090148925781, -82.73428344726562, 60.37078857421875, -1.2993354797363281, 228.08265686035156, 126.05828094482422, 46.55787658691406, 539.343994140625, -159.71221923828125, 44.07087707519531, 238.60516357421875, 161.5994873046875, 255.65243530273438, 185.52984619140625, 366.3289794921875, 348.890380859375, 51.86383819580078, 315.6781005859375, 25.20787811279297, 91.0196533203125, -51.816619873046875, 84.18256378173828, 134.74522399902344, 20.086570739746094, 202.7525634765625, -42.874366760253906, 158.75558471679688, 192.3365478515625, 217.06275939941406, -31.556732177734375, -124.34001159667969, 121.0687484741211, 435.48382568359375, 31.801237106323242, 86.26737976074219, 84.72309112548828, 149.0784454345703, -12.561897277832031, 171.12396240234375, -15.017227172851562, 181.96292114257812, -376.68438720703125, -0.9666080474853516, -16.801925659179688, -33.477638244628906], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000229.npy"}
{"epoch": 0.33627019089574156, "step": 230, "batch_size": 64, "mean": 162.08526611328125, "std": 160.9262237548828, "min": -297.5741882324219, "p10": -23.03276672363281, "median": 151.3130340576172, "p90": 386.22105407714844, "max": 482.3273620605469, "pos_frac": 0.828125, "sample": [71.50836181640625, 375.8487854003906, -9.542707443237305, 285.7841796875, 218.53463745117188, 79.66248321533203, -19.241670608520508, 247.88221740722656, 105.2850341796875, 135.234375, 200.0229949951172, 152.10885620117188, -20.769569396972656, 230.720458984375, 18.469100952148438, 112.09390258789062, -51.086585998535156, 87.1533203125, 97.10244750976562, 137.12432861328125, 61.5643310546875, -105.45133972167969, -24.002708435058594, 205.92825317382812, 139.90390014648438, 171.53948974609375, 203.9388885498047, 363.48052978515625, 278.4597473144531, 163.88214111328125, 79.87334442138672, 25.513160705566406, 329.7965087890625, 477.7044982910156, 150.5172119140625, -100.93929290771484, 131.4501953125, 177.49258422851562, 244.56124877929688, 453.59454345703125, -39.77840042114258, -8.697891235351562, -297.5741882324219, -36.069801330566406, 32.83204650878906, 53.340309143066406, 384.9137878417969, 112.34529113769531, 377.11859130859375, 442.573486328125, 159.5625457763672, 93.86668395996094, 106.84892272949219, 20.188844680786133, 402.42169189453125, 291.131591796875, 482.3273620605469, 221.6708526611328, 386.78131103515625, 206.31825256347656, 224.0175323486328, 157.97958374023438, 239.8410186767578, 476.79522705078125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000230.npy"}
{"epoch": 0.3377386196769457, "step": 231, "batch_size": 64, "mean": 112.35234069824219, "std": 151.80450439453125, "min": -128.252685546875, "p10": -44.86960563659668, "median": 84.81239318847656, "p90": 343.67593688964854, "max": 635.993408203125, "pos_frac": 0.796875, "sample": [441.6279296875, 87.39410400390625, -49.848411560058594, 268.75775146484375, 11.111824035644531, 77.70948791503906, 351.7035827636719, -2.83343505859375, 67.07001495361328, 112.63895416259766, 99.79048156738281, 352.2049865722656, -68.22505187988281, 74.52346801757812, 486.4841613769531, 132.66346740722656, 192.32032775878906, 82.27816772460938, 146.42938232421875, 216.62991333007812, 362.3668518066406, 63.068485260009766, 108.28543853759766, 32.31941223144531, 38.18870544433594, -70.24588775634766, 324.94476318359375, -45.315006256103516, 209.91952514648438, 635.993408203125, 443.0706481933594, 251.94747924804688, 124.41485595703125, 26.73781394958496, -128.252685546875, 70.95985412597656, 98.08734130859375, 167.45999145507812, 87.34661865234375, -40.911376953125, -20.936798095703125, 32.331642150878906, 220.83514404296875, 73.62242126464844, -35.47899627685547, 126.24645233154297, -43.83033752441406, -113.79752349853516, 55.978328704833984, 122.63043212890625, 294.1265869140625, 159.32131958007812, 71.91698455810547, 2.8013572692871094, -43.17776870727539, 123.56149291992188, -96.02255249023438, 109.4429931640625, 24.436676025390625, 2.875579833984375, 10.949020385742188, 140.50254821777344, 98.75947570800781, 32.63746643066406], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000231.npy"}
{"epoch": 0.3392070484581498, "step": 232, "batch_size": 64, "mean": 92.69305419921875, "std": 156.90338134765625, "min": -166.30418395996094, "p10": -75.75656585693359, "median": 59.21536445617676, "p90": 313.11934814453144, "max": 591.14501953125, "pos_frac": 0.734375, "sample": [186.71707153320312, 99.66027069091797, 9.800382614135742, 45.103187561035156, 14.6893310546875, -131.48208618164062, 348.9090881347656, 256.6377868652344, -29.643112182617188, 251.958984375, 53.44641876220703, 404.23858642578125, 82.21257781982422, -76.55785369873047, -24.984176635742188, 66.83333587646484, -17.001876831054688, 189.39398193359375, -102.02903747558594, 69.46500396728516, 140.63168334960938, 566.4894409179688, 65.90463256835938, 76.12197875976562, 120.47284698486328, -33.42787551879883, 591.14501953125, -19.514236450195312, 39.05279541015625, 258.81317138671875, 140.16720581054688, 234.05850219726562, -37.14727783203125, 78.30865478515625, -80.8848876953125, 105.69134521484375, -101.00629425048828, 22.944637298583984, -52.22785949707031, 423.4371337890625, 19.211181640625, -73.88689422607422, 1.5697402954101562, -93.41731262207031, 11.567768096923828, 89.63236999511719, 40.4715576171875, -166.30418395996094, 270.4801025390625, 95.62518310546875, 331.393310546875, 23.789684295654297, 0.390655517578125, 332.51409912109375, 227.63531494140625, -21.00120735168457, 112.0014419555664, 246.02410888671875, -23.57784080505371, 50.891883850097656, 58.58207321166992, 35.64011001586914, 66.87510681152344, 59.848655700683594], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000232.npy"}
{"epoch": 0.3406754772393539, "step": 233, "batch_size": 64, "mean": 143.58230590820312, "std": 210.76519775390625, "min": -246.95950317382812, "p10": -67.23311500549315, "median": 99.38200378417969, "p90": 460.78534240722666, "max": 661.8760986328125, "pos_frac": 0.71875, "sample": [16.432985305786133, 8.55142593383789, 490.86114501953125, -34.40376281738281, 190.03619384765625, 203.37710571289062, -182.2168426513672, -116.79549407958984, 5.3361663818359375, -188.15257263183594, 114.11482238769531, 55.073699951171875, 160.8305206298828, 274.1110534667969, 269.25738525390625, 661.8760986328125, 121.48188018798828, 79.144775390625, 517.78271484375, 325.60198974609375, 49.45375061035156, 180.00880432128906, 77.77822875976562, 489.9570617675781, 30.46587371826172, 273.98846435546875, -0.09967803955078125, -71.68695068359375, -223.94696044921875, -55.93387222290039, 160.4048309326172, 499.4287414550781, 190.42855834960938, 154.26303100585938, 304.75640869140625, 472.7064514160156, 40.08898162841797, 406.03826904296875, -43.65327453613281, 53.81416320800781, 119.66216278076172, 382.1319580078125, -17.076854705810547, 306.8420104980469, 47.79974365234375, 345.24444580078125, -56.8408317565918, -246.95950317382812, -154.57591247558594, -20.558837890625, 48.772430419921875, -21.316001892089844, 341.7275390625, 84.64918518066406, -39.2154541015625, 209.7598419189453, -24.19558334350586, 432.96942138671875, -37.980472564697266, 640.7597045898438, 362.9556884765625, 26.402633666992188, 302.441650390625, 195.3072509765625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000233.npy"}
{"epoch": 0.342143906020558, "step": 234, "batch_size": 64, "mean": 151.24588012695312, "std": 165.96826171875, "min": -153.62782287597656, "p10": -51.829926300048825, "median": 135.64238739013672, "p90": 375.6836700439454, "max": 622.31689453125, "pos_frac": 0.78125, "sample": [-70.4498062133789, 266.350341796875, 266.3032531738281, 232.89111328125, 320.9425964355469, 88.76959991455078, 160.79574584960938, 168.66455078125, 387.4712219238281, -4.80463981628418, 269.5440368652344, 135.7439422607422, -153.62782287597656, -0.3601646423339844, 89.1656723022461, 254.24337768554688, 68.04011535644531, -50.09833526611328, 92.58990478515625, 313.73345947265625, 597.2789306640625, 125.01062774658203, -16.095544815063477, -27.73047637939453, 72.94503021240234, -53.01031494140625, 192.95166015625, 339.806640625, 4.384666442871094, 348.17938232421875, -10.832452774047852, 135.54083251953125, 436.8033447265625, 140.05572509765625, 479.267578125, 119.35910034179688, 42.870872497558594, 167.71905517578125, 132.35231018066406, 259.0667724609375, 260.2250671386719, 156.78794860839844, 218.44149780273438, 74.73491668701172, 19.26268768310547, 73.03402709960938, -88.18804931640625, -12.324668884277344, 243.1707763671875, 107.77253723144531, 107.49756622314453, 396.7967529296875, 622.31689453125, -69.7520523071289, 300.9806213378906, 156.21234130859375, -52.57203674316406, 3.881277084350586, 182.2481231689453, 49.75199890136719, 136.61691284179688, -110.252685546875, 395.1474914550781, 186.11407470703125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000234.npy"}
{"epoch": 0.3436123348017621, "step": 235, "batch_size": 64, "mean": 98.7876205444336, "std": 166.37889099121094, "min": -333.6348876953125, "p10": -79.74918899536132, "median": 108.0271224975586, "p90": 271.92534484863285, "max": 551.7841186523438, "pos_frac": 0.75, "sample": [27.963912963867188, 240.42056274414062, -333.6348876953125, 23.37078857421875, 169.15585327148438, 501.159912109375, 137.52239990234375, 306.0353698730469, -62.13376235961914, 178.9445037841797, -158.49717712402344, -17.965423583984375, 143.69320678710938, 246.35389709472656, 43.721435546875, 33.71092224121094, -83.53407287597656, 11.787849426269531, 93.0745849609375, -259.1965026855469, 4.085357666015625, 21.914031982421875, -26.03076934814453, 46.250465393066406, 355.2855529785156, 224.8217315673828, -34.103851318359375, -62.74481201171875, 149.934814453125, 219.51893615722656, 128.20071411132812, 118.72373962402344, 276.8941955566406, -92.73516845703125, 97.33050537109375, 211.0316619873047, 15.90461540222168, 244.91064453125, 21.334487915039062, 154.77200317382812, 33.45258712768555, 79.52133178710938, 497.0391540527344, 231.71844482421875, 196.9048614501953, 161.1916961669922, 167.25411987304688, -51.116493225097656, -134.9364013671875, 133.29502868652344, 551.7841186523438, 260.33135986328125, -131.40664672851562, 2.2572021484375, 123.56422424316406, -0.8614788055419922, -70.91779327392578, 178.24957275390625, 121.48336791992188, 13.58510971069336, 189.36172485351562, 142.8795166015625, 371.9867858886719, -61.46580505371094], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000235.npy"}
{"epoch": 0.34508076358296624, "step": 236, "batch_size": 64, "mean": 158.46571350097656, "std": 142.41896057128906, "min": -67.58405303955078, "p10": -9.63164138793945, "median": 131.0281982421875, "p90": 377.5979156494141, "max": 501.4618835449219, "pos_frac": 0.875, "sample": [383.4535827636719, 64.09331512451172, 13.469039916992188, 164.79867553710938, 181.2922821044922, 131.28736877441406, 422.0817565917969, 32.71150588989258, 80.18798828125, 130.76902770996094, 198.6739501953125, 359.7029724121094, 96.25215911865234, 203.64389038085938, 327.263427734375, 295.3174133300781, 82.845947265625, 248.09095764160156, 34.727195739746094, 389.8941650390625, 147.63037109375, -13.93743896484375, 72.47747039794922, 275.48162841796875, 228.69825744628906, 13.739795684814453, 144.328857421875, 404.5909423828125, 229.66140747070312, 40.819801330566406, 501.4618835449219, 100.95587158203125, -62.82975769042969, 252.87538146972656, 14.474620819091797, 253.93011474609375, 105.9106216430664, 446.60943603515625, 97.93926239013672, -30.250675201416016, 119.65210723876953, 115.33423614501953, 44.22058868408203, 43.336830139160156, -6.975879669189453, 224.1696319580078, 128.9123992919922, 68.21585845947266, -64.84996032714844, -67.58405303955078, 142.64044189453125, 441.2151184082031, 363.9346923828125, 163.3520050048828, 193.79779052734375, 128.84310913085938, 284.4174499511719, 97.0647201538086, 5.1214447021484375, 161.73609924316406, -66.102783203125, 340.72283935546875, -10.769824981689453, 226.2759552001953], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000236.npy"}
{"epoch": 0.3465491923641703, "step": 237, "batch_size": 64, "mean": 120.41532135009766, "std": 176.66961669921875, "min": -359.19256591796875, "p10": -55.93853073120117, "median": 105.8105697631836, "p90": 368.9911163330081, "max": 674.6421508789062, "pos_frac": 0.765625, "sample": [169.7842254638672, 197.51727294921875, 201.3782958984375, 202.57244873046875, -1.3829078674316406, 106.31038665771484, 88.32495880126953, 173.92428588867188, 20.306087493896484, 184.74838256835938, 55.392189025878906, 32.337005615234375, 411.3248291015625, 139.0987548828125, 213.58285522460938, -39.34440612792969, -109.76539611816406, 161.10569763183594, -51.47211456298828, 105.31075286865234, 141.96401977539062, -87.81210327148438, 450.4293212890625, 674.6421508789062, -52.43226623535156, 398.0791320800781, 301.11907958984375, 14.651763916015625, 17.024032592773438, 28.75350570678711, 466.6488037109375, 103.97554016113281, 209.4363555908203, 212.6936798095703, 12.991462707519531, 210.19874572753906, 19.819725036621094, 134.41677856445312, 297.320068359375, -38.698341369628906, 195.10867309570312, 92.94258880615234, 134.39068603515625, 272.8608703613281, 410.8856201171875, 45.39208984375, -1.5311279296875, 0.04906463623046875, -359.19256591796875, 288.9900817871094, -57.44121551513672, 67.79283905029297, -183.62220764160156, -43.48826599121094, 163.02645874023438, 282.7725830078125, -73.64642333984375, 464.7425537109375, 17.67363739013672, -157.16461181640625, -47.627197265625, 136.44677734375, 63.906463623046875, 217.0379638671875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000237.npy"}
{"epoch": 0.34801762114537443, "step": 238, "batch_size": 64, "mean": 118.37882995605469, "std": 174.4036102294922, "min": -199.5196533203125, "p10": -40.76473999023437, "median": 83.03457260131836, "p90": 283.6727935791016, "max": 771.5066528320312, "pos_frac": 0.765625, "sample": [6.8890838623046875, -32.29248046875, 5.4865264892578125, 132.39634704589844, 21.582324981689453, -33.528900146484375, 138.6576690673828, 267.065673828125, -110.59457397460938, 413.54107666015625, 75.65143585205078, 166.78048706054688, 82.148193359375, 60.477603912353516, -14.216882705688477, 164.02145385742188, 290.7901306152344, -85.16975402832031, 51.403709411621094, 40.58845901489258, 11.503049850463867, 657.17236328125, 33.54298400878906, 154.91770935058594, -43.865814208984375, 525.2415771484375, 110.63623046875, -2.8522567749023438, 158.90780639648438, -86.84757995605469, 128.43771362304688, 130.042724609375, 167.3819580078125, 83.92095184326172, 360.60418701171875, 14.585674285888672, 51.36042404174805, 126.14570617675781, 771.5066528320312, 146.70977783203125, -3.3475685119628906, 10.34149169921875, 53.59480667114258, -93.31661224365234, -24.630020141601562, 266.5860290527344, 254.99664306640625, 170.34524536132812, 73.90393829345703, 484.0536193847656, 220.97203063964844, 18.566879272460938, 182.24212646484375, 215.76119995117188, -199.5196533203125, 263.779296875, 212.41326904296875, 32.94010925292969, -5.162559509277344, -24.687255859375, 152.38674926757812, 159.277587890625, -86.02862548828125, 100.04712677001953], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000238.npy"}
{"epoch": 0.34948604992657856, "step": 239, "batch_size": 64, "mean": 132.2473602294922, "std": 163.02098083496094, "min": -255.47134399414062, "p10": -54.600990676879874, "median": 133.13137817382812, "p90": 365.4525573730469, "max": 475.86846923828125, "pos_frac": 0.8125, "sample": [369.2284240722656, 173.6405792236328, -149.42044067382812, 362.33062744140625, -101.72308349609375, 143.99884033203125, 66.17791748046875, 151.85464477539062, -74.89268493652344, -147.63449096679688, 52.78725051879883, 50.29400634765625, 76.36592102050781, 263.9227294921875, 71.25901794433594, 5.593818664550781, 449.2771911621094, 140.64869689941406, 178.77212524414062, 138.6917724609375, 208.6969451904297, 182.14588928222656, 241.34849548339844, -59.11477279663086, 20.26854705810547, 34.2020263671875, 175.09193420410156, 409.0177917480469, 219.89964294433594, 105.68849182128906, -4.711736679077148, 27.665374755859375, 322.618408203125, 431.258544921875, -179.75697326660156, 75.50244903564453, -31.700584411621094, 335.3536071777344, 6.6410064697265625, 372.8567810058594, 35.713932037353516, 350.69427490234375, 319.6935119628906, 141.3155517578125, -44.06883239746094, 40.190956115722656, -255.47134399414062, 132.27659606933594, -21.799312591552734, 147.4686279296875, 149.34945678710938, 281.5086669921875, 308.21099853515625, 66.8385009765625, 475.86846923828125, 213.93597412109375, 366.79052734375, 78.57196044921875, 116.21102905273438, 133.9861602783203, 49.441925048828125, -19.52698516845703, 270.429443359375, 12.056173324584961], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000239.npy"}
{"epoch": 0.3509544787077827, "step": 240, "batch_size": 64, "mean": 112.4616928100586, "std": 191.52040100097656, "min": -250.0303497314453, "p10": -189.16146545410152, "median": 121.05008316040039, "p90": 370.05748291015624, "max": 580.8192749023438, "pos_frac": 0.765625, "sample": [130.65025329589844, 121.37117004394531, 189.75790405273438, 93.19744110107422, 101.23854064941406, 352.55731201171875, 162.13478088378906, 406.476806640625, -206.30551147460938, -55.47144317626953, 161.8372039794922, 145.68763732910156, -248.7591552734375, 182.29592895507812, 371.5768737792969, 74.4274673461914, -211.354736328125, 78.27655029296875, 42.654624938964844, 235.83609008789062, 197.9556884765625, 421.42584228515625, 150.443359375, 156.87567138671875, 68.31739807128906, 5.499393463134766, -232.50384521484375, 120.72899627685547, 74.34207153320312, 493.5450439453125, 200.28993225097656, 115.24452209472656, 104.12895202636719, 35.907264709472656, -219.919921875, 366.5122375488281, 230.3285369873047, 164.49192810058594, -223.0120849609375, 78.44071197509766, -76.50328063964844, 327.7045593261719, -149.15869140625, 49.762508392333984, 580.8192749023438, -109.87556457519531, 280.9273681640625, -32.99968719482422, 252.056884765625, 178.3991241455078, 124.43089294433594, 492.83892822265625, -43.58796691894531, 101.10503387451172, -128.57762145996094, 138.3679962158203, 80.93878936767578, -49.58094787597656, 129.40719604492188, 184.8743438720703, 48.9622802734375, 154.76463317871094, 475.3751525878906, -250.0303497314453], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000240.npy"}
{"epoch": 0.3524229074889868, "step": 241, "batch_size": 64, "mean": 176.48243713378906, "std": 204.7778778076172, "min": -230.7407989501953, "p10": -43.69116249084472, "median": 148.8327865600586, "p90": 425.2515350341797, "max": 852.2139892578125, "pos_frac": 0.8125, "sample": [99.3604507446289, 94.45680236816406, -201.3258056640625, -24.1915225982666, 43.03692626953125, 244.671630859375, 272.65057373046875, 422.90533447265625, 95.26025390625, 174.04095458984375, 134.34564208984375, -48.510223388671875, -1.6609077453613281, 509.43768310546875, -18.780792236328125, 163.0538330078125, -35.1253776550293, 347.80908203125, 171.66090393066406, 150.92431640625, 340.5738525390625, 128.34490966796875, 100.06426239013672, 122.33024597167969, 852.2139892578125, 426.2570495605469, 204.07684326171875, 148.83145141601562, 606.4237060546875, 42.883094787597656, 802.1544799804688, 444.671630859375, 335.1129455566406, 76.09632873535156, 79.01191711425781, 156.1924591064453, 209.82525634765625, 94.96043395996094, -47.362213134765625, 148.83412170410156, 126.18463897705078, 40.83683776855469, 103.58282470703125, -11.756805419921875, 244.1407470703125, 87.30978393554688, 346.6829833984375, -75.94432067871094, 231.80203247070312, 198.21142578125, 71.23910522460938, 42.166465759277344, 252.76260375976562, 490.0299377441406, 298.36749267578125, 152.638916015625, -146.71966552734375, 282.74493408203125, 243.93643188476562, 291.154052734375, -230.7407989501953, 398.80792236328125, 94.83232116699219, -102.91024780273438], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000241.npy"}
{"epoch": 0.35389133627019087, "step": 242, "batch_size": 64, "mean": 125.03507995605469, "std": 175.43678283691406, "min": -266.862548828125, "p10": -52.42521514892576, "median": 99.09163284301758, "p90": 306.4091705322266, "max": 702.1851196289062, "pos_frac": 0.78125, "sample": [-32.34886932373047, 22.646053314208984, 148.11953735351562, 268.66070556640625, 44.505672454833984, -266.862548828125, 14.079818725585938, -9.82232666015625, 307.6817626953125, 121.01486206054688, 32.874290466308594, 261.9232177734375, 659.9818725585938, -4.442232131958008, -33.387962341308594, 75.93241119384766, 125.6611557006836, 143.21649169921875, 56.439453125, 11.425031661987305, 206.5117950439453, 87.45355224609375, 246.26080322265625, 211.71224975585938, 255.0294189453125, -60.58403778076172, 100.7599105834961, 62.58275604248047, 126.1594467163086, -96.94271850585938, 162.80142211914062, 208.391845703125, 89.0780258178711, -142.17626953125, 259.14312744140625, 98.37421417236328, 303.4397888183594, 410.649169921875, 23.35688018798828, 99.80905151367188, 104.01313781738281, 195.2769012451172, -14.299163818359375, 702.1851196289062, 26.899879455566406, 74.52629852294922, 466.3192138671875, 84.20962524414062, 2.8354034423828125, -5.713483810424805, -104.40398406982422, 445.6793212890625, 199.88613891601562, -13.945594787597656, 79.04969787597656, 125.50863647460938, 152.49374389648438, -144.58163452148438, 238.9743194580078, 238.59628295898438, 6.147096633911133, 442.3034973144531, 165.4058837890625, -64.22997283935547], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000242.npy"}
{"epoch": 0.355359765051395, "step": 243, "batch_size": 64, "mean": 153.28863525390625, "std": 182.51853942871094, "min": -311.2235412597656, "p10": -51.326242828369125, "median": 131.76402282714844, "p90": 386.6516632080079, "max": 709.8970336914062, "pos_frac": 0.859375, "sample": [345.472412109375, 28.387248992919922, 79.9654769897461, 438.87139892578125, 197.38241577148438, 63.658164978027344, 119.51754760742188, 176.1630859375, -87.15315246582031, -99.42150115966797, -59.184783935546875, 82.37811279296875, 220.78701782226562, 18.991662979125977, -130.81858825683594, 17.05975341796875, -123.22433471679688, 301.623046875, 114.4482421875, 173.11831665039062, -311.2235412597656, 38.86231994628906, 169.88258361816406, 17.192237854003906, 23.841745376586914, 332.68475341796875, 68.10409545898438, 189.497314453125, 89.94274139404297, 405.3201904296875, 299.0294494628906, 273.5637512207031, -32.989646911621094, 142.5322723388672, -96.21334838867188, 359.4100036621094, 301.7235412597656, 552.111083984375, 352.42010498046875, 242.9849853515625, 302.9682922363281, 133.42254638671875, 395.13201904296875, 709.8970336914062, 435.2215576171875, 50.74755859375, 130.10549926757812, 45.997013092041016, 366.8641662597656, -13.940284729003906, 161.39556884765625, 192.01065063476562, 37.89288330078125, 159.5316162109375, 524.0831298828125, 42.157264709472656, 142.59576416015625, 82.35234069824219, 11.812873840332031, 259.53094482421875, 11.880264282226562, 53.5435791015625, 62.42268371582031, 216.1484375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000243.npy"}
{"epoch": 0.3568281938325991, "step": 244, "batch_size": 64, "mean": 159.98179626464844, "std": 163.82614135742188, "min": -164.89447021484375, "p10": -14.768110275268555, "median": 134.85299682617188, "p90": 419.73086853027365, "max": 608.970947265625, "pos_frac": 0.859375, "sample": [85.49037170410156, 18.683082580566406, 112.14315795898438, 162.52029418945312, 21.073253631591797, 58.896480560302734, 461.4513854980469, 146.18316650390625, -57.155059814453125, 5.667724609375, 185.2898712158203, 236.48580932617188, 16.1416015625, 289.8599853515625, 40.8194580078125, 127.92721557617188, -15.150165557861328, 443.9891662597656, 58.41987991333008, 113.20243072509766, -164.89447021484375, 120.8321533203125, 277.19219970703125, -67.83958435058594, 208.2265625, 29.645143508911133, 164.50326538085938, 235.45895385742188, 129.91915893554688, 37.942710876464844, 251.9784393310547, 136.05099487304688, 343.5409851074219, 341.6467590332031, 479.56292724609375, 551.9789428710938, 185.4932098388672, 440.3742980957031, 56.833309173583984, 608.970947265625, 230.60791015625, -13.190832138061523, 349.3514709472656, 174.99691772460938, 166.01454162597656, -76.8443374633789, 137.14593505859375, -35.64939880371094, -13.87664794921875, 241.8081817626953, 58.77766418457031, 31.839515686035156, 155.72865295410156, 80.43968963623047, 11.027130126953125, 116.68830871582031, 371.5628662109375, 219.20013427734375, -65.02778625488281, 133.65499877929688, 99.39065551757812, 461.05218505859375, 218.8479461669922, 305.93341064453125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000244.npy"}
{"epoch": 0.35829662261380324, "step": 245, "batch_size": 64, "mean": 125.35258483886719, "std": 183.94993591308594, "min": -447.4949645996094, "p10": -26.22872200012207, "median": 95.8698501586914, "p90": 335.5093536376954, "max": 649.8316650390625, "pos_frac": 0.796875, "sample": [121.8337631225586, 423.2899169921875, 176.97647094726562, 168.2744903564453, 193.37640380859375, 139.821533203125, 57.359352111816406, -23.788925170898438, 225.59710693359375, 113.424560546875, 89.67013549804688, 100.61097717285156, 141.52207946777344, 44.50021743774414, 117.98453521728516, 178.35525512695312, 215.5943603515625, 312.605224609375, 74.32955932617188, 22.20772933959961, 549.6832275390625, 139.4550018310547, -18.430570602416992, 359.8648681640625, 71.7451171875, -14.423751831054688, 207.39501953125, -27.274349212646484, -208.49746704101562, -14.736961364746094, 304.45751953125, 150.36273193359375, 649.8316650390625, -172.7263641357422, 180.48391723632812, 22.30335235595703, 14.265312194824219, 80.91624450683594, 295.78302001953125, 91.12872314453125, -16.23806381225586, 216.6619873046875, 169.0655059814453, -87.86187744140625, 90.8938217163086, -447.4949645996094, 296.39044189453125, 296.2567443847656, 27.588356018066406, 224.12498474121094, 345.3254089355469, 29.352188110351562, 592.6294555664062, -83.17793273925781, -5.767177581787109, 513.9630737304688, 67.58363342285156, 110.30939483642578, 30.134963989257812, -121.09536743164062, 89.20578002929688, 59.99562072753906, 68.64771270751953, 0.9406051635742188], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000245.npy"}
{"epoch": 0.35976505139500736, "step": 246, "batch_size": 64, "mean": 129.66812133789062, "std": 184.46658325195312, "min": -250.5979461669922, "p10": -73.48933410644528, "median": 84.09209823608398, "p90": 365.85954895019535, "max": 752.6776123046875, "pos_frac": 0.796875, "sample": [201.01016235351562, 353.0899963378906, 157.55093383789062, 82.48297882080078, 416.5191650390625, 207.7253875732422, 752.6776123046875, -174.7982177734375, 58.39925765991211, -250.5979461669922, 68.67748260498047, 60.64904022216797, 82.44499206542969, 0.6738529205322266, 172.85426330566406, 371.33221435546875, 326.2483825683594, 61.85071563720703, 40.282379150390625, 57.14020538330078, 40.39439392089844, 375.8108825683594, 154.7486572265625, -90.12313079833984, 108.39678955078125, -85.88180541992188, 217.03785705566406, -39.81976318359375, 310.798828125, -121.8624496459961, 4.857767105102539, -26.830894470214844, 114.34811401367188, 107.33360290527344, 313.26751708984375, 54.421653747558594, 22.869455337524414, -6.905483245849609, 10.99542236328125, 226.95162963867188, 410.2629699707031, 125.88053131103516, 43.16908264160156, 239.0723419189453, 85.70121765136719, 172.01614379882812, 86.0445556640625, -45.84202575683594, -3.3447303771972656, 218.1375274658203, -85.33818054199219, 90.28475952148438, 312.9920959472656, 186.8035430908203, 37.383087158203125, 58.122833251953125, 8.941877365112305, 585.7303466796875, 324.7867126464844, 586.0977783203125, -27.432823181152344, 232.29388427734375, -94.51841735839844, 14.49289321899414], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000246.npy"}
{"epoch": 0.36123348017621143, "step": 247, "batch_size": 64, "mean": 177.85269165039062, "std": 199.11367797851562, "min": -314.1670227050781, "p10": -39.10775070190428, "median": 143.4949188232422, "p90": 457.6805725097657, "max": 680.2259521484375, "pos_frac": 0.875, "sample": [-244.1911163330078, 111.84785461425781, -112.34950256347656, 377.474853515625, 80.33644104003906, 494.7352294921875, 167.29702758789062, 441.458251953125, -49.088356018066406, 364.2638854980469, 369.2174377441406, 484.7405090332031, 216.38995361328125, 276.2034912109375, 52.926177978515625, 338.73089599609375, 471.7005615234375, 39.81629180908203, 336.09912109375, 293.7566223144531, 166.36495971679688, 39.28932189941406, 15.095962524414062, 131.45059204101562, 39.06703186035156, 272.87811279296875, 29.798324584960938, 80.68586730957031, 80.6187973022461, 109.04684448242188, 247.05291748046875, 328.82666015625, 78.97818756103516, 149.70089721679688, 37.34416198730469, 379.946533203125, 515.9754638671875, 322.6905517578125, 101.91804504394531, -152.5906982421875, 27.942188262939453, 213.3305206298828, -24.394927978515625, 189.8671875, 76.2733383178711, 26.542869567871094, 137.2889404296875, 368.75457763671875, 653.905517578125, 94.8709945678711, 54.33490753173828, -130.7043914794922, 78.38829040527344, 680.2259521484375, 211.13137817382812, -45.413246154785156, 201.18545532226562, 317.5369873046875, 202.20323181152344, -314.1670227050781, 464.63299560546875, 29.012954711914062, 137.02503967285156, 247.29339599609375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000247.npy"}
{"epoch": 0.36270190895741555, "step": 248, "batch_size": 64, "mean": 154.68795776367188, "std": 146.3386993408203, "min": -190.98704528808594, "p10": -5.424394607543944, "median": 153.07664489746094, "p90": 353.77307128906256, "max": 465.55804443359375, "pos_frac": 0.875, "sample": [336.5872802734375, 357.8430480957031, 108.39684295654297, 187.45281982421875, 186.45265197753906, 0.6077651977539062, 156.3484649658203, 371.7902526855469, 121.29829406738281, -114.5262451171875, 5.405120849609375, 266.7561950683594, 205.79290771484375, 35.50575256347656, 224.6577606201172, 58.83934020996094, 24.193513870239258, 54.44819641113281, 162.1065216064453, 18.062267303466797, 152.64459228515625, 267.4920349121094, 266.9683532714844, 423.6935119628906, 21.20440673828125, -50.248779296875, 148.32156372070312, 337.69677734375, 102.49580383300781, 208.33206176757812, 175.37770080566406, -190.98704528808594, 153.50869750976562, 275.15380859375, 117.37222290039062, 196.9522705078125, 91.46705627441406, 9.276046752929688, -37.23724365234375, 54.8479118347168, 203.74085998535156, 266.1091003417969, 202.50750732421875, 419.784423828125, 109.22879028320312, 94.77952575683594, 453.564453125, 298.21197509765625, 25.41693115234375, 465.55804443359375, 11.797630310058594, 189.31509399414062, 344.2764587402344, 251.05406188964844, 259.31341552734375, 378.133544921875, -4.3304443359375, 53.85752868652344, 122.62132263183594, 287.5558166503906, 116.08844757080078, -5.893230438232422, -14.743694305419922, -120.26963806152344], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000248.npy"}
{"epoch": 0.3641703377386197, "step": 249, "batch_size": 64, "mean": 85.38651275634766, "std": 192.29501342773438, "min": -356.3747863769531, "p10": -137.6492248535156, "median": 60.5495548248291, "p90": 321.44203491210936, "max": 711.8545532226562, "pos_frac": 0.671875, "sample": [-33.77757263183594, 319.04608154296875, -114.30473327636719, -59.33106994628906, 116.75833892822266, -113.7153091430664, 29.636886596679688, 136.62559509277344, 62.573089599609375, 366.4786071777344, 30.1075439453125, 56.3070068359375, 55.66380310058594, 44.791404724121094, 231.25486755371094, -356.3747863769531, -105.97380065917969, 358.1776123046875, 124.07957458496094, -84.38310241699219, 138.24887084960938, 208.38343811035156, -165.6313934326172, 138.96746826171875, -40.98036193847656, 363.6459045410156, -172.3305206298828, 296.1068115234375, 134.18411254882812, 58.52602005004883, -37.74702072143555, -34.220638275146484, 236.23446655273438, 72.93196105957031, 145.17526245117188, -25.47869873046875, 201.31039428710938, -332.46966552734375, 107.7935791015625, 254.93490600585938, 160.71652221679688, -22.124374389648438, 711.8545532226562, -94.88459777832031, -235.91033935546875, -147.6540069580078, 53.602821350097656, 29.76705551147461, 129.28573608398438, 211.39633178710938, 98.47132110595703, 243.67190551757812, -197.75390625, 259.1570739746094, 322.4688720703125, 242.77207946777344, 58.27568054199219, -111.51443481445312, -54.14269256591797, 269.49151611328125, 55.21979522705078, 432.34466552734375, 421.77056884765625, 17.229782104492188], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000249.npy"}
{"epoch": 0.3656387665198238, "step": 250, "batch_size": 64, "mean": 158.30816650390625, "std": 165.04046630859375, "min": -188.84890747070312, "p10": -36.16371669769286, "median": 147.90289306640625, "p90": 374.62976379394536, "max": 521.5700073242188, "pos_frac": 0.8125, "sample": [282.1084899902344, 11.148557662963867, 45.35340881347656, -11.0328369140625, -62.41253662109375, 200.4347686767578, 88.96659088134766, 482.38177490234375, 173.78250122070312, -126.59623718261719, 59.144752502441406, 512.8541259765625, 248.97714233398438, 120.88014221191406, 254.23165893554688, 180.24266052246094, 40.404502868652344, 122.83526611328125, 359.4189453125, 360.6269226074219, 178.7891845703125, -92.96688079833984, -40.539955139160156, 88.36683654785156, 91.72240447998047, 229.57025146484375, 60.26630401611328, -18.12019920349121, 75.4902572631836, 15.5865478515625, -25.95249366760254, 22.620271682739258, 145.97076416015625, 259.57476806640625, -56.617286682128906, 521.5700073242188, 130.244140625, 201.34417724609375, -130.81317138671875, -6.864494323730469, 214.1567840576172, 385.6771240234375, 335.72406005859375, 177.94349670410156, 149.83502197265625, 53.37757110595703, 296.314697265625, 133.25967407226562, 280.77862548828125, 139.17662048339844, 75.64227294921875, -3.4784393310546875, 200.36973571777344, 240.38638305664062, -188.84890747070312, 244.65792846679688, 380.6309814453125, 269.12518310546875, 159.8079833984375, 430.009765625, 312.239990234375, 21.46717643737793, 514.1143798828125, 316.3629150390625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000250.npy"}
{"epoch": 0.3671071953010279, "step": 251, "batch_size": 64, "mean": 105.8559341430664, "std": 184.5855712890625, "min": -363.56817626953125, "p10": -71.37462921142577, "median": 78.17000579833984, "p90": 324.0533966064453, "max": 668.651611328125, "pos_frac": 0.703125, "sample": [426.1315612792969, 11.280189514160156, 338.1015625, -16.82817840576172, 232.23257446289062, -278.9297790527344, -363.56817626953125, 668.651611328125, 12.423530578613281, 199.71812438964844, 146.35763549804688, -115.89897918701172, 323.4018249511719, 48.91040802001953, 103.6737060546875, -59.94781494140625, -247.31585693359375, 433.6262512207031, -22.958425521850586, 186.26927185058594, 33.54789352416992, 283.4659729003906, -6.469367980957031, -40.99761199951172, 27.14788818359375, 129.0669403076172, -123.08509826660156, -76.27183532714844, 98.83212280273438, 270.2759704589844, 63.27130126953125, -146.73410034179688, 294.4976501464844, -34.028892517089844, 63.17145538330078, 218.4859619140625, 69.41309356689453, 139.64822387695312, 135.58714294433594, 150.06753540039062, -21.961196899414062, 208.49530029296875, -40.077430725097656, 293.03753662109375, -3.712615966796875, 374.2790222167969, 212.1222686767578, 215.54005432128906, 64.58658599853516, 110.16333770751953, -33.311607360839844, 17.358352661132812, 578.3040161132812, 77.91508483886719, 324.3326416015625, 193.91827392578125, -39.70500946044922, 204.77630615234375, 78.4249267578125, -32.78383255004883, 37.12767791748047, 197.67080688476562, 53.89411163330078, 130.1616973876953], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000251.npy"}
{"epoch": 0.368575624082232, "step": 252, "batch_size": 64, "mean": 144.4767608642578, "std": 142.84327697753906, "min": -465.81317138671875, "p10": -13.944217872619621, "median": 160.4151611328125, "p90": 324.266683959961, "max": 411.49078369140625, "pos_frac": 0.859375, "sample": [232.46469116210938, 336.58551025390625, 36.6755256652832, 183.92909240722656, 241.71739196777344, 71.7274169921875, 153.25228881835938, 326.3839416503906, 88.68997192382812, 142.4263916015625, 214.3647003173828, 23.54339599609375, 180.21957397460938, 52.977691650390625, 118.31655883789062, 171.580810546875, 73.39898681640625, 100.34394836425781, 175.72091674804688, -31.41228485107422, 283.406982421875, 173.47276306152344, 15.108806610107422, 399.0415954589844, 275.2475891113281, -116.46505737304688, -20.89168357849121, 131.31893920898438, -17.29962158203125, 148.07308959960938, -40.22819900512695, 183.25637817382812, 35.42491149902344, 183.89251708984375, 95.14129638671875, 132.22454833984375, 118.55368041992188, 399.909423828125, 371.26177978515625, 303.3597412109375, 41.980003356933594, 167.57803344726562, 176.7093963623047, 58.55070495605469, 185.54397583007812, 375.26910400390625, 271.7973327636719, 269.1873779296875, 187.59417724609375, 181.0377960205078, -1.4187164306640625, -465.81317138671875, 319.326416015625, 267.7064208984375, 98.93145751953125, 104.49681091308594, -6.11494255065918, 178.09686279296875, 411.49078369140625, -34.232845306396484, 81.64579772949219, 65.6125259399414, 176.44482421875, 188.37664794921875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000252.npy"}
{"epoch": 0.3700440528634361, "step": 253, "batch_size": 64, "mean": 142.9415283203125, "std": 173.07736206054688, "min": -405.1052551269531, "p10": -57.56651229858397, "median": 137.38423919677734, "p90": 377.3513092041016, "max": 550.0856323242188, "pos_frac": 0.8125, "sample": [-48.21843719482422, 19.221153259277344, 265.5156555175781, 47.561676025390625, 0.5794525146484375, 134.58511352539062, 224.91336059570312, 59.78826141357422, 228.5167999267578, 93.4449462890625, 131.45388793945312, 550.0856323242188, 248.44644165039062, 81.07292938232422, 224.59255981445312, 114.218505859375, 389.46771240234375, 108.821044921875, -61.57283020019531, 102.27369689941406, -21.465391159057617, -405.1052551269531, 235.03518676757812, 379.3598937988281, 388.8918151855469, 9.215829849243164, 430.17694091796875, 15.015327453613281, -20.253997802734375, 40.593021392822266, 18.014732360839844, -24.04755973815918, 149.49789428710938, 283.1200256347656, 300.9541320800781, 228.68270874023438, 290.38018798828125, -133.5276641845703, 267.17193603515625, 103.67725372314453, 265.38427734375, 320.3287353515625, 166.57122802734375, 297.240966796875, 390.08465576171875, 135.0594940185547, 156.22621154785156, 372.66461181640625, 139.708984375, -248.31100463867188, 360.305908203125, 167.79034423828125, -88.689453125, 68.54866027832031, -65.32257080078125, 88.70403289794922, 143.9604034423828, 218.7761993408203, 221.4036102294922, -26.38739013671875, -62.62373733520508, 34.96551513671875, 458.2355041503906, 183.47857666015625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000253.npy"}
{"epoch": 0.37151248164464024, "step": 254, "batch_size": 64, "mean": 123.87564086914062, "std": 203.74114990234375, "min": -272.5957336425781, "p10": -120.96284942626951, "median": 103.06225967407227, "p90": 400.45178833007833, "max": 818.88671875, "pos_frac": 0.734375, "sample": [108.21771240234375, -28.319808959960938, 170.30465698242188, 110.73652648925781, 100.0469970703125, 120.48123168945312, -3.1760406494140625, -83.74278259277344, 143.99099731445312, 30.181007385253906, 142.616455078125, 202.4378204345703, 230.00173950195312, 79.96336364746094, -126.5184326171875, -272.5957336425781, 217.80029296875, -9.870891571044922, 102.54013061523438, 196.012939453125, 3.6968727111816406, 210.30111694335938, 551.2545166015625, 336.5195007324219, 341.8521423339844, -181.2847900390625, 448.8140869140625, 296.1125793457031, -9.125312805175781, 13.260721206665039, 103.58438873291016, 210.2664794921875, 64.35517120361328, -181.05474853515625, 18.889110565185547, 219.83761596679688, -39.20301055908203, 422.20458984375, -108.839111328125, 48.15982437133789, -18.709495544433594, -206.91265869140625, 62.060752868652344, 452.55767822265625, 2.659517288208008, 220.82122802734375, 4.637901306152344, 349.69525146484375, 61.51093292236328, 179.05331420898438, 818.88671875, 159.2798614501953, 85.41311645507812, -12.356254577636719, -176.59841918945312, 574.2611083984375, 128.43447875976562, 234.65740966796875, 148.56663513183594, 66.90959167480469, -126.15873718261719, 567.5609130859375, 172.3578338623047, -21.25738525390625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000254.npy"}
{"epoch": 0.37298091042584436, "step": 255, "batch_size": 64, "mean": 132.18743896484375, "std": 188.17323303222656, "min": -421.1156005859375, "p10": -61.425950622558595, "median": 119.01693344116211, "p90": 353.5354858398438, "max": 599.2154541015625, "pos_frac": 0.765625, "sample": [599.2154541015625, -116.2552490234375, 21.1300048828125, 358.58905029296875, 184.75466918945312, -53.0706787109375, -44.53270721435547, -27.135879516601562, -421.1156005859375, 46.4257926940918, 305.57598876953125, 114.15705871582031, -9.235809326171875, 27.772991180419922, 242.4899444580078, 256.55194091796875, -156.59396362304688, -266.8367919921875, 129.73348999023438, 306.1211242675781, 256.2986145019531, 44.149505615234375, 67.93965911865234, 28.011890411376953, 84.99110412597656, -61.645896911621094, -151.07968139648438, 387.04669189453125, -19.207805633544922, 211.61361694335938, 223.2841033935547, 215.90084838867188, 116.97950744628906, 335.6260070800781, 366.700439453125, 311.2218933105469, 281.4601135253906, 381.6764831542969, 341.74383544921875, 548.3128662109375, 53.58539581298828, -20.402982711791992, 273.2265625, 130.97531127929688, 10.245586395263672, 237.76536560058594, 87.23008728027344, 251.13858032226562, -130.2795867919922, 326.761474609375, 437.86065673828125, 71.7607650756836, 151.42294311523438, 39.223487854003906, -60.912742614746094, 137.51449584960938, 33.45880889892578, 121.05435943603516, 45.50798797607422, 152.2040557861328, -41.893287658691406, 68.61126708984375, 322.7694091796875, 292.40325927734375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000255.npy"}
{"epoch": 0.3744493392070485, "step": 256, "batch_size": 64, "mean": 143.77066040039062, "std": 182.90052795410156, "min": -311.5033264160156, "p10": -52.34302749633789, "median": 127.39521789550781, "p90": 373.69494323730476, "max": 613.6595458984375, "pos_frac": 0.75, "sample": [112.486328125, 560.3860473632812, 96.68140411376953, 241.38539123535156, 206.60989379882812, 266.9714660644531, 38.193817138671875, 213.08834838867188, -24.520565032958984, 27.058914184570312, 314.5246276855469, 212.087890625, 193.87564086914062, 350.6976318359375, 278.3408508300781, -132.2648468017578, 59.403743743896484, 205.45501708984375, -311.5033264160156, 43.17098617553711, 293.1087646484375, 116.45118713378906, -161.664306640625, 333.3153991699219, 57.35792922973633, 287.8294982910156, -49.479469299316406, -58.66627502441406, -72.04765319824219, 173.7519989013672, 400.4659729003906, -9.608428955078125, 237.02850341796875, 87.63850402832031, 66.35807800292969, 129.44444274902344, 410.856689453125, -53.57026672363281, 69.38865661621094, 49.83866882324219, -174.92478942871094, 43.510589599609375, 28.184494018554688, -20.653533935546875, 180.02499389648438, -39.34073257446289, 218.42440795898438, 145.12734985351562, -26.576904296875, 240.2403564453125, -42.90187072753906, 125.34599304199219, -8.897499084472656, 201.64630126953125, 330.01092529296875, 456.8662109375, 168.93792724609375, 56.27505874633789, 613.6595458984375, 540.8839721679688, 245.32260131835938, 383.5509338378906, -31.2544002532959, 307.9327087402344], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000256.npy"}
{"epoch": 0.37591776798825255, "step": 257, "batch_size": 64, "mean": 149.67494201660156, "std": 197.8617401123047, "min": -317.62530517578125, "p10": -51.7940731048584, "median": 118.59381103515625, "p90": 451.5304779052735, "max": 716.234619140625, "pos_frac": 0.796875, "sample": [456.1702880859375, -51.865169525146484, 24.326583862304688, 119.62857055664062, 361.6783142089844, 20.373336791992188, 255.79397583007812, 446.5355224609375, 282.91021728515625, 169.2525634765625, -27.526830673217773, -51.62818145751953, 110.85214233398438, 318.74786376953125, 119.32027435302734, 34.291748046875, 224.87042236328125, 269.8648681640625, 17.327205657958984, 453.6711730957031, 51.81291198730469, 401.6678771972656, 304.8347473144531, -190.246337890625, 95.31924438476562, 117.62762451171875, -102.32633972167969, 15.649734497070312, -29.60464096069336, 500.4249572753906, 147.4442596435547, 230.8566436767578, 280.3703308105469, 716.234619140625, 76.00916290283203, -53.565887451171875, 101.05050659179688, 4.399818420410156, 142.56082153320312, -317.62530517578125, 117.86734771728516, 466.2828674316406, 39.193485260009766, 222.49063110351562, 171.4138641357422, 231.541748046875, 60.995338439941406, -21.427711486816406, 570.6211547851562, 251.62832641601562, 237.66915893554688, 346.90826416015625, 12.958152770996094, 132.26187133789062, 54.72224426269531, 514.52978515625, -81.91221618652344, -12.280670166015625, 179.94171142578125, -230.12069702148438, 36.34486389160156, 39.84968566894531, -23.18170166015625, 213.40924072265625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000257.npy"}
{"epoch": 0.37738619676945667, "step": 258, "batch_size": 64, "mean": 147.97097778320312, "std": 207.81454467773438, "min": -531.7978515625, "p10": -69.46477584838867, "median": 130.65869140625, "p90": 380.6291717529297, "max": 700.9085693359375, "pos_frac": 0.8125, "sample": [338.0307312011719, 40.76673889160156, -109.68315124511719, 283.8077697753906, 67.47361755371094, 128.88430786132812, 268.9656677246094, 222.17062377929688, 255.87771606445312, -9.416290283203125, -35.193016052246094, 4.514274597167969, -64.76985931396484, 74.94239807128906, 102.02851104736328, -47.08689880371094, -8.204994201660156, 117.56536865234375, 190.82139587402344, 1.9276657104492188, 60.49516296386719, 250.26327514648438, 528.9597778320312, 236.16221618652344, 457.3908386230469, 273.5425720214844, 155.38525390625, 18.832603454589844, 162.37435913085938, 364.20086669921875, 226.8909454345703, 292.57952880859375, 322.78564453125, 186.10670471191406, -226.45626831054688, 90.29200744628906, 305.4224548339844, -393.92327880859375, 18.21550750732422, 124.66852569580078, 407.33148193359375, 41.52951431274414, -98.031982421875, 398.15362548828125, 35.5554084777832, -531.7978515625, 700.9085693359375, 132.43307495117188, -166.7595672607422, 78.08907318115234, 257.19610595703125, -71.47688293457031, 318.05999755859375, 385.70513916015625, 368.7852478027344, 35.951805114746094, 306.3651123046875, 472.5120849609375, 189.7054901123047, 285.5667724609375, 94.74525451660156, 102.76056671142578, 345.2213134765625, 104.02155303955078], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000258.npy"}
{"epoch": 0.3788546255506608, "step": 259, "batch_size": 64, "mean": 124.25125122070312, "std": 187.17050170898438, "min": -202.0948944091797, "p10": -117.89047698974609, "median": 90.65182495117188, "p90": 361.41693115234375, "max": 554.298828125, "pos_frac": 0.75, "sample": [205.3266143798828, -14.389448165893555, 157.32489013671875, 206.57508850097656, 31.68081283569336, -119.10336303710938, 317.9119873046875, 478.8616638183594, 459.2950744628906, 216.0198974609375, 40.235557556152344, -123.38390350341797, 106.95295715332031, -91.9352798461914, 9.25115966796875, -91.86857604980469, 350.0729675292969, -53.641082763671875, 478.3994140625, 185.80801391601562, 151.91641235351562, 139.6165771484375, 82.49081420898438, 76.96670532226562, 327.87469482421875, 30.454700469970703, 74.12892150878906, 53.54130554199219, 554.298828125, -202.0948944091797, 70.0318603515625, 28.00315284729004, 529.425537109375, 179.65396118164062, 62.073326110839844, -129.14842224121094, 127.22382354736328, 328.6648864746094, -167.2989501953125, 337.60809326171875, -156.33798217773438, 16.530006408691406, 361.35284423828125, -115.06040954589844, 98.81283569335938, 50.21136474609375, 361.44439697265625, 301.811767578125, -165.04261779785156, 163.9073944091797, -35.71875, 298.62591552734375, 288.0860290527344, -31.426742553710938, 388.06292724609375, -64.52872467041016, 119.19105529785156, 318.66717529296875, 145.45672607421875, -55.58861541748047, 56.275508880615234, 27.679821014404297, 167.72250366210938, 7.119636535644531], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000259.npy"}
{"epoch": 0.3803230543318649, "step": 260, "batch_size": 64, "mean": 166.3863525390625, "std": 192.92849731445312, "min": -203.76666259765625, "p10": -18.901126861572266, "median": 131.5645980834961, "p90": 397.34695739746104, "max": 1094.209228515625, "pos_frac": 0.859375, "sample": [232.74880981445312, -53.26543426513672, 39.66449737548828, -88.45663452148438, 3.280416488647461, 160.4530029296875, 132.26658630371094, 205.2344207763672, 360.5618896484375, 49.94340515136719, 71.84837341308594, 453.3602294921875, 120.88291931152344, 114.24031066894531, 158.6571502685547, 1094.209228515625, 173.2543182373047, 253.08932495117188, 42.68109130859375, 451.64599609375, -203.76666259765625, 157.89508056640625, -31.523008346557617, 168.88851928710938, -92.85565948486328, -12.979988098144531, 157.9256134033203, -19.234207153320312, 254.56594848632812, 127.12733459472656, 121.98234558105469, 360.08624267578125, -18.123939514160156, 267.395751953125, 141.23902893066406, 367.2950439453125, 377.8170471191406, 148.2408447265625, 94.59226989746094, 89.30426025390625, 99.97682189941406, 58.902801513671875, 341.33038330078125, 101.0605239868164, 152.65078735351562, 3.400218963623047, 41.90464782714844, 80.4881591796875, 179.4171142578125, 215.05393981933594, 210.66357421875, 215.65908813476562, 130.86260986328125, 413.118408203125, 38.00853729248047, -49.329200744628906, 71.4559326171875, 405.7169189453125, 23.41832733154297, 593.85205078125, 91.7933578491211, 413.3760070800781, 56.813629150390625, 326.9600830078125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000260.npy"}
{"epoch": 0.38179148311306904, "step": 261, "batch_size": 64, "mean": 154.48388671875, "std": 206.12733459472656, "min": -319.9163818359375, "p10": -51.70460548400878, "median": 133.0986328125, "p90": 400.29446411132824, "max": 945.2293090820312, "pos_frac": 0.8125, "sample": [434.9024963378906, 376.49334716796875, 67.36127471923828, -201.77976989746094, 120.57475280761719, 945.2293090820312, 37.22846984863281, 353.15399169921875, -39.81619644165039, 216.07666015625, 38.586090087890625, 71.0672378540039, -94.4776611328125, 409.49871826171875, 136.66253662109375, 212.71932983398438, 215.58363342285156, 362.5562438964844, 376.65118408203125, -24.89356231689453, 106.26765441894531, 88.62759399414062, 148.7172393798828, 174.83999633789062, 165.4573974609375, 322.595458984375, 506.10833740234375, 21.365997314453125, 312.6716003417969, 197.8527069091797, 39.337127685546875, 31.387916564941406, 8.497520446777344, 110.69416809082031, 237.68032836914062, 49.696311950683594, 278.8963317871094, 378.81787109375, 258.64654541015625, 129.53472900390625, 239.01925659179688, -45.28829574584961, 463.2492370605469, 411.29888916015625, 3.310415267944336, 361.70477294921875, -178.7561798095703, 209.3433074951172, 15.697731018066406, 246.2930908203125, 112.98314666748047, 491.008544921875, -4.167320251464844, -319.9163818359375, -183.94540405273438, 115.59273529052734, -185.46212768554688, -15.247495651245117, 152.92062377929688, 192.8029327392578, 50.09453582763672, 66.15093231201172, 161.6658935546875, -54.45445251464844], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000261.npy"}
{"epoch": 0.3832599118942731, "step": 262, "batch_size": 64, "mean": 129.03204345703125, "std": 163.50450134277344, "min": -229.15072631835938, "p10": -66.40078887939451, "median": 128.33170700073242, "p90": 296.4082550048829, "max": 715.3250732421875, "pos_frac": 0.8125, "sample": [55.1900634765625, 434.7138366699219, 107.2403335571289, 231.5934600830078, -213.8473663330078, 324.8180236816406, 470.82220458984375, 116.82994079589844, 152.6929931640625, 256.1230163574219, -149.5709228515625, -229.15072631835938, 156.64837646484375, 715.3250732421875, 146.8402099609375, -165.02041625976562, 179.39109802246094, 265.3501892089844, -43.757911682128906, 210.44326782226562, 158.28900146484375, 65.7663345336914, -85.43142700195312, 275.4693298339844, 117.87825012207031, -36.19392395019531, 49.5706901550293, 404.6341247558594, 127.44219970703125, 310.9519958496094, 41.810890197753906, 84.69315338134766, 87.104736328125, -42.73160934448242, 115.87324523925781, 99.65563201904297, 129.18914794921875, 15.774734497070312, 172.36373901367188, 141.07440185546875, 129.7318115234375, 146.116943359375, 24.93199920654297, 259.94500732421875, 241.1805877685547, 112.8153305053711, 245.8681640625, 127.4742660522461, -73.28563690185547, -50.336143493652344, 82.20899963378906, 72.3837890625, 39.48075866699219, 305.382080078125, -132.4013671875, -41.05059051513672, 129.94863891601562, 161.4998321533203, 253.16822814941406, 255.25436401367188, 181.696533203125, 28.430038452148438, 261.69305419921875, 240.05459594726562], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000262.npy"}
{"epoch": 0.38472834067547723, "step": 263, "batch_size": 64, "mean": 180.35325622558594, "std": 209.8715057373047, "min": -329.79595947265625, "p10": -53.21893997192381, "median": 156.8031234741211, "p90": 452.7548950195314, "max": 720.2772216796875, "pos_frac": 0.8125, "sample": [319.9400939941406, -329.79595947265625, 141.2469940185547, 196.0599822998047, 348.2821044921875, 50.64757537841797, 334.78564453125, 46.34674072265625, 28.576675415039062, 95.42464447021484, -284.06304931640625, 194.20741271972656, 77.3048324584961, 304.5516357421875, 334.2491760253906, 171.0970001220703, 18.945653915405273, 378.10638427734375, 108.51299285888672, 147.46969604492188, -9.268716812133789, -84.60546875, 58.704505920410156, 344.1884765625, -21.28110122680664, -75.054443359375, 339.5701904296875, 158.927978515625, -17.921409606933594, 102.57275390625, 11.192733764648438, 40.775550842285156, 365.0112609863281, 61.95854187011719, -25.094467163085938, 513.8433837890625, 270.7106018066406, 113.66499328613281, 401.29180908203125, 534.80126953125, 58.589988708496094, 529.893310546875, 106.90620422363281, 272.9636535644531, 338.9619445800781, 165.10845947265625, 466.0697021484375, -33.79618835449219, 241.08978271484375, 274.4107360839844, 388.6072692871094, 421.68701171875, 625.6446533203125, -172.2585906982422, 6.1348114013671875, 546.10791015625, -61.54297637939453, -63.42737579345703, 225.0049285888672, 236.90008544921875, 140.58653259277344, 720.2772216796875, 154.6782684326172, 188.12594604492188], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000263.npy"}
{"epoch": 0.38619676945668135, "step": 264, "batch_size": 64, "mean": 154.90243530273438, "std": 204.98941040039062, "min": -243.36941528320312, "p10": -63.58628616333008, "median": 128.22284698486328, "p90": 450.29103088378906, "max": 663.1012573242188, "pos_frac": 0.78125, "sample": [222.60739135742188, 289.3474426269531, 46.87025833129883, 663.1012573242188, 86.6617431640625, -203.1621551513672, -166.04385375976562, 380.3591003417969, 67.1656494140625, 473.82354736328125, -42.110328674316406, 47.36711883544922, 137.28440856933594, -169.6287841796875, 163.48416137695312, 149.44451904296875, 287.1529846191406, 447.32733154296875, 404.63421630859375, 390.00347900390625, 52.520263671875, 278.42364501953125, 221.2437744140625, 185.67613220214844, 65.99736785888672, 30.929061889648438, -61.96092987060547, -13.612289428710938, -38.437461853027344, 189.3382110595703, -64.28286743164062, 24.24097442626953, 50.83885955810547, 594.1591186523438, 3.303375244140625, 451.5611877441406, 142.75900268554688, 179.25045776367188, 41.69468688964844, 279.60089111328125, 65.97549438476562, 211.7128143310547, 119.16128540039062, 114.3370361328125, -3.1242599487304688, 630.33203125, 372.51641845703125, -243.36941528320312, 236.47775268554688, -143.92822265625, 209.2344512939453, 23.024066925048828, 213.65155029296875, 455.8212890625, -14.928062438964844, 184.79251098632812, 44.15631866455078, 104.51248168945312, -156.2838897705078, 514.3272705078125, 334.4880676269531, 308.89630126953125, -17.649372100830078, 60.689361572265625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000264.npy"}
{"epoch": 0.3876651982378855, "step": 265, "batch_size": 64, "mean": 146.05377197265625, "std": 243.67935180664062, "min": -488.15191650390625, "p10": -90.23377838134765, "median": 119.48883438110352, "p90": 413.9072570800781, "max": 830.27197265625, "pos_frac": 0.734375, "sample": [122.1626205444336, 416.79254150390625, 333.20037841796875, 222.0929412841797, 70.34864044189453, 116.81504821777344, 335.18389892578125, 11.613807678222656, 767.8668212890625, 50.112152099609375, 229.0994873046875, -255.62393188476562, -0.2793083190917969, -18.992938995361328, -96.18240356445312, 78.42750549316406, 317.4347229003906, -142.42828369140625, 102.27889251708984, -57.55169677734375, 258.2147521972656, 830.27197265625, -488.15191650390625, 552.2416381835938, -114.15341186523438, 156.12338256835938, 197.732421875, 461.1366882324219, -76.35365295410156, 764.3956298828125, -244.3350372314453, 326.2681884765625, 57.30259704589844, -44.520565032958984, 71.84225463867188, -69.97821044921875, 5.8200225830078125, -68.58358764648438, 377.969482421875, -67.63945007324219, 453.1274719238281, 113.37362670898438, 65.7392807006836, 382.3013000488281, -36.72650909423828, 139.87599182128906, 21.90900421142578, 155.8773193359375, -30.185195922851562, 311.7738037109375, 96.25922393798828, 198.78350830078125, 164.42550659179688, 397.8858947753906, 167.2466583251953, 145.09580993652344, 210.7847442626953, 122.76628875732422, 55.31380844116211, -342.5807800292969, 407.1749267578125, 104.66227722167969, 334.867431640625, 219.71595764160156], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000265.npy"}
{"epoch": 0.3891336270190896, "step": 266, "batch_size": 64, "mean": 115.4014892578125, "std": 196.97235107421875, "min": -293.3901672363281, "p10": -102.11306686401366, "median": 83.60533142089844, "p90": 423.9771118164063, "max": 626.4095458984375, "pos_frac": 0.703125, "sample": [-16.998977661132812, 540.9111938476562, 59.23424530029297, 133.84524536132812, 266.85369873046875, 104.76490783691406, -67.32496643066406, 270.744873046875, 74.03390502929688, 428.08013916015625, 123.19541931152344, 326.09979248046875, -58.22016906738281, 301.18353271484375, -94.96269226074219, 65.10871124267578, 289.0294494628906, 77.77255249023438, 222.749267578125, 69.08048248291016, 116.64222717285156, -293.3901672363281, -52.317970275878906, 357.3040466308594, 161.1619110107422, 433.0241394042969, 165.77223205566406, 3.9016494750976562, 50.78314971923828, 414.40338134765625, 479.29559326171875, 300.00054931640625, 626.4095458984375, 436.4566650390625, 221.25628662109375, 107.7947998046875, 27.596965789794922, 211.01251220703125, 24.332420349121094, 85.5323715209961, 57.291595458984375, 174.88790893554688, 81.67829132080078, -9.550836563110352, 158.54620361328125, -1.8872184753417969, -273.049072265625, -254.7718505859375, 264.56268310546875, 17.51897621154785, -172.92169189453125, -105.1775131225586, -50.14494323730469, -111.67831420898438, -34.913352966308594, -47.80390930175781, -14.500146865844727, -27.273338317871094, -165.85958862304688, 446.676025390625, 151.23687744140625, 210.20864868164062, 13.759265899658203, 86.70745086669922], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000266.npy"}
{"epoch": 0.39060205580029367, "step": 267, "batch_size": 64, "mean": 146.298095703125, "std": 224.27528381347656, "min": -513.3765869140625, "p10": -68.12866439819335, "median": 125.51726531982422, "p90": 363.42298278808596, "max": 784.4556884765625, "pos_frac": 0.765625, "sample": [68.71587371826172, 164.35971069335938, 353.5262451171875, -10.444171905517578, 132.482421875, -16.177274703979492, 268.7005920410156, 156.28607177734375, -0.21490478515625, 15.946277618408203, -94.528564453125, -55.374786376953125, 15.5294189453125, 367.2267150878906, 160.689453125, 136.95858764648438, -95.84770965576172, 220.79583740234375, 519.552734375, 771.6697387695312, 93.41877746582031, 51.093055725097656, 128.32704162597656, 10.35784912109375, -134.13075256347656, 113.27720642089844, 68.39117431640625, 114.7595443725586, 353.323486328125, 229.0447235107422, 632.6063842773438, 414.7742004394531, 784.4556884765625, 45.70281982421875, -14.439029693603516, 276.92578125, 30.959362030029297, -73.59461212158203, -0.4175148010253906, 165.97824096679688, 296.1669921875, 20.439956665039062, -5.974531173706055, 611.1590576171875, -513.3765869140625, -23.85649871826172, -409.5736083984375, 218.69967651367188, 209.31729125976562, 112.47286987304688, 211.1986541748047, 267.1147766113281, 89.41537475585938, 310.84088134765625, 61.57508087158203, 142.03671264648438, -109.34352111816406, 309.2104187011719, 100.3287124633789, 122.70748901367188, 354.547607421875, 153.55458068847656, 321.6914367675781, 142.05979919433594], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000267.npy"}
{"epoch": 0.3920704845814978, "step": 268, "batch_size": 64, "mean": 148.31320190429688, "std": 221.29595947265625, "min": -359.3738098144531, "p10": -36.62528419494628, "median": 145.2625503540039, "p90": 377.4056732177735, "max": 850.7744140625, "pos_frac": 0.796875, "sample": [11.831916809082031, 486.3951416015625, 58.531349182128906, -38.23979949951172, 38.64765930175781, 60.05036163330078, 161.0532989501953, 96.3828125, 380.09222412109375, 115.13247680664062, 226.1214599609375, -327.7710266113281, 145.1856689453125, 42.19163513183594, 273.3829345703125, -215.63616943359375, 99.22264862060547, 62.922760009765625, -359.3738098144531, 145.3394317626953, 166.72793579101562, 850.7744140625, 64.64402770996094, 178.79925537109375, 46.6983757019043, 312.53656005859375, 480.4169921875, 571.0820922851562, -11.573471069335938, 353.93194580078125, -100.49774932861328, 69.55250549316406, -31.8980712890625, -1.5400733947753906, 619.4432373046875, 155.99105834960938, 182.7459259033203, 159.62181091308594, 304.86614990234375, 266.1434326171875, 146.01283264160156, 290.69439697265625, 11.203178405761719, -79.72117614746094, 333.55718994140625, -2.36273193359375, 201.5227508544922, 371.1370544433594, 11.029312133789062, 304.63800048828125, 194.84768676757812, 94.45689392089844, 719.1119384765625, 178.244384765625, 75.62660217285156, 114.31214904785156, 150.18943786621094, 18.630165100097656, 288.4250183105469, -20.575407028198242, 213.5208740234375, -357.84234619140625, -32.85808181762695, 168.3148651123047], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000268.npy"}
{"epoch": 0.3935389133627019, "step": 269, "batch_size": 64, "mean": 179.8758544921875, "std": 237.62774658203125, "min": -429.4111328125, "p10": -97.15496520996092, "median": 176.63134765625, "p90": 475.13558654785163, "max": 709.6807861328125, "pos_frac": 0.703125, "sample": [117.61763000488281, 327.0624694824219, 177.72207641601562, 308.23260498046875, -110.23094940185547, 213.06884765625, 313.83184814453125, 162.78134155273438, -80.997802734375, -36.377159118652344, 321.27508544921875, -35.84678649902344, 524.93359375, 17.6580810546875, 365.66058349609375, 277.04901123046875, -23.040199279785156, 189.74330139160156, -159.7136688232422, -429.4111328125, 346.3161926269531, 7.244419097900391, 231.02593994140625, -241.89804077148438, -103.66896057128906, 459.34527587890625, -12.453224182128906, 624.4803466796875, 524.0966186523438, 175.54061889648438, 158.95751953125, 106.83685302734375, 418.282470703125, 279.0860900878906, -49.70335388183594, 358.102783203125, 449.9830017089844, 128.6097412109375, 350.74969482421875, -14.36328125, -7.098503112792969, 141.2529754638672, -214.77882385253906, 46.924041748046875, 147.447509765625, 646.2278442382812, 367.86370849609375, 519.6526489257812, 306.4330749511719, -40.193878173828125, 354.099853515625, 313.3782043457031, -14.526641845703125, 709.6807861328125, -81.95564270019531, 481.9028625488281, 13.771484375, 13.575492858886719, -24.511394500732422, 280.3417663574219, 434.2533874511719, 423.7437744140625, -163.22909545898438, 220.2093505859375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000269.npy"}
{"epoch": 0.39500734214390604, "step": 270, "batch_size": 64, "mean": 130.6387939453125, "std": 204.1112518310547, "min": -438.8076477050781, "p10": -70.70864868164062, "median": 120.48664474487305, "p90": 373.58868408203125, "max": 769.886474609375, "pos_frac": 0.78125, "sample": [226.38214111328125, 298.33343505859375, 424.88189697265625, -254.8349609375, -438.8076477050781, -47.69688415527344, 27.890464782714844, 265.2856140136719, 104.2895736694336, 332.0398864746094, 359.15106201171875, -45.620025634765625, 121.81464385986328, 232.2776641845703, 38.833953857421875, 56.43040084838867, 87.56627655029297, 167.76136779785156, -192.38400268554688, 236.13088989257812, 303.2270812988281, 146.57730102539062, -16.333091735839844, 241.51950073242188, 444.198486328125, 290.2278137207031, 3.4862632751464844, 98.2973403930664, 208.33786010742188, 132.31390380859375, -275.12298583984375, 379.9464111328125, 769.886474609375, -68.99443054199219, -122.36325073242188, 287.213134765625, 374.79644775390625, 66.9078369140625, 97.85826110839844, 20.232418060302734, 551.9244384765625, -213.211181640625, 182.94924926757812, 19.171459197998047, 119.15864562988281, 133.65985107421875, 31.121597290039062, 242.41836547851562, -71.44331359863281, -30.704845428466797, 192.026123046875, 337.4895324707031, 86.78382873535156, 15.015913009643555, -9.930648803710938, 155.50906372070312, 9.774871826171875, 48.41706848144531, -27.652503967285156, 370.77056884765625, 170.55555725097656, 158.65347290039062, 65.15849304199219, 441.32855224609375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000270.npy"}
{"epoch": 0.3964757709251101, "step": 271, "batch_size": 64, "mean": 165.31607055664062, "std": 192.7357940673828, "min": -147.00436401367188, "p10": -33.04742813110351, "median": 117.84884262084961, "p90": 426.53155517578125, "max": 720.297119140625, "pos_frac": 0.78125, "sample": [720.297119140625, 402.1062316894531, 110.40066528320312, -39.02004623413086, 219.45130920410156, -23.196247100830078, 376.8348083496094, 377.1153869628906, -1.1603927612304688, -25.49878692626953, 359.5326843261719, -132.8326416015625, 425.25067138671875, -147.00436401367188, 81.08687591552734, -38.345664978027344, 69.36572265625, -29.33281707763672, 192.90589904785156, -0.7249832153320312, -71.70800018310547, 26.957260131835938, 512.461181640625, 405.4847412109375, 535.015869140625, 427.08050537109375, 72.7602767944336, -34.639404296875, 81.58633422851562, 129.90335083007812, 3.699329376220703, 228.81285095214844, 128.70831298828125, -15.217979431152344, -78.16921997070312, 130.3513946533203, 65.5140380859375, 28.143714904785156, 383.3159484863281, 96.57958984375, 29.071922302246094, 590.7021484375, 69.98871612548828, -25.68425178527832, 119.40177917480469, 34.46210861206055, 110.48818969726562, 144.4273223876953, 334.05621337890625, 186.58523559570312, 481.887939453125, 245.93606567382812, 116.29590606689453, 29.890274047851562, 476.257080078125, 14.439857482910156, 246.10910034179688, 365.1046142578125, 3.108673095703125, 234.9183807373047, 158.73187255859375, 280.8645324707031, 146.38519287109375, 232.92893981933594], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000271.npy"}
{"epoch": 0.39794419970631423, "step": 272, "batch_size": 64, "mean": 202.88958740234375, "std": 197.0140838623047, "min": -164.27682495117188, "p10": -62.661069488525385, "median": 200.93250274658203, "p90": 449.1217407226563, "max": 692.3652954101562, "pos_frac": 0.796875, "sample": [296.96484375, 290.28961181640625, -13.10099983215332, -131.4058837890625, -20.610580444335938, 387.58685302734375, 226.6230926513672, 478.4649658203125, 465.3641357421875, 284.4390869140625, 345.0283203125, 407.2436218261719, -65.70384979248047, 181.90542602539062, -164.27682495117188, 74.29463195800781, 86.30547332763672, 227.71458435058594, 143.25399780273438, 171.86483764648438, -69.2513427734375, 356.6443786621094, 321.42926025390625, 208.8822021484375, 331.058837890625, 108.71796417236328, -16.371318817138672, 280.13677978515625, 139.95895385742188, 692.3652954101562, 58.04524230957031, 199.35507202148438, 202.5099334716797, 129.89566040039062, 75.19700622558594, 364.2275695800781, 450.357666015625, -162.9608917236328, 194.53933715820312, 233.78485107421875, 692.1485595703125, -23.106231689453125, 553.8880615234375, 153.0313720703125, 301.8836669921875, -72.40715789794922, 334.309814453125, 29.74831771850586, 243.63279724121094, 397.6907043457031, -18.317977905273438, 197.3500213623047, 3.7534942626953125, -84.25270080566406, -55.561248779296875, 326.76953125, 243.51414489746094, 88.1202621459961, 446.2379150390625, 154.78616333007812, 219.48245239257812, 428.38995361328125, 512.5629272460938, 140.51174926757812], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000272.npy"}
{"epoch": 0.39941262848751835, "step": 273, "batch_size": 64, "mean": 193.62213134765625, "std": 211.0909881591797, "min": -305.24853515625, "p10": -66.08981475830076, "median": 185.14366912841797, "p90": 474.31941833496114, "max": 698.9327392578125, "pos_frac": 0.796875, "sample": [258.7403564453125, 336.9141540527344, 87.97760009765625, 153.8609161376953, 573.9171752929688, 495.47882080078125, 184.42489624023438, -87.2093276977539, 619.0429077148438, -38.98316955566406, 399.6622619628906, 209.99505615234375, -15.744903564453125, 44.71526336669922, 6.614595413208008, 176.0450439453125, -5.735065460205078, 208.86599731445312, 220.443603515625, 350.4346618652344, 214.88706970214844, 50.8211669921875, 71.2742919921875, 185.86244201660156, 95.39244079589844, 50.218849182128906, 360.1312255859375, 182.0784454345703, 185.9266357421875, 196.99795532226562, 370.60589599609375, -7.275138854980469, 355.5046081542969, 347.6289978027344, -27.54912757873535, 623.5919189453125, 586.2593994140625, 261.5933837890625, 169.1465301513672, 304.23004150390625, -178.08538818359375, -120.33906555175781, 243.13035583496094, -93.59534454345703, 256.730712890625, 306.02593994140625, 423.14129638671875, 42.89228820800781, 168.09120178222656, -305.24853515625, 698.9327392578125, 174.221923828125, 84.21092224121094, -146.41827392578125, -49.690582275390625, 322.0246276855469, 11.428817749023438, 129.01528930664062, 332.459228515625, -73.11805725097656, 375.0245666503906, 113.44086456298828, 424.9474792480469, 495.80523681640625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000273.npy"}
{"epoch": 0.4008810572687225, "step": 274, "batch_size": 64, "mean": 144.68017578125, "std": 194.56756591796875, "min": -219.2036895751953, "p10": -107.86939392089842, "median": 137.2106704711914, "p90": 422.0397766113282, "max": 610.196533203125, "pos_frac": 0.75, "sample": [47.66632843017578, 103.73793029785156, 36.861488342285156, -141.5375213623047, 233.13209533691406, 187.1905517578125, 222.1293487548828, -42.15711212158203, 90.88211822509766, -40.91203308105469, -131.81069946289062, 454.9093322753906, 374.2122802734375, 237.51632690429688, 273.9089050292969, 167.60890197753906, 27.83562469482422, 496.340576171875, -219.2036895751953, 248.75091552734375, -166.5244903564453, 130.42759704589844, -69.83126831054688, 103.30020141601562, 38.935752868652344, 285.5025939941406, 431.2122802734375, -86.7816162109375, 118.9126205444336, 610.196533203125, -38.29800033569336, 84.51693725585938, 308.7828369140625, 186.79562377929688, 1.1436710357666016, 196.0484161376953, 460.7570495605469, -39.190765380859375, 166.2810821533203, 82.46040344238281, 275.7551574707031, -26.199996948242188, 143.2711639404297, 307.12908935546875, 359.058837890625, -84.86901092529297, 106.28507995605469, 13.482671737670898, 166.221923828125, 534.7127685546875, 289.1413269042969, 180.06739807128906, 7.59466552734375, 182.2935791015625, -164.54376220703125, 291.85980224609375, -142.7960205078125, -116.90701293945312, 575.9688720703125, 218.98687744140625, 131.15017700195312, 400.63726806640625, -24.161224365234375, 203.68243408203125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000274.npy"}
{"epoch": 0.4023494860499266, "step": 275, "batch_size": 64, "mean": 154.61358642578125, "std": 201.15040588378906, "min": -235.81179809570312, "p10": -80.92010116577148, "median": 164.9320297241211, "p90": 445.9409454345704, "max": 614.6110229492188, "pos_frac": 0.78125, "sample": [197.30628967285156, 49.54771423339844, 306.7838134765625, 14.529117584228516, 12.335317611694336, -118.75312042236328, 406.5797424316406, 166.59364318847656, 503.7972106933594, -170.58517456054688, 272.8876953125, 450.8606872558594, 529.7559814453125, 79.31732940673828, 173.75994873046875, 275.4976806640625, -39.76742935180664, 6.7401123046875, 152.26760864257812, 193.8524932861328, -155.06106567382812, 200.57876586914062, 458.8600158691406, -10.968101501464844, 6.658363342285156, -218.52127075195312, 320.7065124511719, 163.27041625976562, 201.2745361328125, 59.38311004638672, -78.16178131103516, 44.322845458984375, 373.0579833984375, 230.86309814453125, -5.989076614379883, 36.569854736328125, -205.78341674804688, -80.92242431640625, -42.931190490722656, 28.400985717773438, 75.60238647460938, 402.36944580078125, -235.81179809570312, 181.39552307128906, 235.3704376220703, 454.4582214355469, 206.38430786132812, 614.6110229492188, 234.62924194335938, 477.67889404296875, 434.4615478515625, -35.41168212890625, 232.36422729492188, 367.38433837890625, 73.42188262939453, 362.2297058105469, 125.77067565917969, 364.7292175292969, -80.91468048095703, 201.10987854003906, 49.22962188720703, 264.7818603515625, 50.25848388671875, 50.252288818359375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000275.npy"}
{"epoch": 0.40381791483113066, "step": 276, "batch_size": 64, "mean": 152.5404815673828, "std": 214.0045928955078, "min": -278.7015380859375, "p10": -65.63937072753905, "median": 151.79047393798828, "p90": 429.3804321289063, "max": 821.0494995117188, "pos_frac": 0.765625, "sample": [114.2452163696289, 821.0494995117188, 157.4183807373047, 412.67254638671875, 465.1027526855469, 2.9221553802490234, -24.370086669921875, 603.1541748046875, 27.37971305847168, 172.1758270263672, 174.88592529296875, 30.58527374267578, -35.501708984375, 243.36073303222656, 302.0948791503906, 318.2462463378906, 195.633056640625, -278.7015380859375, 140.1772918701172, -5.542079925537109, 32.73188400268555, 364.25579833984375, 182.08175659179688, 351.2379455566406, -72.06561279296875, -237.34249877929688, -12.565967559814453, 63.22639083862305, 203.68075561523438, 347.1820983886719, -50.644805908203125, 212.783203125, 48.27434539794922, 157.53228759765625, 339.9962158203125, 274.76678466796875, -210.3614501953125, -227.74021911621094, -2.1285457611083984, -15.25839614868164, -140.4033660888672, 56.410430908203125, 46.68368148803711, 450.2413635253906, 75.92435455322266, 146.16256713867188, 226.5317840576172, 216.51524353027344, 307.19976806640625, 28.530067443847656, 475.5660400390625, -47.04261779785156, 135.9716796875, 248.07284545898438, 6.652252197265625, 239.98753356933594, 170.4216766357422, 180.20285034179688, 410.12457275390625, 80.51885986328125, -198.83917236328125, 586.7429809570312, 37.24440002441406, 436.54095458984375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000276.npy"}
{"epoch": 0.4052863436123348, "step": 277, "batch_size": 64, "mean": 162.13218688964844, "std": 206.83079528808594, "min": -328.0587158203125, "p10": -82.84592132568358, "median": 156.28082275390625, "p90": 462.5413391113282, "max": 553.4262084960938, "pos_frac": 0.765625, "sample": [-5.038974761962891, 320.77679443359375, -141.97702026367188, 156.14071655273438, 38.64004135131836, 291.1828918457031, 377.61553955078125, 173.02392578125, -67.37223815917969, 224.49197387695312, 209.1851806640625, -36.256996154785156, 366.82891845703125, -66.1336441040039, 236.6531524658203, 356.38232421875, 33.74785614013672, 125.47390747070312, 204.74539184570312, 327.38299560546875, -88.2046127319336, 429.1083068847656, 80.22871398925781, 49.950904846191406, -226.27972412109375, -70.3423080444336, 517.478759765625, 57.010009765625, -9.838836669921875, 181.07730102539062, -328.0587158203125, 474.839111328125, 172.0609130859375, 309.94256591796875, -151.3909912109375, -163.69073486328125, 553.4262084960938, 304.4037780761719, 146.78897094726562, -41.08856201171875, 253.9718017578125, 27.652267456054688, 538.4860229492188, 24.232715606689453, 12.43472671508789, 125.3167495727539, 213.08154296875, 406.2060546875, 414.7273254394531, 510.0994873046875, -154.5390625, 290.5504455566406, 74.24835968017578, 92.73267364501953, 13.343017578125, -1.5220146179199219, 266.5811767578125, 156.42092895507812, 471.2218933105469, 478.31512451171875, 107.2828369140625, 175.8638916015625, 114.55081939697266, 442.2867126464844], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000277.npy"}
{"epoch": 0.4067547723935389, "step": 278, "batch_size": 64, "mean": 184.778076171875, "std": 208.6544647216797, "min": -138.26394653320312, "p10": -49.93593254089355, "median": 134.70978546142578, "p90": 498.3366149902344, "max": 642.9695434570312, "pos_frac": 0.78125, "sample": [-121.66213989257812, 377.2962951660156, -74.18346405029297, 33.642459869384766, 332.0265808105469, 334.173828125, 238.67922973632812, 49.72858810424805, 182.46238708496094, 114.25394439697266, 293.894287109375, 178.2991943359375, 69.76227569580078, -64.06155395507812, 66.7901611328125, 244.0432586669922, -9.98077392578125, 329.9031066894531, 490.6864318847656, 8.708244323730469, -1.3596973419189453, 617.2282104492188, 423.65460205078125, 220.47177124023438, 292.01422119140625, 309.10211181640625, 105.81018829345703, -31.674781799316406, 188.20474243164062, 543.6492919921875, 501.6152648925781, -89.13236999511719, 98.85787963867188, 59.23541259765625, 595.2991943359375, 483.14654541015625, 30.129295349121094, -45.909423828125, 106.31731414794922, 232.31561279296875, 79.86441802978516, 451.4788818359375, 189.27731323242188, 57.787811279296875, 43.33624267578125, 543.4693603515625, 120.38233184814453, 253.35606384277344, -39.61557388305664, 598.3692626953125, 202.65245056152344, 147.14710998535156, 12.429351806640625, 23.37432098388672, 367.3653259277344, 290.9150390625, -38.19361114501953, -13.373098373413086, 642.9695434570312, -138.26394653320312, 375.3578186035156, 122.2724609375, -128.33798217773438, -51.66157913208008], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000278.npy"}
{"epoch": 0.40822320117474303, "step": 279, "batch_size": 64, "mean": 183.02891540527344, "std": 238.5940399169922, "min": -366.0460510253906, "p10": -64.06897315979003, "median": 151.6808319091797, "p90": 499.1227447509766, "max": 864.4644775390625, "pos_frac": 0.796875, "sample": [71.88532257080078, 61.00413513183594, 34.0207405090332, 50.105987548828125, -51.49432373046875, -38.18944549560547, 441.66015625, 522.4659423828125, 15.077018737792969, -54.82892990112305, 30.33330535888672, 295.9586181640625, 161.64950561523438, 48.072998046875, 502.2455139160156, 620.0469970703125, 60.28413009643555, 174.47293090820312, 272.40338134765625, 266.7622985839844, 692.7233276367188, 74.55632781982422, 25.3079891204834, -279.04364013671875, 45.60844421386719, 87.9251937866211, -43.182220458984375, 195.8795166015625, 116.38131713867188, 594.4354248046875, 141.712158203125, 35.804893493652344, 234.576171875, -30.275259017944336, 275.34503173828125, 469.81756591796875, 242.59942626953125, 864.4644775390625, 494.8057861328125, 303.0653076171875, -37.19667053222656, -68.02899169921875, -178.22048950195312, -366.0460510253906, 169.0461883544922, 500.9728698730469, 111.61297607421875, 280.9217529296875, 373.98028564453125, 130.8948211669922, 408.8126220703125, 326.7928466796875, 99.3132095336914, -112.4879150390625, 489.4081726074219, 222.584228515625, 461.9833984375, 261.3966064453125, -106.41909790039062, -164.21058654785156, 355.697998046875, 256.94769287109375, 3.3300132751464844, 266.3208312988281], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000279.npy"}
{"epoch": 0.40969162995594716, "step": 280, "batch_size": 64, "mean": 188.41152954101562, "std": 193.22438049316406, "min": -289.269775390625, "p10": -79.19318389892578, "median": 171.1201400756836, "p90": 425.2998504638672, "max": 562.051513671875, "pos_frac": 0.8125, "sample": [145.6171112060547, 307.07073974609375, 135.492431640625, 242.4660186767578, 38.39454650878906, -52.054237365722656, 401.0702209472656, 508.56378173828125, 529.1821899414062, 52.93536376953125, 225.5801239013672, 2.0902481079101562, -76.82191467285156, 371.6749267578125, 408.0748291015625, 381.39520263671875, 131.30064392089844, 310.3057861328125, 159.6600341796875, 292.23193359375, 96.89424133300781, 556.6629638671875, 464.1970520019531, 183.60704040527344, 275.0807189941406, 81.40093994140625, 363.5760498046875, 327.7581787109375, -1.9627532958984375, 253.71142578125, 178.48477172851562, 160.76040649414062, 562.051513671875, 163.50384521484375, -46.32110595703125, 149.71592712402344, -154.9906005859375, 384.4410095214844, 239.2554168701172, 87.32315826416016, 253.2433624267578, 117.77188873291016, -109.83952331542969, 561.0028686523438, -80.20944213867188, -289.269775390625, 143.14968872070312, 422.7155456542969, 174.76531982421875, 111.63597869873047, 203.67868041992188, 426.40740966796875, -82.42173767089844, 167.47496032714844, -12.360565185546875, -85.79344940185547, 190.02044677734375, 129.94834899902344, 99.47702026367188, 338.90447998046875, 301.21246337890625, -159.4593505859375, 58.49549865722656, 338.4075927734375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000280.npy"}
{"epoch": 0.4111600587371512, "step": 281, "batch_size": 64, "mean": 172.38308715820312, "std": 191.71197509765625, "min": -590.4569091796875, "p10": -26.20276069641111, "median": 160.81727600097656, "p90": 406.65272521972656, "max": 580.1284790039062, "pos_frac": 0.859375, "sample": [110.9264144897461, 307.0739440917969, 144.36036682128906, 258.3740234375, 56.25113296508789, 197.47564697265625, 62.74760437011719, 216.32177734375, 183.54774475097656, 73.58773040771484, 278.7618103027344, 381.7804260253906, 274.85015869140625, 87.51874542236328, 251.07835388183594, -91.26531982421875, 177.27418518066406, 580.1284790039062, 52.21282958984375, 140.56869506835938, 33.6966552734375, 404.64520263671875, 273.456787109375, 561.662353515625, 209.3128662109375, 266.6348571777344, 200.27099609375, 304.30450439453125, -35.2230110168457, 575.48974609375, 323.19500732421875, 109.96127319335938, 187.7095947265625, 443.603515625, 187.95758056640625, -590.4569091796875, 93.63444519042969, 96.50498962402344, 115.6147689819336, 72.58145141601562, 504.46331787109375, 11.029220581054688, 1.0282440185546875, 272.56707763671875, 181.29055786132812, 258.6119079589844, 63.52116394042969, 132.72352600097656, 512.1473999023438, 110.62088775634766, -83.77759552001953, 135.35467529296875, -105.84208679199219, 88.53570556640625, -72.92572021484375, -5.155509948730469, -64.94705200195312, 387.9942626953125, 221.22593688964844, 77.6411361694336, 128.92208862304688, 296.566162109375, -4.721933364868164, 407.5130920410156], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000281.npy"}
{"epoch": 0.41262848751835535, "step": 282, "batch_size": 64, "mean": 128.67344665527344, "std": 215.21070861816406, "min": -213.01434326171875, "p10": -113.1386100769043, "median": 110.91732788085938, "p90": 365.9270050048828, "max": 1157.354736328125, "pos_frac": 0.75, "sample": [477.5335998535156, 148.51373291015625, 87.66275024414062, -213.01434326171875, 174.6505889892578, 221.52655029296875, 92.38702392578125, 38.397216796875, 104.178466796875, 371.37921142578125, 261.7256164550781, 206.048095703125, 316.327880859375, 81.06377410888672, -25.754806518554688, 210.00146484375, 1157.354736328125, 147.02230834960938, 220.17330932617188, 117.65618896484375, 150.32452392578125, 186.38519287109375, 25.806133270263672, -125.0218505859375, 527.6550903320312, 365.8197021484375, 94.46072387695312, 180.15089416503906, 150.1770477294922, 517.3825073242188, 257.87322998046875, 211.15985107421875, 314.97210693359375, -26.099639892578125, -13.998126983642578, 136.72779846191406, -112.84147644042969, -103.45929718017578, 67.72579956054688, -131.71368408203125, -46.7659912109375, 36.686614990234375, 144.37596130371094, 135.98806762695312, 247.94517517089844, 8.611732482910156, -159.89743041992188, 407.62298583984375, 195.64456176757812, 140.5230712890625, -65.74706268310547, -57.89662170410156, 25.662185668945312, 52.9522590637207, 99.67127990722656, -198.43789672851562, 306.6724853515625, 3.1445140838623047, -113.26595306396484, 365.9729919433594, -200.51605224609375, 31.929439544677734, -56.83397674560547, 62.73838806152344], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000282.npy"}
{"epoch": 0.41409691629955947, "step": 283, "batch_size": 64, "mean": 202.11631774902344, "std": 204.2007598876953, "min": -388.81158447265625, "p10": -30.766219139099114, "median": 190.32743072509766, "p90": 483.0229125976563, "max": 632.6588134765625, "pos_frac": 0.84375, "sample": [-124.336181640625, -60.041160583496094, 193.1650848388672, 25.4094295501709, 326.2392883300781, 323.7885437011719, -48.133544921875, 264.2702941894531, 414.1481018066406, 47.09040069580078, 52.31999969482422, 163.9982452392578, -16.189205169677734, 42.33699035644531, 87.95317077636719, 194.97706604003906, -388.81158447265625, 50.02065658569336, 415.31396484375, -38.301841735839844, 506.9492492675781, 419.7395935058594, 114.83296203613281, 329.84698486328125, 75.21778869628906, 208.897216796875, -24.127595901489258, 486.2305908203125, 404.95965576171875, 497.6031494140625, 40.83746337890625, 277.0514831542969, -121.06465148925781, 231.09634399414062, 440.73193359375, 54.74641418457031, 130.12197875976562, 3.808879852294922, 99.41202545166016, -5.31828498840332, 475.538330078125, 232.0784454345703, 321.264404296875, 210.08990478515625, 299.7696838378906, 180.21694946289062, 114.86095428466797, 187.48977661132812, 428.07537841796875, 336.8956298828125, 632.6588134765625, 31.737510681152344, 402.002685546875, 172.24819946289062, 65.66492462158203, 286.2392883300781, 583.59814453125, 204.59324645996094, 353.19183349609375, 613.7677001953125, 502.54541015625, 98.57048797607422, 139.1671142578125, -33.61134338378906], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000283.npy"}
{"epoch": 0.4155653450807636, "step": 284, "batch_size": 64, "mean": 187.474365234375, "std": 205.45692443847656, "min": -200.23602294921875, "p10": -24.075717926025376, "median": 161.27429962158203, "p90": 456.7031311035159, "max": 774.213623046875, "pos_frac": 0.875, "sample": [-85.65127563476562, 94.06103515625, 304.50628662109375, 39.65142059326172, 123.1689224243164, 60.33171844482422, 373.13763427734375, 17.20452880859375, 264.0018310546875, 386.7843933105469, 365.30120849609375, -45.725650787353516, 486.6683044433594, 28.50213050842285, 299.7825012207031, 64.46717834472656, 49.430564880371094, 268.10272216796875, 281.6265563964844, 505.8623962402344, 36.035125732421875, 704.0570068359375, 186.48556518554688, 286.1947021484375, 162.77359008789062, 20.901504516601562, 159.77500915527344, 75.46893310546875, 288.75482177734375, -30.748779296875, 48.37522888183594, 743.4730834960938, 197.4092254638672, -89.06438446044922, 193.83743286132812, 106.93670654296875, -93.47721099853516, 53.925804138183594, 217.05523681640625, 774.213623046875, 36.78697204589844, 174.41091918945312, 192.79986572265625, 7.660417556762695, -11.16009521484375, -200.23602294921875, 322.6349182128906, 324.7635803222656, 86.19371032714844, 274.2980651855469, 569.505859375, 156.29562377929688, 297.2348327636719, 236.2311553955078, 84.44732666015625, 11.351390838623047, -29.610984802246094, 9.984123229980469, 344.2763671875, 202.0561065673828, 4.451240539550781, 570.2861328125, 47.749351501464844, 362.3519592285156], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000284.npy"}
{"epoch": 0.4170337738619677, "step": 285, "batch_size": 64, "mean": 161.8185272216797, "std": 190.46876525878906, "min": -477.16314697265625, "p10": -46.52246589660644, "median": 160.80523681640625, "p90": 364.965869140625, "max": 593.364990234375, "pos_frac": 0.84375, "sample": [366.14825439453125, 219.27127075195312, 44.5760498046875, 362.20697021484375, 115.80579376220703, 121.94393920898438, 158.33316040039062, 68.29547882080078, 158.97776794433594, 552.3538818359375, 272.14984130859375, 162.63270568847656, 341.8557434082031, 120.90882110595703, 166.9268035888672, 570.8323974609375, 36.98565673828125, 102.1373291015625, 129.72903442382812, 253.82540893554688, 437.2720031738281, -248.79751586914062, 90.0230941772461, 298.83795166015625, 437.3023986816406, 22.21656036376953, -63.058475494384766, 86.5831298828125, 101.4798583984375, 39.923011779785156, 262.26483154296875, 154.6422882080078, -39.974708557128906, 61.304500579833984, 281.02740478515625, 335.26739501953125, 36.61176300048828, -477.16314697265625, 246.60845947265625, -28.465972900390625, 178.61260986328125, 30.193603515625, 593.364990234375, 270.14251708984375, -27.377769470214844, 318.7286682128906, 256.0265808105469, 163.65818786621094, 259.9732360839844, 343.1338806152344, 188.46832275390625, -151.98309326171875, 278.7312927246094, 223.51748657226562, -168.6107940673828, -49.32864761352539, 439.79583740234375, 95.89730834960938, 89.51509857177734, 185.2433624267578, -151.52932739257812, 299.7476806640625, 197.50045776367188, 133.16336059570312], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000285.npy"}
{"epoch": 0.4185022026431718, "step": 286, "batch_size": 64, "mean": 183.6310577392578, "std": 223.95550537109375, "min": -361.1418762207031, "p10": -86.11129150390624, "median": 182.88518524169922, "p90": 480.77653808593755, "max": 693.0799560546875, "pos_frac": 0.78125, "sample": [61.090965270996094, -67.15184783935547, 496.9655456542969, 149.87254333496094, 151.44175720214844, 185.46127319335938, 53.03905487060547, 217.79701232910156, 328.1473693847656, -361.1418762207031, 312.881591796875, 182.96231079101562, 50.03422546386719, -70.75096893310547, 438.9031677246094, 226.05029296875, 576.9376220703125, 2.8465576171875, 232.215576171875, 191.28016662597656, 473.89599609375, 82.40753173828125, -273.1888732910156, 432.80645751953125, 373.9725341796875, -108.14508056640625, -80.44496154785156, 418.06036376953125, 329.0870666503906, 532.5743408203125, 494.9239501953125, -168.623291015625, 228.73312377929688, 64.10446166992188, -24.499465942382812, 62.37345886230469, 456.4951171875, 151.35572814941406, 36.993682861328125, -70.87693786621094, 182.8080596923828, -28.33348846435547, 236.56011962890625, -88.53971862792969, 115.10055541992188, 400.40411376953125, 453.0249938964844, 141.06427001953125, -115.04208374023438, 618.693115234375, -72.41590118408203, 237.30697631835938, 72.82179260253906, 483.725341796875, 89.72845458984375, 314.717041015625, 113.91122436523438, 269.97412109375, 247.3699951171875, -111.87969970703125, 693.0799560546875, 356.3412170410156, 263.79144287109375, 107.2883071899414], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000286.npy"}
{"epoch": 0.4199706314243759, "step": 287, "batch_size": 64, "mean": 146.35472106933594, "std": 199.4036102294922, "min": -265.8547668457031, "p10": -67.4243293762207, "median": 113.30455780029297, "p90": 451.9771453857423, "max": 763.91845703125, "pos_frac": 0.78125, "sample": [107.25636291503906, 41.869537353515625, 20.05495262145996, 74.63787078857422, 137.63685607910156, -67.71047973632812, 283.2975769042969, 559.2275390625, 148.3527374267578, 126.02420043945312, 527.1482543945312, 87.13722229003906, 182.31040954589844, 519.4249267578125, -68.42027282714844, -55.112396240234375, 25.43798065185547, 506.34051513671875, 113.76556396484375, 317.5479736328125, 189.40679931640625, 7.000738143920898, 123.37732696533203, -265.8547668457031, 763.91845703125, 79.67399597167969, 64.6466064453125, -25.08441162109375, -13.223922729492188, -3.3899612426757812, 146.2818145751953, -3.4650955200195312, -104.85406494140625, 127.56404113769531, 77.91670227050781, 217.95823669433594, 65.73870849609375, 252.56764221191406, 112.84355163574219, 245.9529266357422, -12.052431106567383, 285.9224853515625, 166.33245849609375, 105.48500061035156, 12.051467895507812, 133.0697479248047, 85.4324951171875, 17.140853881835938, 461.2401123046875, 427.915771484375, -164.92990112304688, 42.975975036621094, 208.6884002685547, 97.25679779052734, 430.3635559082031, -202.11941528320312, -154.6018829345703, -66.75664520263672, 210.25613403320312, 305.934814453125, 493.03472900390625, 394.8429260253906, 202.45562744140625, 241.56072998046875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000287.npy"}
{"epoch": 0.42143906020558003, "step": 288, "batch_size": 64, "mean": 192.16741943359375, "std": 203.39080810546875, "min": -309.9934997558594, "p10": -17.45656929016113, "median": 175.23712158203125, "p90": 445.1930114746095, "max": 721.9754638671875, "pos_frac": 0.875, "sample": [199.81124877929688, 174.15640258789062, -15.74636459350586, 218.25537109375, 350.7589111328125, 414.47491455078125, 15.049665451049805, 721.9754638671875, 23.901445388793945, 389.1675720214844, 237.97683715820312, 133.67166137695312, -110.75586700439453, 15.736251831054688, 92.45364379882812, 190.99534606933594, 130.99435424804688, 414.1106262207031, 212.70437622070312, 378.5785827636719, 176.1436309814453, 298.02423095703125, 308.81353759765625, -18.18951416015625, 98.75563049316406, 211.53079223632812, 171.53265380859375, 49.961631774902344, 174.34872436523438, 37.26759338378906, 198.22525024414062, 218.38787841796875, 602.309814453125, 162.21035766601562, 458.35791015625, 318.31573486328125, 176.12551879882812, 81.6409912109375, 204.88140869140625, 99.0013198852539, -309.9934997558594, 598.9945068359375, 98.06048583984375, 307.11181640625, 75.79658508300781, 164.74880981445312, 97.72766876220703, 40.61993408203125, 536.8348388671875, 112.93804168701172, 233.08566284179688, 621.843505859375, 255.73977661132812, -302.1202392578125, 105.72750854492188, 509.70123291015625, 315.4812927246094, 62.57405090332031, 59.715972900390625, 371.506591796875, -19.036048889160156, 393.9222412109375, -142.09812927246094, -106.08306884765625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000288.npy"}
{"epoch": 0.42290748898678415, "step": 289, "batch_size": 64, "mean": 214.66830444335938, "std": 231.7854766845703, "min": -173.52182006835938, "p10": -49.78528976440429, "median": 149.44650268554688, "p90": 569.4821350097658, "max": 722.9451904296875, "pos_frac": 0.828125, "sample": [156.56954956054688, -21.34210205078125, 700.7098388671875, 716.3233642578125, -173.52182006835938, 588.084716796875, 408.6158447265625, 299.2536926269531, 87.02853393554688, 593.6090087890625, 532.9522094726562, 415.0921630859375, 264.27496337890625, 493.16461181640625, 382.22344970703125, 304.64984130859375, 83.03318786621094, 722.9451904296875, -58.021446228027344, 289.8014831542969, 106.21478271484375, 25.135068893432617, 125.05557250976562, -41.71963119506836, -117.126220703125, 46.43221664428711, 290.533935546875, 458.7155456542969, 66.13958740234375, 135.5305938720703, 416.9956970214844, 620.7914428710938, 368.9347839355469, -53.242000579833984, 54.780860900878906, 10.285476684570312, 585.1378173828125, 84.48768615722656, -103.2039794921875, 17.50501823425293, 104.38970947265625, 483.5543212890625, -40.374481201171875, 252.03125, 190.8955841064453, 447.16278076171875, 404.78204345703125, -0.8350811004638672, 137.43426513671875, -104.9012451171875, 130.75953674316406, 214.40576171875, 90.80963134765625, 28.332029342651367, 451.1241149902344, 186.90774536132812, 21.138113021850586, 20.664878845214844, 142.32345581054688, 216.18295288085938, 320.5584411621094, -159.55438232421875, 256.9124450683594, 61.237510681152344], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000289.npy"}
{"epoch": 0.4243759177679883, "step": 290, "batch_size": 64, "mean": 214.46087646484375, "std": 262.9317626953125, "min": -332.73394775390625, "p10": -59.35904350280761, "median": 190.9147720336914, "p90": 620.996551513672, "max": 986.230224609375, "pos_frac": 0.828125, "sample": [545.9132080078125, 176.87860107421875, 130.36306762695312, 150.5413818359375, -332.73394775390625, 395.6097106933594, 462.10943603515625, -102.91729736328125, 23.849777221679688, 133.19281005859375, -203.1578369140625, 405.76104736328125, 231.599853515625, 45.694305419921875, 245.12744140625, 220.32867431640625, 482.33172607421875, -24.06903076171875, 253.36593627929688, 15.997928619384766, 64.92127990722656, 35.61277770996094, -151.99563598632812, 150.52349853515625, 134.5745849609375, -293.66845703125, 136.05853271484375, 298.45599365234375, 986.230224609375, 758.78271484375, 85.4864501953125, 254.4199981689453, 295.0576171875, 634.628662109375, 183.033935546875, 41.44496154785156, 270.4801940917969, 124.51057434082031, 275.6055603027344, 233.82382202148438, 15.96417236328125, 199.96351623535156, 198.7956085205078, 345.26361083984375, 56.18489074707031, 288.1876220703125, -35.290794372558594, -61.99542236328125, 301.27008056640625, -107.2264404296875, 651.0051879882812, 432.3260498046875, 268.8081359863281, 665.0113525390625, 22.175098419189453, 648.7590942382812, 589.1882934570312, 326.5758361816406, -7.229663848876953, 43.346588134765625, 51.23215103149414, 851.5377197265625, 261.0777587890625, -53.20749282836914], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000290.npy"}
{"epoch": 0.42584434654919234, "step": 291, "batch_size": 64, "mean": 195.4530487060547, "std": 231.75222778320312, "min": -444.0740661621094, "p10": -52.85432739257812, "median": 201.68061065673828, "p90": 472.82214050292976, "max": 673.6700439453125, "pos_frac": 0.78125, "sample": [210.08445739746094, 198.13035583496094, -17.599258422851562, 46.48877716064453, 9.637969970703125, 280.1026611328125, 672.28076171875, 304.92535400390625, 425.6654052734375, 281.72802734375, 58.00929260253906, 439.97576904296875, 299.041015625, -36.330894470214844, 610.4765625, -37.4615478515625, 428.6099853515625, 235.3179473876953, -49.640785217285156, 254.52830505371094, 447.36322021484375, 9.528945922851562, 306.15509033203125, 85.99197387695312, 281.1480712890625, 481.403076171875, 156.76956176757812, 137.27232360839844, 405.2153625488281, 32.916927337646484, 324.6766357421875, 85.62464141845703, 351.730224609375, -242.11680603027344, 6.103851318359375, -172.59259033203125, -80.24996185302734, 62.920982360839844, -444.0740661621094, 673.6700439453125, 341.8729248046875, 489.6469421386719, 422.17376708984375, 265.25836181640625, -106.69917297363281, -31.780105590820312, -54.23155975341797, 388.40118408203125, 60.8065185546875, 161.68228149414062, 99.96751403808594, 158.52365112304688, 356.309814453125, 319.3695373535156, -1.173004150390625, 205.23086547851562, 452.7999572753906, 76.28144836425781, 570.6500244140625, -30.00304412841797, -125.17284393310547, 402.66473388671875, 560.755126953125, 2.231931686401367], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000291.npy"}
{"epoch": 0.42731277533039647, "step": 292, "batch_size": 64, "mean": 207.61753845214844, "std": 271.2224426269531, "min": -685.923095703125, "p10": -71.76386756896972, "median": 195.58016967773438, "p90": 543.1191284179688, "max": 869.2666015625, "pos_frac": 0.796875, "sample": [241.07415771484375, 38.865238189697266, -1.73876953125, 191.87911987304688, 282.9800109863281, 188.78439331054688, 127.16571044921875, 125.62328338623047, 194.65872192382812, -685.923095703125, 766.90576171875, 72.40096282958984, 644.3621826171875, 130.60336303710938, 358.9617614746094, -78.67388916015625, -274.94818115234375, 443.31744384765625, 34.257545471191406, 339.8623352050781, 23.011024475097656, 826.38134765625, 477.2488708496094, 210.16189575195312, 445.3210144042969, 524.4666748046875, 289.2704162597656, 518.2183837890625, 176.94732666015625, 114.06573486328125, -137.4622802734375, 283.4820861816406, 10.408651351928711, 504.966064453125, -10.838996887207031, 144.11279296875, -89.30511474609375, 178.1944580078125, 93.90950012207031, 130.66355895996094, 37.56292724609375, 566.5867309570312, 225.71371459960938, 198.60887145996094, 200.02099609375, 201.39871215820312, 480.0013122558594, 367.3890380859375, 869.2666015625, -26.172883987426758, 196.50161743164062, 327.53289794921875, 253.1267547607422, -229.6030731201172, 236.67674255371094, -200.49017333984375, 701.7857666015625, -55.64048385620117, -40.192623138427734, 551.113037109375, 238.48793029785156, 225.68234252929688, 124.62263488769531, -16.097658157348633], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000292.npy"}
{"epoch": 0.4287812041116006, "step": 293, "batch_size": 64, "mean": 164.92308044433594, "std": 252.12110900878906, "min": -311.18634033203125, "p10": -124.86650009155272, "median": 140.5926284790039, "p90": 471.1249694824219, "max": 967.2957763671875, "pos_frac": 0.734375, "sample": [38.24089431762695, 51.8184814453125, 27.96332359313965, 57.92432403564453, 251.16342163085938, 224.42225646972656, 478.8151550292969, 172.66099548339844, 472.001220703125, 53.454795837402344, 469.08038330078125, -42.54438781738281, 232.34710693359375, -3.251293182373047, 967.2957763671875, -258.9476623535156, 294.9195861816406, 196.56195068359375, 91.73074340820312, 274.9461975097656, 378.4616394042969, -155.36782836914062, 271.8227844238281, -66.56397247314453, 44.237083435058594, 97.69242095947266, -120.67549896240234, 163.40396118164062, -12.784805297851562, 525.776123046875, 455.52117919921875, 358.170166015625, 331.6490478515625, 417.5854797363281, 307.2590637207031, -166.0266876220703, 59.51429748535156, 164.68724060058594, 185.22259521484375, -34.247520446777344, -66.8637466430664, 94.75629425048828, 5.3079833984375, 288.9297180175781, 297.4083251953125, 387.8624572753906, 226.2095947265625, 325.93310546875, 117.78129577636719, -280.349609375, 7.482208251953125, -311.18634033203125, -126.66264343261719, 493.07049560546875, 908.775634765625, 96.41443634033203, -102.26773071289062, 165.11419677734375, 78.80557250976562, -76.09373474121094, -154.34234619140625, 394.8070373535156, 568.3397827148438, -40.09514617919922], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000293.npy"}
{"epoch": 0.4302496328928047, "step": 294, "batch_size": 64, "mean": 177.8052978515625, "std": 242.58016967773438, "min": -291.1479187011719, "p10": -113.72974548339839, "median": 106.464599609375, "p90": 514.6378021240236, "max": 868.402587890625, "pos_frac": 0.859375, "sample": [56.33881378173828, 83.73480224609375, 345.45574951171875, 428.7718200683594, 87.34390258789062, 40.13984298706055, -171.89505004882812, 139.30706787109375, 5.9461669921875, 72.7282485961914, 24.290084838867188, 28.391311645507812, -64.33221435546875, 551.9327392578125, 48.97901153564453, 315.28106689453125, 185.0071258544922, 422.49847412109375, 50.395328521728516, 161.9477081298828, 698.184326171875, -170.654541015625, 263.6329650878906, 91.53720092773438, 662.7819213867188, 479.3730163574219, 320.6432800292969, 150.1133270263672, 170.92532348632812, -134.90011596679688, 276.2663269042969, 345.8725280761719, 288.4134521484375, 868.402587890625, -259.44677734375, -215.9292755126953, 612.5325927734375, 255.52574157714844, 453.5721130371094, 25.18531036376953, -291.1479187011719, -178.61370849609375, 275.8778381347656, 310.3968811035156, 622.3729248046875, 109.88316345214844, 24.452518463134766, 141.15794372558594, 529.7512817382812, 29.39799690246582, 351.3273010253906, 361.4880065917969, 36.41638946533203, 149.49510192871094, 34.65903854370117, 75.24755859375, 50.9437370300293, 51.49292755126953, -12.708942413330078, 103.04603576660156, 477.31585693359375, 87.13076782226562, 0.434234619140625, 45.429115295410156], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000294.npy"}
{"epoch": 0.43171806167400884, "step": 295, "batch_size": 64, "mean": 197.43405151367188, "std": 253.7707061767578, "min": -305.055419921875, "p10": -113.78769302368161, "median": 189.58729553222656, "p90": 524.2405395507812, "max": 945.85009765625, "pos_frac": 0.765625, "sample": [-250.03134155273438, -22.83228874206543, 612.2168579101562, 12.600933074951172, -305.055419921875, 212.62301635742188, -158.24977111816406, 324.885986328125, 256.7487487792969, -275.3973693847656, 329.2596435546875, -11.258682250976562, 168.99343872070312, 378.2059326171875, 408.0956726074219, 273.3714599609375, 239.23365783691406, 313.2451477050781, -66.6397933959961, 81.02108001708984, 356.5286865234375, -60.62895965576172, 181.8663330078125, 384.69952392578125, 19.13440704345703, 141.5693359375, 521.703125, -123.75559997558594, 706.6602783203125, 173.91818237304688, 399.4137878417969, 50.795684814453125, -6.417057037353516, -168.24676513671875, 486.87322998046875, 581.8163452148438, 110.78694915771484, -90.52924346923828, -210.4651641845703, 90.66258239746094, 250.6005859375, 426.0126953125, 392.04473876953125, 113.09815979003906, -47.06037139892578, 90.28814697265625, 237.27650451660156, 72.16873931884766, 197.82540893554688, 544.408447265625, 361.0171813964844, 388.1258544921875, 13.641326904296875, 945.85009765625, 525.3280029296875, 441.4225769042969, 197.30825805664062, 216.76321411132812, 224.26748657226562, 163.57176208496094, 44.700477600097656, 109.15455627441406, 670.64404296875, -10.101768493652344], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000295.npy"}
{"epoch": 0.4331864904552129, "step": 296, "batch_size": 64, "mean": 139.83859252929688, "std": 266.1891174316406, "min": -682.4444580078125, "p10": -208.20932159423825, "median": 131.8069610595703, "p90": 502.35405883789065, "max": 801.8712158203125, "pos_frac": 0.75, "sample": [116.28304290771484, 82.66825103759766, 159.67730712890625, 801.8712158203125, -16.24847412109375, 532.413818359375, -220.66357421875, -48.75773620605469, -54.070152282714844, -682.4444580078125, 131.7735595703125, 213.18475341796875, -336.5032958984375, 212.67433166503906, 161.9779510498047, -282.2481994628906, 162.50379943847656, 380.35491943359375, 625.9559326171875, 408.6953125, 237.76959228515625, 308.965087890625, 19.654258728027344, -69.87933349609375, 106.49606323242188, 361.57080078125, 67.60067749023438, -236.8286590576172, 7.2965087890625, -179.14939880371094, 547.155029296875, 19.930740356445312, 249.4917755126953, 81.3759994506836, 214.05599975585938, 516.646240234375, 162.5952606201172, 110.69984436035156, -321.903076171875, 246.76296997070312, -278.94525146484375, 306.6937255859375, -115.3575210571289, -76.4774169921875, 503.7159423828125, 122.58988189697266, 125.21788787841797, 243.85511779785156, 155.53125, 173.28720092773438, 131.84036254882812, -31.878990173339844, 12.601722717285156, 796.6954956054688, 215.4396209716797, 210.57183837890625, 101.1867446899414, 62.451805114746094, 499.17633056640625, -86.06950378417969, 130.13816833496094, 153.33456420898438, 390.8050537109375, 403.86199951171875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000296.npy"}
{"epoch": 0.434654919236417, "step": 297, "batch_size": 64, "mean": 159.44927978515625, "std": 204.6948699951172, "min": -265.00726318359375, "p10": -53.430959320068354, "median": 138.38317108154297, "p90": 459.02940368652355, "max": 655.461181640625, "pos_frac": 0.796875, "sample": [140.88153076171875, 272.876220703125, 337.5328674316406, 583.1945190429688, 6.218669891357422, 11.212579727172852, 398.50604248046875, 194.97998046875, -182.67613220214844, -55.22737503051758, 37.81396484375, 143.46163940429688, 255.92391967773438, 387.92120361328125, 335.34735107421875, -20.651453018188477, -25.7119140625, 57.11016845703125, -62.79736328125, -11.982192993164062, 137.81114196777344, -186.0616912841797, 117.58106994628906, 6.567869186401367, 528.3604125976562, 92.82077026367188, 329.87420654296875, 79.01715087890625, 232.83135986328125, 260.40625, 165.10772705078125, 138.9552001953125, 475.6966552734375, 169.11402893066406, 437.6387939453125, 58.15920639038086, 60.22868347167969, 7.394416809082031, 541.3237915039062, 267.7559509277344, 22.019851684570312, 193.4145965576172, -265.00726318359375, 352.45135498046875, 89.62527465820312, -40.510658264160156, 146.5029754638672, 73.30166625976562, 655.461181640625, -54.762847900390625, 165.27734375, 468.1968078613281, 212.17324829101562, -16.73724365234375, -224.0283203125, 380.8939208984375, 13.877235412597656, 97.85841369628906, 219.06430053710938, 569.8682861328125, 67.07373046875, 362.01397705078125, -50.323219299316406, 42.53242492675781], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000297.npy"}
{"epoch": 0.43612334801762115, "step": 298, "batch_size": 64, "mean": 208.72238159179688, "std": 301.7993469238281, "min": -629.9827270507812, "p10": -89.04871292114255, "median": 158.94509887695312, "p90": 634.1696899414064, "max": 1246.0810546875, "pos_frac": 0.71875, "sample": [114.71918487548828, -106.57650756835938, -3.9015235900878906, -629.9827270507812, 87.93496704101562, -111.93963623046875, 8.053361892700195, 750.5463256835938, 251.33084106445312, 8.0126953125, 179.9095458984375, 540.0556640625, 4.1733245849609375, -31.209823608398438, -114.87469482421875, 54.17772674560547, 159.56607055664062, 111.98487854003906, 89.79071807861328, 292.4972839355469, 799.945556640625, -8.08990478515625, 389.65802001953125, -233.76495361328125, 662.0947875976562, 309.7151184082031, -105.83357238769531, 558.8021850585938, 557.821044921875, 693.1470336914062, -98.50557708740234, -66.98269653320312, -51.71007537841797, 8.066238403320312, -21.479209899902344, 670.8812255859375, 653.8963012695312, 163.07460021972656, 384.96209716796875, 244.90896606445312, -16.907167434692383, -5.6640777587890625, 1246.0810546875, 224.9168701171875, 588.1409301757812, 176.77139282226562, 475.74432373046875, 466.44769287109375, -21.15078353881836, 100.09786224365234, 158.32412719726562, 413.3805236816406, 352.7574462890625, 49.34381866455078, 523.2418823242188, 184.38999938964844, -58.133392333984375, 383.1539001464844, -33.06886291503906, 144.31182861328125, 242.89010620117188, 214.8762969970703, 60.90447235107422, 322.5072021484375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000298.npy"}
{"epoch": 0.43759177679882527, "step": 299, "batch_size": 64, "mean": 180.86856079101562, "std": 251.91526794433594, "min": -334.8707275390625, "p10": -88.22909927368163, "median": 167.5488052368164, "p90": 546.9444885253907, "max": 821.2966918945312, "pos_frac": 0.765625, "sample": [-138.25018310546875, -263.58953857421875, -334.8707275390625, 392.5198669433594, 87.90512084960938, 20.485790252685547, 367.5832824707031, 129.23529052734375, 12.798135757446289, 129.4180908203125, 623.271728515625, 530.4102783203125, 727.9640502929688, -92.01598358154297, 157.53456115722656, 712.6317138671875, -230.958251953125, 372.8513488769531, 185.0435791015625, 256.19720458984375, -53.125587463378906, 294.58331298828125, 186.59478759765625, 121.68228149414062, 412.3916320800781, 117.06658935546875, 821.2966918945312, -8.431316375732422, 215.20709228515625, -28.11307144165039, -56.23029327392578, 175.5613250732422, -79.39303588867188, 44.01191711425781, 203.973876953125, 554.0305786132812, 134.5455780029297, 207.35208129882812, -27.8741455078125, 165.60885620117188, 70.25508880615234, 365.8266296386719, 305.79833984375, -42.783512115478516, 185.97348022460938, -75.31812286376953, -226.47789001464844, 464.6354675292969, 27.16107177734375, 705.35546875, -131.14389038085938, 47.615516662597656, 264.4518737792969, 335.3058166503906, 285.8623046875, 280.23663330078125, 42.040252685546875, 648.8491821289062, 207.30523681640625, 169.48875427246094, 26.606842041015625, 350.4669494628906, 7.477668762207031, 213.69400024414062], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000299.npy"}
{"epoch": 0.4390602055800294, "step": 300, "batch_size": 64, "mean": 218.1741180419922, "std": 280.5107116699219, "min": -386.9246520996094, "p10": -200.97341918945307, "median": 218.2755126953125, "p90": 577.5835083007813, "max": 746.5830078125, "pos_frac": 0.796875, "sample": [318.5060729980469, 302.5931701660156, 165.09332275390625, 374.3843994140625, 679.9003295898438, -312.3551025390625, -221.30810546875, 629.4666137695312, 432.00213623046875, 28.93416976928711, -98.26006317138672, 294.7925720214844, 97.08744812011719, 552.6201171875, 244.73480224609375, 400.8079833984375, 112.29237365722656, 488.45068359375, 50.148948669433594, 74.27183532714844, -2.24615478515625, 250.27590942382812, 266.6414489746094, -8.910255432128906, 538.9898071289062, 385.26202392578125, -104.80987548828125, 60.31578063964844, 159.1789093017578, 352.95050048828125, 73.61912536621094, 733.4139404296875, 280.013671875, -386.9246520996094, 462.2227783203125, -260.0406494140625, 582.142578125, 355.7373352050781, 507.640869140625, 180.51800537109375, 99.2154312133789, 420.13525390625, 566.9456787109375, 397.86199951171875, 21.96664810180664, 377.9593505859375, -52.80739212036133, -231.46029663085938, 608.532470703125, 403.4760437011719, 480.35150146484375, -315.51220703125, 103.88936614990234, -233.02920532226562, 37.58432388305664, 179.23458862304688, 524.2571411132812, 75.36419677734375, 603.271240234375, 746.5830078125, -153.52581787109375, 33.9749641418457, 191.81622314453125, 36.90422058105469], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000300.npy"}
{"epoch": 0.44052863436123346, "step": 301, "batch_size": 64, "mean": 209.615966796875, "std": 257.2528381347656, "min": -211.21368408203125, "p10": -61.64197006225586, "median": 152.05249786376953, "p90": 555.3745849609376, "max": 895.8914794921875, "pos_frac": 0.828125, "sample": [215.06321716308594, 348.1879577636719, -57.629066467285156, 342.3150939941406, 387.1087646484375, 267.41058349609375, 805.8482666015625, 68.74870300292969, 489.20086669921875, -50.67524719238281, 846.940185546875, 362.8701477050781, 23.663917541503906, 274.121337890625, -150.6476593017578, 172.120361328125, 98.7065658569336, 647.1590576171875, 84.85069274902344, 372.90350341796875, 771.544189453125, 31.511798858642578, -48.387611389160156, 60.80064392089844, 452.4947204589844, 27.419830322265625, 44.910423278808594, 55.24458312988281, 257.8802795410156, 387.6098327636719, 315.8651123046875, 280.98126220703125, 61.275264739990234, 205.06802368164062, 84.79749298095703, 75.463134765625, 150.89820861816406, -126.74983215332031, 11.013154983520508, 49.0828857421875, 41.98834228515625, 29.184972763061523, 246.46905517578125, -102.4218978881836, 25.051620483398438, 895.8914794921875, 341.23779296875, 157.65542602539062, 576.7243041992188, 80.7908935546875, 541.389404296875, 153.206787109375, 360.9168701171875, 548.3841552734375, 412.746826171875, 72.059814453125, -128.629150390625, -179.3026580810547, 558.3704833984375, -211.21368408203125, 84.99671936035156, -63.361785888671875, -14.855892181396484, 291.1522521972656], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000301.npy"}
{"epoch": 0.4419970631424376, "step": 302, "batch_size": 64, "mean": 176.04083251953125, "std": 222.24951171875, "min": -235.5748291015625, "p10": -82.54252853393555, "median": 144.02488708496094, "p90": 469.8048156738282, "max": 840.3447875976562, "pos_frac": 0.78125, "sample": [45.02476501464844, 416.41802978515625, 572.9172973632812, -37.29775619506836, 213.42715454101562, -10.317960739135742, 268.7955322265625, 24.373504638671875, 153.6838836669922, 9.980072021484375, 265.2542419433594, 27.634353637695312, 79.18474578857422, 149.90585327148438, 152.77227783203125, 104.83165740966797, 362.71240234375, -6.288166046142578, 359.7763671875, 381.35809326171875, 187.8106231689453, 192.07440185546875, 205.46774291992188, 300.9209289550781, -81.11788177490234, -123.85738372802734, 398.4920654296875, 546.781005859375, 105.68535614013672, -196.5675811767578, 237.8082275390625, -32.015357971191406, 125.79864501953125, 6.12713623046875, -83.15309143066406, 552.1533203125, 840.3447875976562, 477.4647216796875, 98.22081756591797, -76.51898193359375, -63.0074462890625, 451.93170166015625, -235.5748291015625, 235.28665161132812, 413.8084716796875, 40.83159637451172, 133.9831085205078, 648.8610229492188, 193.6541290283203, 327.994384765625, 62.527915954589844, 255.6283721923828, -177.87149047851562, 266.13427734375, 59.59565353393555, 261.6580810546875, -83.72761535644531, 397.4276428222656, 72.95994567871094, 600.1514282226562, -86.09869384765625, 41.77064895629883, 138.1439208984375, 94.47804260253906], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000302.npy"}
{"epoch": 0.4434654919236417, "step": 303, "batch_size": 64, "mean": 163.615966796875, "std": 245.03900146484375, "min": -463.8420104980469, "p10": -129.25434570312498, "median": 146.66006469726562, "p90": 524.392791748047, "max": 671.2479248046875, "pos_frac": 0.703125, "sample": [299.19122314453125, -6.374973297119141, 69.2200927734375, 631.5037841796875, -123.37625122070312, 339.01214599609375, 508.79681396484375, 671.2479248046875, -24.583759307861328, -87.9447021484375, 366.42596435546875, 198.72305297851562, 269.85565185546875, -137.55123901367188, 85.19507598876953, 170.9649658203125, 90.70686340332031, -2.4971446990966797, -44.863609313964844, 409.02752685546875, -463.8420104980469, -49.39912033081055, -131.77352905273438, 331.07550048828125, 424.68511962890625, 558.9938354492188, -120.18729400634766, 315.616943359375, -251.59750366210938, 140.20375061035156, -48.97186279296875, 360.73638916015625, 644.8523559570312, 230.34414672851562, 461.24017333984375, 151.733154296875, -163.58767700195312, 148.8665771484375, 130.25689697265625, 621.0634765625, 508.0587463378906, 49.51678466796875, 167.50775146484375, 531.0767822265625, 146.96652221679688, 207.05075073242188, 243.27197265625, 325.6877136230469, 146.35360717773438, -15.735960006713867, 99.64920043945312, -31.73163604736328, 227.44264221191406, 29.445026397705078, 106.32705688476562, 591.6151123046875, -167.85321044921875, -41.55390930175781, 57.30388259887695, 55.21982955932617, 263.2017822265625, 14.696014404296875, -180.0667724609375, 164.9842529296875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000303.npy"}
{"epoch": 0.44493392070484583, "step": 304, "batch_size": 64, "mean": 183.16299438476562, "std": 167.4127960205078, "min": -267.88934326171875, "p10": -5.778076934814452, "median": 168.43756103515625, "p90": 379.3365661621094, "max": 633.4578247070312, "pos_frac": 0.875, "sample": [25.596420288085938, 256.479736328125, 297.33294677734375, 99.26592254638672, -8.344085693359375, -6.2196197509765625, 1.116973876953125, 36.01515197753906, 157.32855224609375, 132.96612548828125, 296.1273193359375, 364.0408935546875, 179.54656982421875, 314.38861083984375, 74.30921936035156, 289.52996826171875, 287.0013732910156, 351.6890869140625, -4.747810363769531, 55.098575592041016, 155.283203125, 363.868408203125, 43.87532043457031, 69.93368530273438, 280.5321350097656, 86.76530456542969, 316.9272766113281, 52.90870666503906, 369.76800537109375, 89.82931518554688, 117.15898132324219, 125.06192016601562, 633.4578247070312, 116.9477310180664, -32.38756561279297, -267.88934326171875, 241.71392822265625, -15.939056396484375, 52.97071838378906, 241.16259765625, -59.564971923828125, 455.2091064453125, 54.43412780761719, 25.869653701782227, 485.02459716796875, 106.3016357421875, 11.934783935546875, 72.39119720458984, 194.45013427734375, 356.76556396484375, 275.6728820800781, -116.03730773925781, 186.86448669433594, 451.9994812011719, 404.6878662109375, 94.79487609863281, 238.99786376953125, 292.25787353515625, 459.221435546875, 383.4373779296875, 340.7762451171875, 231.70181274414062, 299.63092041015625, 235.13851928710938], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000304.npy"}
{"epoch": 0.44640234948604995, "step": 305, "batch_size": 64, "mean": 161.9324951171875, "std": 248.97390747070312, "min": -317.6741027832031, "p10": -94.19625549316403, "median": 116.69367980957031, "p90": 494.31342468261727, "max": 1104.7862548828125, "pos_frac": 0.734375, "sample": [453.0384216308594, 280.74578857421875, 80.796142578125, 216.7921600341797, 134.05116271972656, 675.9068603515625, 267.5885009765625, 30.357831954956055, 278.1534423828125, 524.6155395507812, 130.01695251464844, 81.50419616699219, 72.7086410522461, -67.03265380859375, -66.87359619140625, -13.972023010253906, 323.2851867675781, 504.0248718261719, 56.429054260253906, 79.68657684326172, 1104.7862548828125, 358.35986328125, 21.717769622802734, 283.37060546875, 314.94708251953125, -55.62306213378906, -15.811243057250977, 233.9093475341797, 225.69253540039062, -105.83779907226562, -42.92698669433594, 301.46270751953125, -146.6622772216797, 308.73773193359375, 104.52203369140625, 144.8753204345703, 599.321533203125, 313.6175537109375, -3.133848190307617, 157.2244873046875, 128.86532592773438, -28.13506317138672, 80.68733215332031, 63.87340545654297, 592.5870971679688, 567.7970581054688, 236.8309783935547, -45.852142333984375, -248.16232299804688, 137.50515747070312, -302.14324951171875, 471.65338134765625, 420.4880065917969, 33.530601501464844, -106.55776977539062, 31.58624267578125, -156.3408203125, 41.884483337402344, 275.4264221191406, -317.6741027832031, 61.901771545410156, 278.1629638671875, -15.615127563476562, 17.007816314697266], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000305.npy"}
{"epoch": 0.447870778267254, "step": 306, "batch_size": 64, "mean": 160.8773193359375, "std": 205.6149139404297, "min": -372.6436767578125, "p10": -51.41714553833007, "median": 104.3771858215332, "p90": 415.5544036865234, "max": 942.2510986328125, "pos_frac": 0.8125, "sample": [-12.536785125732422, 3.649066925048828, 100.5487060546875, 6.805267333984375, 26.2774658203125, -54.67182922363281, 316.4610290527344, 14.800758361816406, 90.60357666015625, -14.407875061035156, 220.7097625732422, 365.1966247558594, 198.90390014648438, 415.80059814453125, 54.47119140625, 82.55977630615234, 11.674686431884766, 502.4263000488281, 94.29430389404297, 52.08949279785156, 103.22798919677734, -66.73711395263672, -55.030609130859375, 468.9487609863281, 270.9088134765625, 312.9103698730469, -27.42892837524414, -7.735898971557617, 461.83233642578125, 105.52638244628906, 88.69992065429688, 142.288330078125, 249.29226684570312, 400.691162109375, 46.407073974609375, 124.00279235839844, 86.87596130371094, -93.4836654663086, 32.05678939819336, 269.2105712890625, -372.6436767578125, 320.63690185546875, 115.6114273071289, -58.56501770019531, 60.17103576660156, 272.75042724609375, -43.82288360595703, 132.24996948242188, 584.569091796875, 504.39569091796875, 173.1051025390625, 314.79241943359375, 236.85585021972656, -69.7016830444336, 414.9799499511719, 0.09211349487304688, 60.54251480102539, 12.951164245605469, 246.38763427734375, 137.20950317382812, 942.2510986328125, 243.5894775390625, 304.9183349609375, 374.7019958496094], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000306.npy"}
{"epoch": 0.44933920704845814, "step": 307, "batch_size": 64, "mean": 167.7259063720703, "std": 219.5751190185547, "min": -408.8985595703125, "p10": -107.2556655883789, "median": 137.51649475097656, "p90": 473.20269470214856, "max": 798.9640502929688, "pos_frac": 0.796875, "sample": [379.5455017089844, 140.38040161132812, 147.419189453125, 216.90084838867188, 183.27609252929688, 157.1306610107422, -121.8060302734375, 84.55313873291016, -77.66032409667969, 570.644775390625, 148.75416564941406, -32.46440887451172, 93.48493957519531, -152.62869262695312, 441.80078125, -408.8985595703125, 538.656005859375, -161.8876190185547, 252.697509765625, -73.67127227783203, 224.99435424804688, 279.2309265136719, 52.513633728027344, 158.36277770996094, 503.28228759765625, 256.9964294433594, 46.63810348510742, 109.91300964355469, 101.6429443359375, 500.4214172363281, 90.91812133789062, 485.5859069824219, 71.52518463134766, 107.95249938964844, -110.79646301269531, 129.65536499023438, -98.99380493164062, 42.00144958496094, 433.4725341796875, 444.30853271484375, 0.5156021118164062, 387.4686279296875, 77.2583236694336, 288.58734130859375, 207.98814392089844, 49.63966369628906, 353.03411865234375, 244.56103515625, -13.580877304077148, 86.43119812011719, 247.97312927246094, 15.100780487060547, 389.15216064453125, 617.1514282226562, 267.6757507324219, 798.9640502929688, 134.652587890625, -116.22525024414062, 231.23361206054688, 131.26620483398438, -45.43800354003906, 302.4756164550781, 63.08747863769531, -140.3665771484375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000307.npy"}
{"epoch": 0.45080763582966227, "step": 308, "batch_size": 64, "mean": 190.49862670898438, "std": 202.46066284179688, "min": -216.95272827148438, "p10": -53.99456367492674, "median": 163.2793731689453, "p90": 494.2600341796875, "max": 543.5474853515625, "pos_frac": 0.8125, "sample": [163.9718475341797, 470.3753662109375, -204.65383911132812, 268.0364685058594, 31.044349670410156, 518.3797607421875, -216.95272827148438, -62.849849700927734, 12.14654541015625, 82.13520050048828, 495.60845947265625, -9.288387298583984, 223.79891967773438, 91.68698120117188, 406.653564453125, 516.504150390625, -131.38548278808594, 157.76280212402344, 171.85723876953125, 2.8155288696289062, 279.88336181640625, 518.1157836914062, 303.349365234375, 491.11370849609375, -83.2203369140625, 295.84478759765625, 134.17282104492188, 111.2898178100586, 123.28028869628906, -33.33222961425781, 251.62371826171875, 125.30225372314453, 157.3670654296875, 242.82176208496094, 301.1658935546875, 483.0458679199219, 524.2236938476562, 217.933837890625, -162.4762725830078, 78.59870910644531, 108.64930725097656, 366.2967529296875, 346.2781982421875, 162.58689880371094, 134.32801818847656, 543.5474853515625, 238.01425170898438, 33.97267150878906, -12.952491760253906, 398.91326904296875, 512.2406005859375, 284.69378662109375, 158.70046997070312, -2.227872848510742, -29.848310470581055, 46.327423095703125, 350.15118408203125, 370.24420166015625, 262.3646240234375, 15.822883605957031, 434.51806640625, 239.63571166992188, -126.36387634277344, 8.268791198730469], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000308.npy"}
{"epoch": 0.4522760646108664, "step": 309, "batch_size": 64, "mean": 187.09609985351562, "std": 243.833251953125, "min": -742.0777587890625, "p10": -79.67725219726562, "median": 171.16937255859375, "p90": 490.5228790283204, "max": 767.5919189453125, "pos_frac": 0.78125, "sample": [176.08615112304688, 74.01480102539062, 112.9309310913086, -119.3033447265625, 354.075439453125, 182.61959838867188, 119.86620330810547, -16.731630325317383, 539.1195068359375, 62.833587646484375, 303.73370361328125, 465.2608642578125, 116.20101165771484, 501.3494567871094, 25.11872100830078, 27.233877182006836, -73.23927307128906, 314.506591796875, 511.1803894042969, 306.6509094238281, 266.585693359375, -742.0777587890625, 234.48342895507812, -131.0783233642578, 62.632904052734375, 166.25259399414062, 255.59124755859375, 328.8087158203125, 59.02961730957031, -19.159528732299805, -144.0819091796875, 625.5318603515625, 67.37017822265625, -0.2246856689453125, 117.73602294921875, 448.1685791015625, 767.5919189453125, 465.1622314453125, 134.12313842773438, -22.439865112304688, -62.956417083740234, 94.58291625976562, 58.92787551879883, 428.01519775390625, 325.36138916015625, 306.9438171386719, 527.1234130859375, 439.1600341796875, 317.29656982421875, -24.241825103759766, 456.0058288574219, -167.4926300048828, 244.07748413085938, 298.2018127441406, -118.73300170898438, 64.264892578125, 545.131103515625, 425.1036376953125, -82.43638610839844, 161.64169311523438, 178.4824981689453, 7.472972869873047, 371.63055419921875, 257.074462890625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000309.npy"}
{"epoch": 0.45374449339207046, "step": 310, "batch_size": 64, "mean": 181.6678924560547, "std": 200.31301879882812, "min": -316.7443542480469, "p10": -41.33667488098144, "median": 172.69345092773438, "p90": 434.7058471679688, "max": 656.2089233398438, "pos_frac": 0.828125, "sample": [-51.191646575927734, 169.9814453125, 420.0498962402344, -53.580650329589844, 135.0405731201172, 343.8931579589844, 71.14262390136719, 19.8741397857666, 50.64952087402344, 207.34158325195312, -242.46023559570312, 409.04705810546875, 440.9869689941406, -36.09791946411133, 75.95540618896484, 334.6109313964844, 200.81231689453125, 245.87379455566406, 97.45120239257812, 175.40545654296875, -27.595294952392578, 118.25044250488281, 325.1402893066406, 69.92041015625, 176.290283203125, 573.0479736328125, 339.24005126953125, 17.327091217041016, 54.67218017578125, 329.4060363769531, -316.7443542480469, 656.2089233398438, -30.501178741455078, 147.81138610839844, 585.5670776367188, 187.81405639648438, 255.26687622070312, -29.530927658081055, -43.58185577392578, 30.59282684326172, 69.04769897460938, 159.63314819335938, 343.4541015625, 241.15394592285156, 75.03797149658203, 275.98736572265625, 443.031982421875, -153.82818603515625, 370.142578125, 253.08731079101562, 361.3021240234375, -96.03534698486328, 341.01824951171875, 123.92259216308594, 67.62199401855469, 57.50220489501953, 80.0714111328125, 559.5489501953125, 545.06201171875, 221.6603240966797, 57.001041412353516, 370.5047912597656, 250.67185974121094, 176.7570343017578], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000310.npy"}
{"epoch": 0.4552129221732746, "step": 311, "batch_size": 64, "mean": 201.01556396484375, "std": 263.168701171875, "min": -284.3929443359375, "p10": -115.17536163330074, "median": 171.28459930419922, "p90": 583.4643066406251, "max": 893.4180908203125, "pos_frac": 0.71875, "sample": [540.7750244140625, 421.0244445800781, 403.6434326171875, -13.928459167480469, -152.29949951171875, 554.77392578125, -130.84225463867188, 285.82012939453125, 165.73556518554688, 592.80224609375, 282.77301025390625, 74.52359008789062, 360.49151611328125, 799.0147705078125, 320.62646484375, 144.16421508789062, 473.21295166015625, 155.85348510742188, -51.21088409423828, 634.1131591796875, 186.8430938720703, 213.53515625, 561.67578125, 211.69496154785156, 37.40642547607422, 177.1602325439453, -78.61927795410156, -187.16802978515625, 200.73910522460938, 144.95098876953125, -60.08624267578125, 311.0538024902344, -144.820068359375, 120.79841613769531, 247.25677490234375, -137.84747314453125, -7.748100280761719, 893.4180908203125, -34.86767578125, 281.1101989746094, 42.108802795410156, 61.05268096923828, 246.02072143554688, 63.48939514160156, 641.9810180664062, 418.5717468261719, 34.32545852661133, -66.65399169921875, -27.64815902709961, 97.19244384765625, -22.120647430419922, 517.633056640625, 132.25729370117188, 21.83594512939453, -284.3929443359375, 456.717041015625, 176.83363342285156, 236.76495361328125, -5.528053283691406, 248.23500061035156, 617.20458984375, -175.5083770751953, -44.251548767089844, 681.322265625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000311.npy"}
{"epoch": 0.4566813509544787, "step": 312, "batch_size": 64, "mean": 184.3203125, "std": 265.811767578125, "min": -355.11614990234375, "p10": -85.39189224243164, "median": 145.67584991455078, "p90": 502.8841064453125, "max": 1078.4044189453125, "pos_frac": 0.78125, "sample": [126.38934326171875, 1078.4044189453125, 504.70465087890625, 194.52200317382812, 112.02934265136719, 42.70008087158203, -88.25928497314453, 94.96026611328125, 55.24311065673828, 372.6246337890625, -256.4883728027344, 423.998779296875, 1000.6275634765625, 354.2987060546875, 246.484619140625, 269.2293395996094, -204.667236328125, 270.1116943359375, 396.770263671875, 153.66119384765625, 553.948974609375, 79.07279968261719, 148.3955078125, 287.78741455078125, 82.80380249023438, -1.0096569061279297, 20.411643981933594, 763.5545654296875, 341.0270080566406, 521.725830078125, 148.29608154296875, 431.5897521972656, 182.23995971679688, 91.02519226074219, -107.89425659179688, 95.92030334472656, 162.43360900878906, 498.63616943359375, -22.843551635742188, 49.38215255737305, 281.59124755859375, 146.8137664794922, -44.91876983642578, 270.04473876953125, 209.50228881835938, -8.780654907226562, -23.7298583984375, 682.7129516601562, -78.70130920410156, 48.47303009033203, 236.29489135742188, 87.23878479003906, -242.16424560546875, -53.46849060058594, -109.2273178100586, 133.1052703857422, 144.53793334960938, 5.0093536376953125, -355.11614990234375, 308.9701232910156, 254.16470336914062, 19.004241943359375, 27.671985626220703, 383.6239013671875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000312.npy"}
{"epoch": 0.4581497797356828, "step": 313, "batch_size": 64, "mean": 187.22030639648438, "std": 259.85992431640625, "min": -453.4371337890625, "p10": -85.06626510620113, "median": 168.20648956298828, "p90": 483.9980407714844, "max": 878.816162109375, "pos_frac": 0.796875, "sample": [73.6107177734375, 308.54443359375, 110.68795776367188, 307.8287353515625, -149.7677001953125, -0.8470611572265625, 487.4493408203125, 150.0897674560547, 389.8918151855469, -38.00027084350586, 24.055679321289062, -160.55885314941406, 475.94500732421875, 170.69715881347656, 20.722009658813477, 193.6159210205078, 457.1012268066406, 202.0548553466797, 81.56148529052734, 199.3369140625, 69.27252960205078, 336.4611511230469, -443.7065124511719, -103.79011535644531, 218.9324951171875, 609.0106201171875, 275.2255859375, 92.76058959960938, 218.47262573242188, 878.816162109375, 332.655517578125, 94.7322998046875, -37.468292236328125, 200.4131317138672, -229.76962280273438, 131.9589385986328, -32.18019104003906, -453.4371337890625, 406.92572021484375, 249.5840301513672, -15.280998229980469, 244.52294921875, 56.50249099731445, 285.25634765625, 297.5444641113281, 121.30521392822266, -41.377281188964844, 314.78387451171875, 303.3179931640625, -169.67977905273438, 429.2978820800781, 87.72337341308594, 165.7158203125, 13.3297119140625, 164.94741821289062, 616.6907958984375, 657.5672607421875, 23.197158813476562, 16.269798278808594, 77.64805603027344, 270.4366149902344, 367.9920959472656, 737.6547241210938, 837.8452758789062], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000313.npy"}
{"epoch": 0.45961820851688695, "step": 314, "batch_size": 64, "mean": 178.0127410888672, "std": 283.011474609375, "min": -391.35064697265625, "p10": -148.34142608642577, "median": 126.36592483520508, "p90": 496.63109130859385, "max": 920.363037109375, "pos_frac": 0.703125, "sample": [792.53955078125, -271.1136779785156, 97.23685455322266, 16.035911560058594, 47.905181884765625, 260.3930969238281, 77.01544189453125, 331.8135070800781, 69.23727416992188, -139.26976013183594, 362.6013488769531, -2.9261016845703125, 272.72869873046875, 129.0029754638672, 181.53350830078125, -391.35064697265625, -158.00682067871094, 39.12388610839844, 741.5557250976562, 775.3960571289062, 32.4368896484375, -40.51885223388672, -141.61570739746094, 49.18748092651367, 165.7069091796875, -175.2029266357422, -26.838912963867188, -151.223876953125, -40.28508758544922, 435.06158447265625, 472.2886962890625, 40.14947509765625, 290.80181884765625, 505.47076416015625, 285.5655822753906, 179.0880889892578, -132.65597534179688, 123.72887420654297, 11.26568603515625, 784.969970703125, 453.0342102050781, 360.5583190917969, 920.363037109375, 476.00518798828125, -92.51104736328125, 373.671875, -125.54814910888672, 466.963623046875, -24.049819946289062, 218.10824584960938, 355.31005859375, 38.417510986328125, 334.4140625, 352.0834655761719, 461.0948181152344, 544.6958618164062, 80.72853088378906, 208.53973388671875, 231.24913024902344, -13.718879699707031, -194.05128479003906, -35.8482780456543, 335.91510009765625, -231.4428253173828], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000314.npy"}
{"epoch": 0.461086637298091, "step": 315, "batch_size": 64, "mean": 165.0262451171875, "std": 218.4488983154297, "min": -181.0963592529297, "p10": -79.50510101318359, "median": 123.55726623535156, "p90": 501.94612121582037, "max": 714.9159545898438, "pos_frac": 0.765625, "sample": [594.1883544921875, -36.60491180419922, 293.63262939453125, 7.627532958984375, -51.20750045776367, 59.82963562011719, 43.32733154296875, 132.4163360595703, 278.9697265625, -26.103591918945312, 323.75543212890625, 39.874053955078125, -88.79762268066406, 262.5876159667969, 80.17699432373047, 515.0632934570312, 318.11383056640625, 155.91128540039062, -152.67446899414062, 351.8489685058594, 240.0733642578125, 509.86566162109375, 559.755859375, 48.1820068359375, -118.08544921875, 171.25726318359375, 271.39825439453125, 179.04969787597656, 404.1303405761719, 156.3579559326172, 471.9643859863281, 114.69819641113281, 110.73431396484375, 104.81002807617188, 210.0662078857422, 348.4163818359375, -143.38992309570312, -17.80413818359375, 247.6194305419922, 23.509719848632812, 23.305072784423828, -67.78164672851562, 714.9159545898438, 51.12493133544922, 226.12374877929688, -7.141868591308594, -151.79345703125, 10.49412727355957, -75.38667297363281, 644.1885986328125, 82.2017822265625, 209.0887908935547, 639.5405883789062, 156.56048583984375, 154.6147918701172, -81.2701416015625, 289.7962951660156, 2.9304542541503906, 399.8916931152344, 483.4671936035156, -181.0963592529297, -53.718597412109375, 68.93794250488281, 28.14183807373047], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000315.npy"}
{"epoch": 0.46255506607929514, "step": 316, "batch_size": 64, "mean": 230.76441955566406, "std": 252.83624267578125, "min": -246.15072631835938, "p10": -9.53220252990721, "median": 161.16259765625, "p90": 591.0006286621094, "max": 935.2681884765625, "pos_frac": 0.890625, "sample": [337.24664306640625, 129.2425994873047, 492.9949951171875, 73.25099182128906, 39.319664001464844, 935.2681884765625, 154.06484985351562, 691.6900634765625, 479.814208984375, 650.8389282226562, 363.98712158203125, 338.38818359375, 33.986724853515625, 16.234317779541016, 44.261070251464844, 246.5937042236328, 65.47489929199219, 107.25225830078125, 29.00606918334961, 412.50177001953125, 194.79458618164062, 37.359317779541016, 598.7069702148438, 395.375, 269.61651611328125, 117.9615707397461, 44.95560073852539, 710.4268188476562, 140.58216857910156, 506.525146484375, 35.981239318847656, 166.095458984375, 50.695098876953125, 153.90809631347656, 238.175537109375, 325.94879150390625, 387.1728515625, 55.280555725097656, 325.134033203125, 19.03343963623047, 573.0191650390625, 338.3966064453125, 39.265777587890625, 404.2357177734375, 201.29359436035156, 156.229736328125, -236.12657165527344, 7.274913787841797, 918.8225708007812, 150.24314880371094, 804.0872802734375, -246.15072631835938, 182.04568481445312, 186.75674438476562, 367.04010009765625, -16.735252380371094, -56.822288513183594, -30.710586547851562, -24.763099670410156, 120.70748138427734, 229.40365600585938, -117.51461029052734, 110.8040771484375, 292.9736328125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000316.npy"}
{"epoch": 0.46402349486049926, "step": 317, "batch_size": 64, "mean": 254.12632751464844, "std": 247.21603393554688, "min": -190.11392211914062, "p10": -51.63412322998046, "median": 228.22374725341797, "p90": 595.4982177734375, "max": 780.0244750976562, "pos_frac": 0.8125, "sample": [358.47515869140625, 42.90808868408203, 116.8410415649414, 488.4633483886719, 469.5303955078125, 76.91018676757812, 439.58148193359375, 399.38018798828125, -190.11392211914062, 603.5867919921875, 734.3701171875, 127.92860412597656, 236.9324951171875, 3.04022216796875, 129.61456298828125, 400.53924560546875, 194.0076141357422, -21.513877868652344, 122.24906158447266, 384.57763671875, -43.56732177734375, 357.47723388671875, 393.3342590332031, 541.701171875, 31.012218475341797, 576.6248779296875, -19.006771087646484, 111.62581634521484, 736.0224609375, 414.9510498046875, 663.1259155273438, 481.80908203125, 185.0662384033203, 557.6151123046875, 419.2498779296875, -65.46849060058594, 255.70143127441406, 135.3151092529297, 262.0767517089844, 91.53396606445312, 218.05682373046875, 564.980224609375, -31.934099197387695, -55.09132385253906, 780.0244750976562, 219.51499938964844, 652.6976318359375, 305.86676025390625, 294.05694580078125, -83.56920623779297, 133.47731018066406, 86.68769073486328, 268.8414306640625, -21.36968231201172, -97.67052459716797, 132.74465942382812, 103.40283203125, -171.71014404296875, 656.368896484375, 409.516845703125, 81.9041519165039, 338.36279296875, -120.14613342285156, 395.5641784667969], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000317.npy"}
{"epoch": 0.4654919236417034, "step": 318, "batch_size": 64, "mean": 189.4044189453125, "std": 240.00523376464844, "min": -325.74151611328125, "p10": -70.33793869018554, "median": 145.14964294433594, "p90": 511.05648193359394, "max": 856.4867553710938, "pos_frac": 0.796875, "sample": [73.27252960205078, -45.75712585449219, 848.0322265625, 92.9986572265625, 293.0470886230469, 111.24546813964844, -65.7247543334961, 187.43826293945312, 125.21382904052734, 678.0941772460938, 17.722421646118164, 104.6466064453125, 341.99334716796875, 531.3407592773438, 634.2447509765625, 249.44374084472656, 6.3710174560546875, 128.4803009033203, -108.57946014404297, -73.45441436767578, 605.0985107421875, 66.1002426147461, -138.0654754638672, 463.72650146484375, 856.4867553710938, 323.0740966796875, -43.43635940551758, -120.57060241699219, 235.7738494873047, 346.2010498046875, 83.74844360351562, 339.7831115722656, 62.61748504638672, 417.1340637207031, 220.2178497314453, 144.4071044921875, 545.196044921875, 10.447193145751953, -32.295013427734375, 448.2135925292969, 427.81231689453125, 186.88116455078125, -8.712810516357422, -64.7098159790039, 342.55059814453125, 224.49734497070312, 97.75167083740234, 164.81246948242188, 135.04234313964844, -72.31501770019531, -325.74151611328125, 54.797340393066406, 220.19757080078125, 203.27630615234375, 371.5765075683594, 385.67840576171875, 222.79803466796875, 134.47344970703125, 57.68241882324219, 290.1983947753906, 145.89218139648438, 192.236572265625, -266.4745788574219, 37.754661560058594], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000318.npy"}
{"epoch": 0.4669603524229075, "step": 319, "batch_size": 64, "mean": 225.37155151367188, "std": 256.25732421875, "min": -342.0672912597656, "p10": -115.12229003906249, "median": 226.59461975097656, "p90": 542.0910705566407, "max": 989.6464233398438, "pos_frac": 0.8125, "sample": [309.1063232421875, 143.5807647705078, 342.3615417480469, 241.5162353515625, 76.10511779785156, 263.11468505859375, 379.5715637207031, 521.6425170898438, 240.60302734375, 74.60765838623047, -118.07749938964844, -228.34378051757812, 305.3910827636719, 121.92063903808594, 331.3998107910156, 121.59934997558594, 43.664466857910156, 273.4525146484375, 258.9761657714844, 211.6890869140625, 188.07118225097656, 472.5435791015625, 212.58621215820312, 253.2230224609375, 305.84820556640625, 29.798202514648438, 383.1167907714844, 818.9716796875, 418.6604919433594, 348.751220703125, -105.16546630859375, 184.2887420654297, 174.79188537597656, 495.80645751953125, -249.2608642578125, 432.11981201171875, -342.0672912597656, -117.27053833007812, -110.10971069335938, 416.4439697265625, 584.8448486328125, 463.9016418457031, 584.6774291992188, -1.8003215789794922, 171.90208435058594, 24.895797729492188, 579.9141845703125, 633.14892578125, 273.4502868652344, 550.854736328125, 989.6464233398438, -142.92115783691406, 156.21815490722656, 451.79571533203125, -72.32664489746094, 37.462486267089844, -225.81234741210938, -50.22240447998047, 388.3286437988281, 159.27597045898438, 142.44268798828125, 184.99468994140625, 265.1014099121094, 148.9768524169922], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000319.npy"}
{"epoch": 0.4684287812041116, "step": 320, "batch_size": 64, "mean": 247.78436279296875, "std": 263.08148193359375, "min": -271.97601318359375, "p10": -54.27815246582031, "median": 223.66954040527344, "p90": 586.0202270507814, "max": 977.1348876953125, "pos_frac": 0.828125, "sample": [188.94320678710938, 200.6049041748047, 917.5799560546875, 122.12010192871094, 295.228759765625, -53.421470642089844, 212.57916259765625, -271.97601318359375, 838.5145874023438, 240.94224548339844, 407.05169677734375, 328.2298889160156, -53.278953552246094, 361.28875732421875, 221.70565795898438, 529.3740844726562, 627.2139282226562, 405.8487243652344, 357.1011047363281, -232.39035034179688, 313.3914794921875, 389.9302062988281, -42.190147399902344, 225.6334228515625, 59.73462677001953, 33.53014373779297, 333.31268310546875, 304.3670654296875, -31.629745483398438, 602.4554443359375, 11.257034301757812, 266.8454284667969, -54.645301818847656, 117.28551483154297, 487.7696533203125, 150.120849609375, 150.53579711914062, 209.83827209472656, 977.1348876953125, 412.8119812011719, 404.340087890625, 83.78540802001953, 146.1802978515625, -78.22894287109375, 652.33544921875, 59.064918518066406, 375.38018798828125, 195.2179718017578, 3.5773487091064453, 35.0885009765625, 358.3609924316406, 361.52166748046875, 268.74835205078125, 470.7130126953125, 547.67138671875, 657.8134765625, 545.5739135742188, 8.674406051635742, -113.86480712890625, -58.554710388183594, 47.89996337890625, 220.85328674316406, 276.9688720703125, -171.67059326171875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000320.npy"}
{"epoch": 0.4698972099853157, "step": 321, "batch_size": 64, "mean": 210.17095947265625, "std": 268.70733642578125, "min": -309.507568359375, "p10": -104.86915740966795, "median": 184.76492309570312, "p90": 588.4237609863283, "max": 947.637939453125, "pos_frac": 0.765625, "sample": [695.8711547851562, 114.40489959716797, 279.01324462890625, 236.30374145507812, -10.242425918579102, 338.4862060546875, 366.5850830078125, 289.3932800292969, 171.33770751953125, 506.27972412109375, 417.2334899902344, 368.55804443359375, 121.21084594726562, 43.11716842651367, -245.50975036621094, 274.9249267578125, 458.42742919921875, 275.6652526855469, 248.56246948242188, 606.251953125, 809.224365234375, 140.78585815429688, -309.507568359375, 280.3171081542969, 207.5604705810547, 316.96331787109375, 789.5908203125, -242.00643920898438, -114.24046325683594, 109.51377868652344, 90.29127502441406, 546.8246459960938, -83.00277709960938, 312.03778076171875, 159.50408935546875, 417.3621826171875, 261.38275146484375, -274.05352783203125, 947.637939453125, 121.13541412353516, -25.96148681640625, 72.35843658447266, 15.592370986938477, 112.53701782226562, -46.726219177246094, 130.69906616210938, -119.31585693359375, -35.61433792114258, 144.40316772460938, 237.3942413330078, 674.1868286132812, 679.197998046875, -28.987407684326172, -259.6368408203125, 198.192138671875, 162.52676391601562, 163.78659057617188, 159.72743225097656, -4.626981735229492, 476.67755126953125, 242.70672607421875, 296.0171813964844, 211.663818359375, -49.052337646484375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000321.npy"}
{"epoch": 0.4713656387665198, "step": 322, "batch_size": 64, "mean": 224.41220092773438, "std": 241.4074249267578, "min": -301.3122863769531, "p10": -43.93543319702148, "median": 240.70982360839844, "p90": 506.3457702636719, "max": 779.60595703125, "pos_frac": 0.71875, "sample": [-197.57858276367188, 779.60595703125, 311.36627197265625, 774.4056396484375, 106.72649383544922, -34.16800308227539, 489.37945556640625, 385.8356018066406, 280.4915771484375, -60.0983772277832, -45.45466613769531, -6.875724792480469, 338.48370361328125, 285.1893005371094, 328.6060791015625, 361.71966552734375, -40.39055633544922, 242.06817626953125, -20.63671875, -106.45181274414062, 664.451416015625, 548.7952880859375, 199.4422607421875, 618.3157348632812, -34.42987060546875, 216.55838012695312, 378.96307373046875, 508.3848876953125, -4.375633239746094, 482.91693115234375, -22.487714767456055, 146.739501953125, -14.340129852294922, 393.54254150390625, -38.7237548828125, 6.574825286865234, 295.26007080078125, 193.7284698486328, -14.063213348388672, 309.59185791015625, 501.58782958984375, 433.7711181640625, 439.08612060546875, -89.89354705810547, 422.2802734375, 97.35517883300781, 299.2259521484375, -58.16901779174805, 239.35147094726562, 7.5931243896484375, 154.9278564453125, 656.7893676757812, 195.42697143554688, 306.6001892089844, 387.0583190917969, 367.781982421875, 112.06051635742188, 60.32410430908203, 70.87943267822266, -301.3122863769531, -31.564239501953125, 411.17425537109375, 406.9521484375, 266.02490234375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000322.npy"}
{"epoch": 0.47283406754772395, "step": 323, "batch_size": 64, "mean": 190.05551147460938, "std": 256.4014587402344, "min": -463.4825439453125, "p10": -98.14968566894531, "median": 182.568115234375, "p90": 560.3814208984376, "max": 802.5084838867188, "pos_frac": 0.796875, "sample": [376.6187438964844, 261.5145263671875, 587.2120361328125, -463.4825439453125, 632.5305786132812, -325.24609375, 592.7816162109375, 802.5084838867188, 21.213092803955078, 149.50872802734375, 296.9424133300781, 49.01953125, 195.84042358398438, 109.43561553955078, 15.290023803710938, 330.3099365234375, 97.36467742919922, 314.77899169921875, -99.55872344970703, 667.6260375976562, 4.297843933105469, 93.57514953613281, 498.04901123046875, -123.30330657958984, -120.00883483886719, 350.7789001464844, 115.16413116455078, 135.14137268066406, 419.0562744140625, 3.183349609375, 568.9402465820312, 278.707763671875, 471.4842529296875, -52.471893310546875, 293.98052978515625, 22.42010498046875, -18.997817993164062, 169.29580688476562, 283.738037109375, 206.59060668945312, 298.2942810058594, 94.469482421875, 212.7684326171875, 526.3417358398438, -343.11077880859375, 144.4739532470703, 299.3006896972656, 624.7321166992188, 403.3933410644531, -57.46782684326172, 219.82684326171875, 540.4108276367188, -94.86193084716797, 301.2798156738281, -245.22938537597656, 217.13604736328125, 116.96002197265625, 240.86270141601562, 75.11486053466797, 286.0950927734375, -15.869209289550781, -31.484943389892578, 126.19944763183594, 12.08740234375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000323.npy"}
{"epoch": 0.47430249632892807, "step": 324, "batch_size": 64, "mean": 186.71697998046875, "std": 296.74395751953125, "min": -385.01300048828125, "p10": -176.87523803710937, "median": 172.23516845703125, "p90": 494.2251434326172, "max": 993.8056640625, "pos_frac": 0.734375, "sample": [-183.50772094726562, 695.5567016601562, -91.33999633789062, 221.56997680664062, 50.12433624267578, 207.39324951171875, 174.60540771484375, 309.3436279296875, 96.37101745605469, 206.47650146484375, 861.4505615234375, -161.39944458007812, 117.74264526367188, 108.07931518554688, -35.09746170043945, 775.0281982421875, -229.2148895263672, -385.01300048828125, 358.6191101074219, 236.67739868164062, 25.184585571289062, 200.28793334960938, -209.7545166015625, 183.47084045410156, 349.9635009765625, 912.5045166015625, 478.49908447265625, -146.6284942626953, 107.36660766601562, 492.3846740722656, 452.8111877441406, 138.4939422607422, 87.6523666381836, 55.88373565673828, 403.6348571777344, 325.5180358886719, 148.62574768066406, 309.9653015136719, 214.584716796875, -17.54473304748535, 449.08782958984375, -374.37060546875, -135.935302734375, 393.63153076171875, 495.013916015625, 406.7793884277344, 170.81617736816406, -346.8959655761719, -195.03460693359375, 92.39630889892578, 274.56695556640625, 102.68627166748047, 304.8858642578125, -56.504695892333984, 63.99363708496094, 73.84417724609375, 373.4915771484375, -85.35298156738281, -34.432254791259766, 993.8056640625, 680.3093872070312, 173.65415954589844, -29.944595336914062, 313.025390625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000324.npy"}
{"epoch": 0.47577092511013214, "step": 325, "batch_size": 64, "mean": 158.47683715820312, "std": 240.71376037597656, "min": -400.5531311035156, "p10": -84.86911849975586, "median": 128.37326431274414, "p90": 451.67523803710947, "max": 948.4625244140625, "pos_frac": 0.796875, "sample": [-381.55279541015625, -400.5531311035156, 198.26889038085938, -74.99898529052734, 328.6631164550781, 293.0680847167969, 126.65149688720703, -79.94194030761719, 168.60888671875, 272.28900146484375, 340.7325439453125, 516.89990234375, 306.54400634765625, 165.7657470703125, 108.40943908691406, 118.8873062133789, -2.5470809936523438, 144.2164764404297, 48.81037902832031, 260.8980712890625, -210.56747436523438, 252.6007080078125, 284.09271240234375, 612.124755859375, 23.64957046508789, 41.934837341308594, 126.65025329589844, 98.56807708740234, 115.82319641113281, 34.49810028076172, 404.70697021484375, -93.38410949707031, 948.4625244140625, 3.4031410217285156, 547.8294677734375, 72.20470428466797, 562.7905883789062, -85.58552551269531, 272.2326965332031, 202.95004272460938, -83.19750213623047, 144.95477294921875, 427.6170959472656, -48.130279541015625, 254.6502685546875, 310.79681396484375, 158.40362548828125, 299.6831359863281, 109.37191772460938, 130.09503173828125, 344.8639831542969, -311.7965087890625, 23.601398468017578, 714.1051635742188, 27.784088134765625, -13.607505798339844, 461.9858703613281, 2.477182388305664, -137.5849609375, 244.46231079101562, 97.49806213378906, 30.380237579345703, 58.61186981201172, 221.38638305664062], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000325.npy"}
{"epoch": 0.47723935389133626, "step": 326, "batch_size": 64, "mean": 159.19859313964844, "std": 210.4442596435547, "min": -478.55523681640625, "p10": -10.408135986328125, "median": 144.4689483642578, "p90": 421.53975524902353, "max": 666.7449951171875, "pos_frac": 0.84375, "sample": [16.9967041015625, 7.094669342041016, 159.4403839111328, 396.1333923339844, 243.6110076904297, -9.3841552734375, 182.49118041992188, 115.36285400390625, 360.0571594238281, -275.27679443359375, -181.13565063476562, -478.55523681640625, 574.4032592773438, 71.38805389404297, -115.1595687866211, 155.53131103515625, 199.28936767578125, 97.72785949707031, 320.05950927734375, 253.82681274414062, 29.588043212890625, 87.16055297851562, 208.87991333007812, 159.052001953125, -27.36572265625, 93.23039245605469, 666.7449951171875, 190.8384246826172, 76.43849182128906, 122.44825744628906, 430.79034423828125, 201.41366577148438, 241.6552276611328, 120.29940795898438, 228.2080078125, 19.2633056640625, 39.677886962890625, 388.3072509765625, -8.19113540649414, 86.86996459960938, 130.58460998535156, -10.84698486328125, 60.70210647583008, 166.52786254882812, 24.236114501953125, 91.81159973144531, 270.43878173828125, 399.9550476074219, 264.9744873046875, 218.1226043701172, 17.359725952148438, 490.2012634277344, 654.0983276367188, 24.527122497558594, 283.162109375, 173.60397338867188, 513.6324462890625, 290.0703430175781, -2.537759780883789, -334.7647705078125, 442.09234619140625, 384.28277587890625, 53.85774612426758, 133.40658569335938], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000326.npy"}
{"epoch": 0.4787077826725404, "step": 327, "batch_size": 64, "mean": 243.88858032226562, "std": 259.6573181152344, "min": -318.830810546875, "p10": -92.12123718261716, "median": 263.99609375, "p90": 585.773828125, "max": 763.345947265625, "pos_frac": 0.796875, "sample": [209.5008544921875, 182.1191864013672, 547.0997924804688, 182.80145263671875, 155.37513732910156, 416.4440612792969, 113.75089263916016, 342.8739318847656, 349.16912841796875, 187.3950958251953, 401.90655517578125, 350.8599853515625, 118.52656555175781, 486.5570068359375, -152.39846801757812, 552.172607421875, -37.02456283569336, 570.618896484375, 395.2622375488281, 369.3759460449219, -237.53240966796875, 648.607177734375, 606.9484252929688, 335.00604248046875, 88.1898193359375, 628.892333984375, 303.42742919921875, -178.2598419189453, 307.82440185546875, 374.07342529296875, 434.23919677734375, -191.088623046875, -24.717529296875, 149.7750701904297, 301.72357177734375, 128.1221160888672, 285.57464599609375, 286.01654052734375, 242.41754150390625, 145.2872314453125, 0.1966094970703125, 90.14718627929688, -77.06655883789062, 443.4531555175781, 228.37152099609375, -98.5732421875, -33.8413200378418, 391.93603515625, 478.0008544921875, 124.41354370117188, 719.5747680664062, 87.72630310058594, 763.345947265625, -318.830810546875, 521.0999755859375, -46.390777587890625, 111.89903259277344, 592.268798828125, -16.374343872070312, 424.74359130859375, 321.7460021972656, 720.6074829101562, 107.26776885986328, -303.7654724121094], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000327.npy"}
{"epoch": 0.4801762114537445, "step": 328, "batch_size": 64, "mean": 217.22732543945312, "std": 274.74945068359375, "min": -458.5177917480469, "p10": -89.49351806640622, "median": 195.36598205566406, "p90": 557.2424255371095, "max": 943.6165771484375, "pos_frac": 0.75, "sample": [195.00460815429688, -159.74484252929688, 223.04055786132812, 183.90097045898438, 0.63629150390625, 472.76470947265625, 329.47198486328125, 157.52281188964844, 40.88563537597656, -458.5177917480469, 99.3929214477539, 481.5372009277344, 522.6821899414062, 416.5187683105469, 349.81439208984375, 497.3843994140625, 36.758033752441406, -129.3555145263672, -35.395538330078125, 200.713134765625, -62.97469711303711, -31.430419921875, 363.58856201171875, 943.6165771484375, 247.74172973632812, 392.2961730957031, 402.8829040527344, 205.4847412109375, 657.6354370117188, 387.9972839355469, -145.65530395507812, 221.94155883789062, 304.5953674316406, -115.67753601074219, 69.48141479492188, 97.79446411132812, 327.9098205566406, 402.19830322265625, 124.95989990234375, 188.7686004638672, -8.455705642700195, 165.56466674804688, -99.45388793945312, 195.72735595703125, -8.391914367675781, 477.59320068359375, 140.11854553222656, 267.5711975097656, 237.76077270507812, 214.95394897460938, 748.2745361328125, 572.053955078125, 49.76701354980469, -15.029510498046875, -12.046733856201172, 813.1329956054688, 926.0657958984375, 67.27191162109375, 665.028564453125, -66.25265502929688, -32.44994354248047, 295.4115295410156, 60.60588073730469, -160.44216918945312], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000328.npy"}
{"epoch": 0.48164464023494863, "step": 329, "batch_size": 64, "mean": 224.6842041015625, "std": 299.6548156738281, "min": -797.932861328125, "p10": -77.51896934509276, "median": 207.2824478149414, "p90": 607.9489318847657, "max": 984.7885131835938, "pos_frac": 0.796875, "sample": [188.65902709960938, 407.68145751953125, 580.3450317382812, 334.22808837890625, 369.0035095214844, -83.68013763427734, 514.045166015625, 47.757118225097656, 499.412841796875, 223.50360107421875, -20.62567138671875, 435.1015319824219, 64.37772369384766, -52.03846740722656, 84.28279113769531, 150.42037963867188, 244.3214569091797, 187.34219360351562, 59.39698791503906, 858.3768310546875, 139.78726196289062, 97.80543518066406, 237.1997528076172, 161.00860595703125, -797.932861328125, 301.0975646972656, 46.711448669433594, 984.7885131835938, 616.880615234375, -119.78733825683594, 231.42169189453125, -63.14291000366211, 421.5909118652344, 74.71353912353516, 229.71978759765625, 425.3322448730469, 633.3616943359375, 436.76055908203125, 766.3756713867188, 191.47421264648438, 33.67197036743164, -57.10280227661133, -196.92161560058594, 632.3380126953125, 519.32958984375, -181.11199951171875, 286.4282531738281, -103.96656799316406, 320.35479736328125, 207.8771209716797, 223.282958984375, -41.35749816894531, 317.081787109375, 206.68777465820312, 79.37540435791016, 48.95832061767578, 247.91184997558594, 478.94866943359375, 822.4769897460938, -25.96014404296875, -345.4317321777344, 202.52517700195312, 587.1083374023438, 10.206520080566406], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000329.npy"}
{"epoch": 0.4831130690161527, "step": 330, "batch_size": 64, "mean": 237.08126831054688, "std": 272.60009765625, "min": -276.66766357421875, "p10": -80.39442138671873, "median": 234.00270080566406, "p90": 532.4990234375001, "max": 934.93359375, "pos_frac": 0.796875, "sample": [465.958251953125, -32.52410888671875, -142.15234375, -84.86038208007812, 369.4833984375, 541.960693359375, 159.4926300048828, 186.66116333007812, -46.03484344482422, 86.71846771240234, -159.3830108642578, 134.89735412597656, 149.8839111328125, 467.878662109375, 540.3828125, 45.572303771972656, 354.3358459472656, 276.9473571777344, 273.5318603515625, 445.9339904785156, -44.662078857421875, 120.0584716796875, 327.0804748535156, 68.69514465332031, 151.498291015625, 325.3802490234375, -276.66766357421875, 364.70318603515625, 264.7440490722656, 180.41485595703125, 496.2198181152344, 11.141559600830078, -275.38934326171875, 279.8743591308594, 472.3013000488281, 212.97866821289062, 58.81468200683594, 890.68798828125, 15.525428771972656, 150.32501220703125, -18.123573303222656, 462.75262451171875, 141.12747192382812, 934.93359375, -240.5026397705078, 845.054443359375, 719.7601928710938, 358.1100769042969, 411.2566223144531, 85.28472900390625, 350.3783874511719, 459.44830322265625, 156.66522216796875, 737.619873046875, -32.501319885253906, 307.1084899902344, 255.0267333984375, 514.103515625, -197.27651977539062, 447.5433349609375, 58.45770263671875, 332.54638671875, 325.9920654296875, -69.97384643554688], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000330.npy"}
{"epoch": 0.4845814977973568, "step": 331, "batch_size": 64, "mean": 229.1529541015625, "std": 199.49591064453125, "min": -169.34249877929688, "p10": 0.14031906127930194, "median": 195.92516326904297, "p90": 445.9533874511721, "max": 895.1319580078125, "pos_frac": 0.890625, "sample": [154.54954528808594, 750.1654052734375, 307.9920959472656, -39.81640625, 258.69281005859375, 391.0595703125, 92.08126831054688, 55.11650085449219, 197.0980987548828, 115.26416015625, 704.0517578125, 295.3701477050781, 895.1319580078125, 5.120368957519531, 185.80616760253906, 372.97271728515625, 639.3143310546875, 135.23426818847656, 174.43338012695312, 204.80426025390625, 468.2608337402344, 393.9026794433594, 469.27203369140625, -37.30247116088867, 91.21566772460938, 174.7670135498047, 169.9978790283203, 341.2483825683594, 256.70892333984375, -22.5333251953125, 251.1687469482422, -83.42164611816406, 301.07440185546875, 194.75222778320312, 71.47246551513672, 171.38973999023438, -169.34249877929688, 179.88693237304688, 321.02587890625, 615.2085571289062, 273.9729309082031, 199.53575134277344, 132.2336883544922, 364.3839111328125, 101.77478790283203, 92.4683837890625, 242.58392333984375, 191.17202758789062, 285.6037902832031, 58.10130310058594, -129.02935791015625, 258.8207092285156, 135.9339599609375, 84.83061218261719, 334.8420104980469, 338.2420654296875, 295.1339111328125, 78.55805206298828, 149.0259246826172, 340.99822998046875, 140.7370147705078, 290.08197021484375, 354.583740234375, -1.993988037109375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000331.npy"}
{"epoch": 0.48604992657856094, "step": 332, "batch_size": 64, "mean": 162.39129638671875, "std": 235.04383850097656, "min": -260.8882141113281, "p10": -108.06213531494139, "median": 110.80982208251953, "p90": 453.91249694824216, "max": 912.18359375, "pos_frac": 0.78125, "sample": [788.1957397460938, 137.24032592773438, 155.1687774658203, 299.6371154785156, 28.91510009765625, 4.9226531982421875, 60.61637878417969, 444.47296142578125, 378.85943603515625, 153.3545684814453, 29.944198608398438, 62.66569519042969, -124.6053466796875, 912.18359375, 199.46983337402344, 99.31609344482422, 109.22689819335938, 135.9423065185547, 100.16187286376953, 1.5707683563232422, 108.61687469482422, 89.47312927246094, 454.1042175292969, -170.38206481933594, 343.26580810546875, 44.0534553527832, -190.53231811523438, 104.27739715576172, 592.80078125, 213.92129516601562, 139.40780639648438, 230.33322143554688, 294.03387451171875, -70.7662124633789, -59.25928497314453, 68.8134536743164, 517.9603271484375, 478.3416442871094, -138.05856323242188, 251.2078399658203, 163.57379150390625, -260.8882141113281, 357.5225830078125, 91.89613342285156, 362.5434265136719, 686.6127319335938, 112.39274597167969, 453.46514892578125, -57.69451904296875, 98.94320678710938, -223.68014526367188, 222.09963989257812, 350.1213073730469, -119.06143188476562, 361.7621765136719, 33.763126373291016, -82.09931945800781, -7.220333099365234, -82.39710998535156, 173.20736694335938, 282.7066650390625, 129.034423828125, 108.95994567871094, -41.391845703125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000332.npy"}
{"epoch": 0.48751835535976507, "step": 333, "batch_size": 64, "mean": 218.6634521484375, "std": 300.07330322265625, "min": -303.8968505859375, "p10": -181.92406311035157, "median": 234.7210235595703, "p90": 574.1784667968751, "max": 972.695556640625, "pos_frac": 0.71875, "sample": [-82.55101013183594, 302.8311462402344, 972.695556640625, -12.618118286132812, 111.8899917602539, 62.95921325683594, 288.41680908203125, 248.6723175048828, 18.93902587890625, 43.78872299194336, 784.5390014648438, -200.0472412109375, 181.89306640625, -251.75973510742188, 549.5492553710938, -182.57492065429688, 678.9081420898438, -109.27218627929688, 646.2049560546875, 389.5813293457031, -25.601139068603516, 328.5461730957031, 11.023147583007812, 379.5392150878906, 445.094482421875, 540.4258422851562, -180.4053955078125, 112.52398681640625, 71.92474365234375, 420.953125, 234.1149444580078, -10.735918045043945, -79.66934204101562, 549.1143188476562, 537.9632568359375, 413.0299072265625, -272.6985168457031, 10.435317993164062, 559.0663452148438, 451.4665832519531, 466.78240966796875, -262.3577880859375, -14.426521301269531, 522.50927734375, 480.97149658203125, -45.672523498535156, 235.3271026611328, 162.1234893798828, 738.1602783203125, 276.8358154296875, 475.26373291015625, 313.3331604003906, 580.6550903320312, 146.21795654296875, 245.55075073242188, -155.24427795410156, 428.2320556640625, -27.71395492553711, 299.18817138671875, -303.8968505859375, 611.5056762695312, -286.9678649902344, 72.52018737792969, 97.40705871582031], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000333.npy"}
{"epoch": 0.4889867841409692, "step": 334, "batch_size": 64, "mean": 193.68478393554688, "std": 302.49334716796875, "min": -630.1360473632812, "p10": -118.93902816772457, "median": 145.79762268066406, "p90": 671.0344604492188, "max": 796.8699951171875, "pos_frac": 0.796875, "sample": [637.7657470703125, 60.601226806640625, -193.05593872070312, 504.74188232421875, 264.6419372558594, 42.39965057373047, -88.16161346435547, 67.94377899169922, 240.45391845703125, 31.21136474609375, 258.9542236328125, 13.448112487792969, 380.7733459472656, 185.15475463867188, -132.1293487548828, 197.8782501220703, 55.53834533691406, 450.040771484375, -630.1360473632812, 685.29248046875, 55.78786849975586, 248.88388061523438, -14.5867919921875, -294.1481018066406, 364.5423583984375, 205.51840209960938, 487.84307861328125, -398.55517578125, -239.43553161621094, -53.49464416503906, 33.15119934082031, -37.00895309448242, 122.93968200683594, 73.97119140625, 761.5355834960938, 794.52880859375, 295.85205078125, 13.236948013305664, 111.32975769042969, 502.78607177734375, 78.78807830810547, 286.41656494140625, 504.7769775390625, 400.05181884765625, -67.58869934082031, 75.05520629882812, 796.8699951171875, 69.65768432617188, 691.950927734375, 507.06805419921875, -86.78461456298828, -359.806396484375, 36.02996826171875, 295.9461975097656, 177.3909454345703, 733.80419921875, 490.1805114746094, 140.45912170410156, 41.54277038574219, 368.6917419433594, 89.79000854492188, 718.5386352539062, 151.13612365722656, 187.82540893554688], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000334.npy"}
{"epoch": 0.49045521292217326, "step": 335, "batch_size": 64, "mean": 159.14288330078125, "std": 270.9282531738281, "min": -308.02880859375, "p10": -203.04672393798822, "median": 147.66073608398438, "p90": 473.98120727539066, "max": 997.6619262695312, "pos_frac": 0.71875, "sample": [174.33892822265625, 399.3168029785156, 471.43768310546875, 12.910247802734375, 180.59991455078125, -76.68082427978516, 76.4958267211914, 475.0712890625, 60.022342681884766, 52.86420440673828, -237.27000427246094, 115.58909606933594, 322.9817199707031, 258.0850830078125, -14.776908874511719, 659.4351806640625, 65.52632904052734, 132.01947021484375, -267.9658508300781, 228.481201171875, -115.88667297363281, 37.75187683105469, 194.5482177734375, 187.65774536132812, 736.974853515625, 555.6311645507812, 377.3973083496094, 271.283935546875, 38.05738830566406, 633.1300048828125, 997.6619262695312, -278.611328125, 180.74758911132812, -265.328125, 400.4378356933594, -17.65763282775879, 434.57586669921875, 11.506792068481445, 71.61007690429688, 516.9419555664062, -143.6478271484375, 335.55889892578125, -106.7850570678711, 74.3226318359375, 334.7672424316406, -55.51470184326172, 217.3821258544922, -21.66387939453125, -228.13919067382812, -120.5389404296875, 120.4979248046875, -144.4976348876953, 403.91192626953125, -308.02880859375, -114.18328094482422, 391.26129150390625, 260.8350830078125, 458.4522705078125, 14.187393188476562, -242.06161499023438, 252.32611083984375, 376.6404113769531, 209.84710693359375, 163.302001953125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000335.npy"}
{"epoch": 0.4919236417033774, "step": 336, "batch_size": 64, "mean": 242.4196319580078, "std": 326.6019287109375, "min": -590.7725219726562, "p10": -74.54704284667967, "median": 219.9119644165039, "p90": 736.7287170410158, "max": 1056.91796875, "pos_frac": 0.84375, "sample": [345.7191162109375, 58.37688446044922, 882.7225341796875, 17.92656898498535, 409.0982360839844, 168.340087890625, 266.5271911621094, -112.44612884521484, -67.02761840820312, 881.4710693359375, 284.03668212890625, 297.0556640625, 195.81378173828125, 431.0631408691406, 1056.91796875, -29.243532180786133, 747.952880859375, 371.9551696777344, 284.4163818359375, 349.33123779296875, -62.20814895629883, 79.69024658203125, 154.28404235839844, 317.6034851074219, 116.08267211914062, 338.31768798828125, 134.32199096679688, 67.0445556640625, 710.5390014648438, 236.48846435546875, 203.33546447753906, 68.94342803955078, 84.63661193847656, 278.3364562988281, 129.59718322753906, 14.84417724609375, -355.3902587890625, 770.3956909179688, -77.7696533203125, -173.34524536132812, 699.9000854492188, 26.93274688720703, 812.095947265625, 64.27366638183594, 413.5694580078125, 78.35250854492188, -590.7725219726562, 80.88961029052734, 315.49560546875, 329.4461669921875, 131.60972595214844, 121.75597381591797, 180.9840850830078, 67.3458023071289, 276.65789794921875, 467.50958251953125, 465.01837158203125, 1003.9205932617188, -395.01409912109375, 322.571044921875, -447.99346923828125, 363.2317810058594, 315.4320983886719, 535.8889770507812], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000336.npy"}
{"epoch": 0.4933920704845815, "step": 337, "batch_size": 64, "mean": 234.3765411376953, "std": 262.86138916015625, "min": -252.9602813720703, "p10": -94.21924514770507, "median": 208.73727416992188, "p90": 574.4450927734376, "max": 1016.9542236328125, "pos_frac": 0.828125, "sample": [887.5435791015625, 276.039794921875, -252.9602813720703, 100.17450714111328, 541.5581665039062, 464.9888000488281, -23.480201721191406, 291.55706787109375, 63.2465934753418, 443.491943359375, 7.604469299316406, 251.25283813476562, 289.91015625, 412.99932861328125, 435.3298034667969, -102.17725372314453, 259.3358154296875, 194.31431579589844, -55.06913757324219, 441.79681396484375, 149.5896453857422, 235.3223876953125, 0.8655452728271484, 588.5394897460938, 174.11329650878906, -112.55194091796875, 1016.9542236328125, 70.7738037109375, -172.09921264648438, -170.7093505859375, 222.73458862304688, 175.3176727294922, 282.587890625, -185.65927124023438, 165.81419372558594, 168.48728942871094, 456.1882629394531, -5.2856903076171875, 261.96588134765625, 367.2783508300781, 36.88056564331055, 714.9004516601562, 65.2662353515625, 172.71966552734375, -236.50462341308594, 156.6483154296875, 803.2597045898438, 524.6864624023438, 347.4254150390625, 81.12574005126953, 194.73995971679688, 99.39739227294922, 590.1033935546875, 348.3953857421875, 135.98341369628906, 619.9240112304688, 336.6240234375, 192.18896484375, 17.475791931152344, 356.1477355957031, 317.1714172363281, 233.13232421875, -75.65055847167969, 350.37310791015625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000337.npy"}
{"epoch": 0.4948604992657856, "step": 338, "batch_size": 64, "mean": 222.4942626953125, "std": 222.23809814453125, "min": -212.87045288085938, "p10": -2.8557106018066376, "median": 178.20867156982422, "p90": 521.2991088867187, "max": 789.8837890625, "pos_frac": 0.890625, "sample": [50.86698913574219, 521.8071899414062, 69.39131164550781, -212.87045288085938, 182.44589233398438, 107.55305480957031, -131.469482421875, 279.54388427734375, 630.9266967773438, 254.41403198242188, 89.59529113769531, 74.81462097167969, -97.49346923828125, 480.4635314941406, 80.90402221679688, 262.80126953125, 47.471710205078125, 485.8932189941406, 428.30517578125, 543.7323608398438, 665.89697265625, 3.1884078979492188, 233.8231201171875, 176.5596466064453, 252.85252380371094, 17.292221069335938, 427.4783630371094, -22.262474060058594, -8.029319763183594, 1.4380264282226562, 188.43597412109375, 446.0373229980469, 489.9781494140625, 76.4405517578125, 220.94387817382812, 382.80340576171875, 166.10528564453125, 56.966800689697266, 349.7310485839844, 121.64580535888672, 426.6078186035156, 78.14888000488281, 62.4158935546875, 85.86448669433594, 445.63800048828125, 41.35791015625, 265.3174133300781, 149.07431030273438, 326.18426513671875, 165.71543884277344, 0.12310791015625, 37.78706741333008, 574.7798461914062, 789.8837890625, -4.132347106933594, 274.15032958984375, 470.72515869140625, 179.85769653320312, 314.07891845703125, 155.01962280273438, 616.9893798828125, -207.62646484375, 75.13592529296875, 520.1135864257812], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000338.npy"}
{"epoch": 0.49632892804698975, "step": 339, "batch_size": 64, "mean": 218.38970947265625, "std": 237.4848175048828, "min": -227.92320251464844, "p10": -49.387886428833, "median": 199.04153442382812, "p90": 586.28564453125, "max": 783.2193603515625, "pos_frac": 0.84375, "sample": [58.262359619140625, -139.90432739257812, -39.56460952758789, -53.597862243652344, 597.5234375, 27.386184692382812, 81.03324890136719, 155.80062866210938, 354.9554748535156, 29.226661682128906, 783.2193603515625, 74.59468078613281, 96.38412475585938, 141.53079223632812, 317.25555419921875, 355.7908935546875, 14.41435432434082, 11.582939147949219, 75.89984893798828, 590.3311767578125, 248.3992156982422, 25.47418975830078, 237.52328491210938, -15.961006164550781, 230.32125854492188, 153.40855407714844, 602.7135009765625, 27.37188720703125, 352.3768310546875, 643.2116088867188, -173.084228515625, 260.9393005371094, -226.08547973632812, 138.5145263671875, 513.6815185546875, 308.79071044921875, 179.20834350585938, -35.851707458496094, 230.74349975585938, 380.447998046875, 148.55262756347656, 259.23333740234375, 466.75146484375, 514.4453125, 117.0003433227539, 692.8274536132812, -170.75677490234375, 650.9749755859375, 271.9720153808594, -227.92320251464844, 206.5663299560547, 189.93670654296875, 16.233932495117188, 236.0130615234375, 63.740596771240234, 191.51673889160156, 231.7467803955078, -57.75591278076172, 485.40679931640625, 281.5718688964844, 493.88543701171875, 576.8460693359375, 394.09661865234375, 329.7903137207031], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000339.npy"}
{"epoch": 0.4977973568281938, "step": 340, "batch_size": 64, "mean": 197.50909423828125, "std": 269.6901550292969, "min": -376.09478759765625, "p10": -113.94139328002926, "median": 164.76229858398438, "p90": 538.8825622558594, "max": 934.90283203125, "pos_frac": 0.765625, "sample": [184.6085205078125, 266.83843994140625, 78.58417510986328, 176.63909912109375, 179.0083465576172, 498.2619934082031, 650.728271484375, -376.09478759765625, 254.99835205078125, -66.19231414794922, 386.8184814453125, 934.90283203125, 107.196533203125, -257.6263732910156, 600.0809326171875, -41.67266082763672, 285.6416931152344, 127.81681823730469, -36.509254455566406, 83.9859619140625, 154.24957275390625, -177.75604248046875, -359.0436096191406, 534.1227416992188, -133.0250701904297, 137.87490844726562, 18.96895408630371, 401.2304382324219, -52.35235595703125, 322.0467224121094, 498.5880126953125, 48.35737609863281, 882.2472534179688, 674.1439819335938, 11.37061882019043, 145.76560974121094, 148.24371337890625, 237.64309692382812, 113.61958312988281, 175.2750244140625, 244.2596435546875, 134.35360717773438, 225.84381103515625, -15.709842681884766, -130.7245635986328, 294.61444091796875, 258.1416015625, -74.7806625366211, 533.360595703125, -9.919448852539062, 119.23152923583984, 451.0600891113281, 540.9224853515625, 579.9173583984375, 406.9061279296875, 222.84243774414062, 148.5953826904297, 44.98208999633789, 408.9739074707031, 189.99557495117188, -247.45582580566406, 405.698486328125, -40.14013671875, 130.02752685546875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000340.npy"}
{"epoch": 0.49926578560939794, "step": 341, "batch_size": 64, "mean": 227.45578002929688, "std": 262.4552307128906, "min": -253.4835968017578, "p10": -67.9309730529785, "median": 208.4241180419922, "p90": 514.1222290039062, "max": 1170.69482421875, "pos_frac": 0.78125, "sample": [1170.69482421875, -54.73394775390625, -72.35426330566406, -189.2165069580078, 499.13311767578125, 507.8426513671875, 689.9952392578125, 486.3614196777344, 220.0681610107422, 169.01971435546875, -36.290863037109375, 202.39663696289062, 180.50714111328125, 267.7776184082031, 29.63032341003418, -52.361236572265625, -28.817398071289062, 356.738037109375, 57.042686462402344, 289.99554443359375, -174.95925903320312, 382.738525390625, 457.914794921875, 283.92425537109375, 77.87715911865234, 575.76416015625, 123.89318084716797, 196.3114013671875, -57.23954772949219, -95.47099304199219, 112.96791076660156, 360.58123779296875, 641.9566650390625, 276.9579772949219, 331.44769287109375, 419.4540100097656, 98.48606872558594, 13.169466018676758, -76.46820831298828, 480.94573974609375, 501.1358642578125, 296.37738037109375, 75.31642150878906, 77.70792388916016, -253.4835968017578, 516.8134765625, 54.04707336425781, 225.6651153564453, -204.87161254882812, 362.0736389160156, 214.45159912109375, 274.3036804199219, 358.15216064453125, -57.609962463378906, 724.2132568359375, -16.382583618164062, 347.88616943359375, 427.08819580078125, 474.7733154296875, 83.98611450195312, 158.25604248046875, 135.71034240722656, 578.7216796875, 79.15682220458984], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000341.npy"}
{"epoch": 0.5007342143906021, "step": 342, "batch_size": 64, "mean": 200.836181640625, "std": 270.1055908203125, "min": -591.6846923828125, "p10": -88.73346710205078, "median": 172.4019775390625, "p90": 481.79977722167973, "max": 986.35693359375, "pos_frac": 0.765625, "sample": [717.0667114257812, -591.6846923828125, -181.81838989257812, 526.856689453125, 83.7410888671875, 41.14305114746094, 208.96023559570312, 346.97393798828125, 362.6058349609375, 350.8088073730469, 140.0989227294922, 128.12705993652344, 195.0548095703125, 210.63575744628906, 455.4830627441406, 300.2686767578125, 94.36152648925781, 456.9300231933594, -233.39889526367188, -34.35533142089844, 360.30255126953125, -86.07781219482422, 99.57492065429688, -62.177146911621094, -114.49308013916016, 288.9673156738281, 953.8973388671875, 365.0487976074219, 94.41909790039062, -197.6591033935547, -45.57773971557617, 986.35693359375, 201.42242431640625, 202.14053344726562, 14.941726684570312, -89.8716049194336, 507.8885498046875, 485.3596496582031, 587.3135375976562, 174.2975616455078, -8.552961349487305, 170.5063934326172, 361.0138244628906, -55.431976318359375, 432.7142639160156, 401.5882263183594, 158.05694580078125, 416.56878662109375, 88.37745666503906, 437.00885009765625, 473.493408203125, 123.38416290283203, -151.15267944335938, 141.1593780517578, 389.7748718261719, -56.65276336669922, 158.5022735595703, -77.5228042602539, 185.62982177734375, 92.92924499511719, 209.4906005859375, 445.278076171875, 137.25750732421875, 76.1615219116211], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000342.npy"}
{"epoch": 0.5022026431718062, "step": 343, "batch_size": 64, "mean": 240.85354614257812, "std": 206.98602294921875, "min": -345.42828369140625, "p10": 23.64659233093265, "median": 229.90457916259766, "p90": 480.09211730957037, "max": 761.2392578125, "pos_frac": 0.921875, "sample": [345.46429443359375, 179.895263671875, 165.5831756591797, 133.1904296875, 252.58273315429688, 9.375190734863281, 283.7130432128906, 353.7669677734375, 189.52447509765625, 667.0521850585938, 151.01405334472656, -115.6138916015625, 245.1795654296875, 79.70199584960938, 195.13156127929688, 56.946529388427734, 76.89300537109375, 258.0738220214844, 238.06729125976562, 629.64013671875, 198.63211059570312, 291.58343505859375, 99.92079162597656, 324.36322021484375, 409.908447265625, 336.09222412109375, 90.79127502441406, 111.22482299804688, 756.5447998046875, 221.7418670654297, 62.28833770751953, 91.44477844238281, 387.0915222167969, 83.921875, 263.9222106933594, 347.4349060058594, 246.09780883789062, 106.96208190917969, 7.116230010986328, 398.68817138671875, -345.42828369140625, 314.8680725097656, 170.9207763671875, 413.7536315917969, 311.18951416015625, 761.2392578125, 354.00311279296875, 333.821044921875, 240.3839111328125, 348.5886535644531, 196.29141235351562, 343.7270812988281, 80.31661987304688, -37.108089447021484, 472.7132568359375, 483.2544860839844, 171.8527374267578, 153.65512084960938, 190.93399047851562, 181.6251220703125, -52.57526397705078, 631.12841796875, 632.3602905273438, -167.84097290039062], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000343.npy"}
{"epoch": 0.5036710719530103, "step": 344, "batch_size": 64, "mean": 212.74746704101562, "std": 278.8431396484375, "min": -201.12754821777344, "p10": -61.479529571533185, "median": 176.8047103881836, "p90": 563.3976501464844, "max": 1482.066162109375, "pos_frac": 0.8125, "sample": [49.65386199951172, -69.47283172607422, -81.48835754394531, -12.111587524414062, 74.5164794921875, 81.45455932617188, 191.44729614257812, 137.83328247070312, 76.93557739257812, 255.2747344970703, 28.354951858520508, 805.6083984375, 42.55767059326172, 207.6691131591797, 77.93455505371094, 229.232666015625, 249.2870635986328, -201.12754821777344, -194.92193603515625, 291.79547119140625, 217.7544708251953, 75.58589172363281, 175.0526580810547, 13.269306182861328, 229.85009765625, 655.9136962890625, 555.2883911132812, 439.3287048339844, 73.50360870361328, -15.18157958984375, 495.5932922363281, 230.20556640625, 250.346923828125, 84.2177734375, 178.5567626953125, -26.373504638671875, 353.48822021484375, -42.8284912109375, 270.5870361328125, 17.925214767456055, 566.873046875, 293.7730712890625, 160.6425018310547, 133.63189697265625, 79.07727813720703, 1482.066162109375, 449.3349304199219, 318.6092834472656, 13.320863723754883, 654.2631225585938, -32.065467834472656, 381.6217956542969, 188.0183563232422, 582.0942993164062, 90.24382019042969, -179.5391387939453, -163.27175903320312, 438.84027099609375, 568.9930419921875, -198.41281127929688, 287.44830322265625, 381.7970886230469, 100.23033142089844, 545.7291259765625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000344.npy"}
{"epoch": 0.5051395007342144, "step": 345, "batch_size": 64, "mean": 239.02157592773438, "std": 267.0421447753906, "min": -412.76824951171875, "p10": -37.091122436523435, "median": 202.93138122558594, "p90": 597.027282714844, "max": 963.9730224609375, "pos_frac": 0.828125, "sample": [40.65068817138672, 625.3590087890625, 136.49920654296875, 361.228515625, 122.8819351196289, -195.8353271484375, 747.23876953125, -184.87196350097656, 196.265380859375, 474.18487548828125, -34.085899353027344, 335.89031982421875, 148.31134033203125, 364.0179138183594, 530.919921875, 98.87997436523438, -313.3255310058594, 301.69195556640625, 61.98202133178711, 124.40235900878906, 474.3645935058594, 392.30853271484375, 78.01210021972656, 445.6550598144531, 335.58740234375, 520.1781616210938, 126.54973602294922, 302.1591491699219, 104.833984375, 194.39451599121094, 42.895660400390625, 844.5121459960938, 372.519775390625, 77.23112487792969, 96.45120239257812, 410.9774475097656, 416.0836181640625, 489.22772216796875, 397.1239318847656, 299.49444580078125, 362.87646484375, 963.9730224609375, 44.40993881225586, 129.65606689453125, 397.65692138671875, 273.6345520019531, 18.320484161376953, 681.9395751953125, 205.21536254882812, 410.44415283203125, -35.108604431152344, -88.66919708251953, 204.85617065429688, 634.0908203125, -12.096799850463867, -142.08905029296875, -37.940773010253906, 100.54306030273438, 639.140869140625, 201.006591796875, 251.6068115234375, 152.80642700195312, -412.76824951171875, -8.968887329101562], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000345.npy"}
{"epoch": 0.5066079295154186, "step": 346, "batch_size": 64, "mean": 219.47308349609375, "std": 257.0786437988281, "min": -593.8995361328125, "p10": -85.81837997436521, "median": 232.52356719970703, "p90": 594.1106750488283, "max": 753.2579345703125, "pos_frac": 0.78125, "sample": [331.3021545410156, 225.5264434814453, 54.24352264404297, 329.92205810546875, 665.1492919921875, 517.3016357421875, 681.43798828125, 224.7228546142578, 197.73570251464844, 703.7222290039062, -593.8995361328125, 121.06028747558594, 383.56683349609375, 51.60196304321289, 348.4772644042969, 637.075927734375, -16.667083740234375, 314.24407958984375, 753.2579345703125, 72.87835693359375, 333.1868896484375, 151.07095336914062, -139.5260009765625, 413.4559326171875, 291.23834228515625, 327.56903076171875, 325.300537109375, 239.52069091796875, 381.6416015625, 73.94815063476562, -18.116701126098633, -91.80721282958984, 172.77098083496094, 103.06214904785156, 631.6427001953125, 198.44395446777344, 287.6317138671875, 164.27989196777344, 289.48345947265625, -178.3185577392578, -24.27558135986328, 613.431396484375, 284.23834228515625, 549.0289916992188, 110.25425720214844, 345.957763671875, 382.4561462402344, 345.7730407714844, -16.422077178955078, -20.740230560302734, 260.12701416015625, -71.84443664550781, 89.11349487304688, -176.1884307861328, 122.03092956542969, 468.5970458984375, 113.25579833984375, 108.89950561523438, 489.9341735839844, -229.27352905273438, -66.03802490234375, -165.42617797851562, 245.6615447998047, 328.58892822265625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000346.npy"}
{"epoch": 0.5080763582966226, "step": 347, "batch_size": 64, "mean": 222.85726928710938, "std": 278.6788635253906, "min": -355.36602783203125, "p10": -119.08620758056638, "median": 202.90349578857422, "p90": 576.1639038085938, "max": 871.930419921875, "pos_frac": 0.796875, "sample": [-355.36602783203125, 254.18304443359375, 510.1134033203125, 347.29217529296875, 96.95146942138672, -9.891792297363281, 192.72372436523438, 259.2147216796875, 584.2434692382812, 52.45482635498047, 485.593017578125, 33.00212860107422, 125.66590881347656, -69.4805908203125, 398.1907958984375, -259.136962890625, -272.2472229003906, -128.18765258789062, 791.0572509765625, 101.56493377685547, 557.3115844726562, 18.86117935180664, 267.4898681640625, 295.03826904296875, -179.8712158203125, 536.4043579101562, 486.0057678222656, 332.239013671875, 2.2400894165039062, 769.2523803710938, 337.5579833984375, 653.4512329101562, 351.61199951171875, -7.059181213378906, 214.22854614257812, -30.257041931152344, 501.19873046875, 189.34324645996094, 123.076171875, -97.84950256347656, 213.08335876464844, 300.1092529296875, 102.34465789794922, 351.125732421875, 163.32791137695312, 109.49685668945312, 115.36163330078125, 8.876029968261719, 871.930419921875, 78.0390396118164, 332.022216796875, 614.9844360351562, 213.08326721191406, 806.6959228515625, -70.25695037841797, 61.679054260253906, 349.15380859375, -206.1991729736328, 424.4154968261719, 46.92872619628906, 455.1015930175781, -176.9998321533203, 111.54447937011719, 528.80322265625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000347.npy"}
{"epoch": 0.5095447870778267, "step": 348, "batch_size": 64, "mean": 186.99984741210938, "std": 260.75811767578125, "min": -306.52398681640625, "p10": -121.01541290283201, "median": 184.55374908447266, "p90": 442.8052642822266, "max": 1103.3026123046875, "pos_frac": 0.78125, "sample": [305.81640625, 76.5510482788086, 168.72073364257812, 419.75726318359375, 364.3953552246094, -85.41230010986328, 16.43500518798828, 326.6979064941406, 383.2890625, -306.52398681640625, 137.2108612060547, 81.64212036132812, 604.93603515625, 282.6851806640625, 85.79792785644531, 290.92852783203125, 23.328950881958008, 345.9683532714844, 128.566162109375, 82.8324966430664, 339.12225341796875, 321.0367736816406, -88.19060516357422, 1103.3026123046875, 440.5020446777344, 276.4662780761719, -60.08837127685547, 263.81634521484375, 5.649229049682617, 78.12779998779297, -52.62245178222656, -138.40611267089844, -132.40805053710938, 946.6644897460938, 573.6126708984375, 308.49346923828125, 301.309814453125, 292.33056640625, 233.59434509277344, 11.60835075378418, -260.01470947265625, 412.666015625, -67.46045684814453, 313.1708679199219, 264.2328796386719, 248.47854614257812, -128.90264892578125, 4.781515121459961, 515.9525146484375, 264.87176513671875, 124.00089263916016, -102.61186218261719, 443.7923583984375, -244.16796875, 381.9856872558594, 200.3867645263672, 64.66963958740234, 10.629642486572266, 31.340850830078125, 521.9229736328125, 76.72470092773438, 324.41119384765625, -151.8209686279297, -38.594547271728516], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000348.npy"}
{"epoch": 0.5110132158590308, "step": 349, "batch_size": 64, "mean": 210.99227905273438, "std": 301.4742126464844, "min": -479.6652526855469, "p10": -87.89560928344726, "median": 144.00296020507812, "p90": 553.9861145019531, "max": 1333.76416015625, "pos_frac": 0.796875, "sample": [145.76791381835938, -353.6776123046875, 459.3036804199219, 133.50230407714844, 31.642925262451172, 134.44296264648438, 524.02294921875, -92.25555419921875, 250.57772827148438, 395.7942810058594, 139.857421875, 447.74200439453125, 158.74606323242188, -36.711143493652344, 40.902374267578125, 85.4361343383789, -32.89899444580078, 17.99474334716797, 231.22698974609375, -208.458984375, 497.2603759765625, 337.8565979003906, 309.2393798828125, 27.878835678100586, 334.43084716796875, -34.32841873168945, 371.40496826171875, -77.72240447998047, 120.6463851928711, 142.23800659179688, 792.1013793945312, 3.0438613891601562, -55.64243698120117, 441.05426025390625, 178.67498779296875, -479.6652526855469, -163.22161865234375, 97.94072723388672, 557.6845703125, 57.139713287353516, 715.128662109375, 28.709686279296875, -250.23233032226562, -41.25709533691406, 560.0567626953125, 577.3955688476562, 890.2054443359375, 154.7062225341797, 545.3563842773438, 178.46365356445312, 408.2732238769531, 479.9154052734375, 62.49126434326172, 370.7023620605469, 292.19805908203125, 272.4645080566406, -105.85973358154297, 101.17384338378906, 34.92817306518555, 449.2959899902344, 385.9441833496094, 55.257080078125, 73.45135498046875, 1333.76416015625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000349.npy"}
{"epoch": 0.5124816446402349, "step": 350, "batch_size": 64, "mean": 201.89549255371094, "std": 285.01116943359375, "min": -516.1025390625, "p10": -165.03458709716796, "median": 192.2936248779297, "p90": 504.7183959960938, "max": 975.3400268554688, "pos_frac": 0.8125, "sample": [767.7494506835938, -424.4781799316406, 107.6037368774414, 376.24517822265625, 288.97991943359375, 194.29544067382812, 393.9735107421875, 89.38031005859375, -141.00709533691406, 90.89607238769531, 110.28781127929688, 412.4017639160156, -451.83807373046875, 160.5887908935547, 201.45651245117188, 23.449920654296875, 715.7939453125, 468.933349609375, 975.3400268554688, 279.0931091308594, 173.1510009765625, 280.0973815917969, -38.60594940185547, 397.8664245605469, 197.02801513671875, 744.6365356445312, 117.58612060546875, -182.97601318359375, 108.92731475830078, -2.0224533081054688, 511.5743408203125, 128.25735473632812, 170.28890991210938, 167.45492553710938, 317.49749755859375, 307.902099609375, 115.37731170654297, 304.4852294921875, -238.028564453125, 688.3258056640625, 176.7445526123047, 488.72119140625, 317.87744140625, -143.54217529296875, 172.28369140625, 243.49806213378906, 541.0161743164062, 429.5346984863281, 208.92626953125, 221.360595703125, -516.1025390625, 425.52301025390625, 124.47410583496094, 272.8788146972656, 39.795982360839844, 204.38821411132812, -302.22296142578125, -88.66134643554688, 70.5796127319336, 176.18594360351562, 475.2921142578125, 190.29180908203125, -174.24562072753906, 458.74462890625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000350.npy"}
{"epoch": 0.5139500734214391, "step": 351, "batch_size": 64, "mean": 249.21287536621094, "std": 311.09381103515625, "min": -393.289794921875, "p10": -103.09831085205073, "median": 227.98191833496094, "p90": 677.9021606445314, "max": 1117.8570556640625, "pos_frac": 0.765625, "sample": [84.1475830078125, 311.4801940917969, 483.21527099609375, 1117.8570556640625, 1099.8741455078125, 115.45661926269531, 413.7141418457031, 109.16246032714844, 290.8603515625, 318.56109619140625, 47.34625244140625, 259.5318908691406, 702.354736328125, 836.3416137695312, 377.55224609375, 318.0262145996094, 514.8470458984375, 556.8351440429688, 654.055419921875, -37.21546173095703, -178.66256713867188, 415.38116455078125, 119.90509033203125, 229.50967407226562, 183.65573120117188, 331.3139953613281, 512.2227783203125, 232.0839385986328, -393.289794921875, -51.05079650878906, -276.8120422363281, 185.3162841796875, -137.67251586914062, 161.84365844726562, -18.515106201171875, 821.8992919921875, 27.812416076660156, 142.08627319335938, 226.45416259765625, 24.183799743652344, -125.40438842773438, 356.0819396972656, 20.912372589111328, 688.1221923828125, 31.05999755859375, -32.30773162841797, 173.6605682373047, -17.3621826171875, -48.46990966796875, -191.7477264404297, -5.451883316040039, 629.9843139648438, 187.6793975830078, 376.1688537597656, 728.498046875, -175.65672302246094, 301.4279479980469, 639.4996337890625, 305.8354187011719, 278.4333801269531, 197.8715362548828, 308.08685302734375, 235.4666290283203, -44.433433532714844], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000351.npy"}
{"epoch": 0.5154185022026432, "step": 352, "batch_size": 64, "mean": 153.89178466796875, "std": 277.4481201171875, "min": -663.4613647460938, "p10": -146.6770973205566, "median": 121.22327423095703, "p90": 500.82065429687503, "max": 975.5285034179688, "pos_frac": 0.703125, "sample": [373.9778137207031, 383.76422119140625, 146.88619995117188, 507.4344787597656, -326.2619934082031, 77.37824249267578, 89.42093658447266, 394.2969970703125, 64.94847106933594, 240.1366424560547, 618.9610595703125, 476.27679443359375, -37.2000846862793, 59.85228729248047, 237.2212371826172, -39.07243347167969, -77.82217407226562, 975.5285034179688, 260.1102600097656, -157.04367065429688, 191.23260498046875, 705.842529296875, 139.42332458496094, 408.48455810546875, 22.502349853515625, -97.12482452392578, -36.420711517333984, -319.9383544921875, 59.657020568847656, 119.84188842773438, 34.557708740234375, 491.102294921875, 17.16884994506836, -21.241661071777344, 530.9529418945312, 121.40367889404297, -663.4613647460938, 59.56219482421875, 216.73446655273438, 375.0658874511719, 285.514892578125, 144.429443359375, 196.1429901123047, -183.48692321777344, -69.805908203125, -40.018463134765625, -205.25732421875, 502.0128173828125, 495.5604248046875, -180.7157745361328, 252.66893005371094, -122.4884262084961, -80.46760559082031, 498.0389404296875, 250.41653442382812, 190.16848754882812, 413.5143737792969, -73.47978973388672, 589.0232543945312, 121.0428695678711, 13.006134033203125, -78.87937927246094, 238.5760498046875, 69.41937255859375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000352.npy"}
{"epoch": 0.5168869309838473, "step": 353, "batch_size": 64, "mean": 212.76190185546875, "std": 292.6882019042969, "min": -429.4403991699219, "p10": -77.48814315795899, "median": 151.56806182861328, "p90": 613.2348205566407, "max": 978.75, "pos_frac": 0.71875, "sample": [350.2242431640625, 478.3942565917969, 74.14969635009766, 487.25531005859375, 165.29840087890625, 394.0210266113281, 24.488740921020508, -77.50324249267578, 602.716064453125, 202.7266845703125, 368.2850036621094, -67.07581329345703, 353.82244873046875, 578.0100708007812, 43.2176513671875, 373.5583190917969, -116.18537902832031, 46.6116943359375, 601.0009765625, 663.4148559570312, 121.29637145996094, 276.7181396484375, -22.662113189697266, 94.81582641601562, -429.4403991699219, 407.16424560546875, -238.42620849609375, -65.17523956298828, -83.51062774658203, 297.0661926269531, -34.93678283691406, 137.8377227783203, 329.5621643066406, 366.71337890625, 20.01971435546875, -8.57595443725586, 14.337982177734375, -156.00820922851562, 859.5201416015625, 1.4864978790283203, -34.213592529296875, 618.8892822265625, -31.41754913330078, 7.088996887207031, 113.5149154663086, 316.717041015625, 326.74517822265625, 239.43016052246094, -45.134918212890625, 661.0787353515625, 280.89239501953125, -77.45291137695312, 612.25390625, 362.7035827636719, 978.75, 840.287353515625, -184.3842010498047, 350.64471435546875, 613.6552124023438, -69.7733154296875, 52.714195251464844, 18.219768524169922, -77.1881103515625, 308.5065002441406], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000353.npy"}
{"epoch": 0.5183553597650514, "step": 354, "batch_size": 64, "mean": 230.70608520507812, "std": 239.1261444091797, "min": -181.8125, "p10": -62.80235633850097, "median": 199.49755859375, "p90": 509.13988647460945, "max": 1032.4228515625, "pos_frac": 0.796875, "sample": [342.9237365722656, 494.16461181640625, 285.7076416015625, 51.31854248046875, 453.2170715332031, 195.50808715820312, 632.3270263671875, -60.0163459777832, 224.361572265625, -101.20207214355469, -81.89737701416016, -181.8125, 254.2115936279297, 1032.4228515625, 446.5751953125, 199.35977172851562, -63.996360778808594, 375.2088317871094, 439.3747253417969, 229.39076232910156, 8.223640441894531, 427.8168029785156, 127.03681182861328, 126.416015625, -158.7779541015625, 135.30226135253906, 357.9734191894531, 199.63534545898438, 94.39717864990234, 352.96868896484375, 565.9898071289062, -110.97000122070312, 340.3930358886719, 260.4731140136719, 166.62210083007812, 135.2060089111328, -71.0138931274414, 468.8847961425781, 181.81939697265625, 158.54811096191406, 515.557861328125, -44.744117736816406, 335.71624755859375, 249.80126953125, 34.07202911376953, 296.3084716796875, 78.1360092163086, 604.8051147460938, 420.67822265625, -14.918670654296875, -25.679285049438477, 381.8907165527344, 370.6595458984375, 166.22142028808594, -18.44483184814453, 829.6708374023438, 39.93303680419922, 400.29510498046875, 395.4226989746094, 43.29621887207031, 520.4494018554688, 147.7239227294922, 112.21102905273438, -7.965274810791016], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000354.npy"}
{"epoch": 0.5198237885462555, "step": 355, "batch_size": 64, "mean": 230.5689239501953, "std": 242.03224182128906, "min": -334.2419738769531, "p10": -60.407928466796854, "median": 222.10120391845703, "p90": 528.6586486816407, "max": 1028.6173095703125, "pos_frac": 0.84375, "sample": [149.47799682617188, 565.307373046875, 431.6026306152344, 489.10870361328125, 56.001792907714844, 298.81787109375, 222.57162475585938, 1028.6173095703125, -319.22955322265625, 271.6966552734375, 317.1871032714844, 544.9021606445312, 35.238243103027344, -17.67099380493164, 519.0939331054688, 542.0364990234375, -123.81155395507812, 500.73980712890625, 169.67051696777344, 91.5708999633789, 384.11248779296875, 254.36886596679688, -39.35762023925781, -72.12190246582031, 377.176025390625, 489.7469482421875, 466.97540283203125, 306.0769348144531, 145.1680450439453, -2.894622802734375, 6.096408843994141, 208.82501220703125, 75.62303161621094, -334.2419738769531, 221.6307830810547, -127.19430541992188, 274.5899658203125, 609.3037719726562, 308.0223388671875, 659.8619995117188, 40.07630157470703, 235.1282958984375, 95.48748779296875, 227.46847534179688, 170.09194946289062, 210.917724609375, 57.448814392089844, 447.0156555175781, 87.97821044921875, 211.01190185546875, 455.5962219238281, 532.7578125, 408.17901611328125, 170.67791748046875, 121.68318176269531, 0.307891845703125, 169.59266662597656, -127.89822387695312, 227.18414306640625, -69.42948913574219, 86.15838623046875, 388.7833251953125, 337.0767822265625, 288.41986083984375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000355.npy"}
{"epoch": 0.5212922173274597, "step": 356, "batch_size": 64, "mean": 121.26020050048828, "std": 327.2241516113281, "min": -709.423095703125, "p10": -289.1190551757812, "median": 85.35372924804688, "p90": 498.6971557617188, "max": 941.93017578125, "pos_frac": 0.671875, "sample": [153.06661987304688, 68.79036712646484, 355.1522216796875, 57.76372528076172, -16.485736846923828, -13.43320083618164, -77.56480407714844, 206.10293579101562, -168.3638916015625, 236.97775268554688, 291.3662109375, 0.4142646789550781, 813.3386840820312, 480.8309631347656, -131.13177490234375, 570.1193237304688, -97.41450500488281, -323.63702392578125, -709.423095703125, -308.9734191894531, 54.54436492919922, 828.06201171875, -186.5170440673828, 118.38473510742188, 348.3079833984375, 466.279052734375, -43.140342712402344, 506.3540954589844, -346.67645263671875, 28.351675033569336, -422.82696533203125, 706.2710571289062, 114.64698791503906, -242.79220581054688, 437.46441650390625, 85.82276916503906, 282.2688293457031, 421.4560546875, -203.9813232421875, 29.498367309570312, -57.59678268432617, -164.35763549804688, 143.71661376953125, 204.64483642578125, 7.809425354003906, 288.45904541015625, 293.2505187988281, -203.54090881347656, 846.796142578125, 295.1922302246094, 153.54364013671875, 128.70639038085938, -388.4986572265625, -38.82599639892578, 191.02484130859375, 78.99918365478516, 84.88468933105469, 355.667236328125, -456.6128234863281, 941.93017578125, 316.90362548828125, 299.20989990234375, 8.352470397949219, 61.72077178955078], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000356.npy"}
{"epoch": 0.5227606461086637, "step": 357, "batch_size": 64, "mean": 195.17225646972656, "std": 287.7282409667969, "min": -653.5499877929688, "p10": -131.95761260986328, "median": 179.5165557861328, "p90": 617.9890258789063, "max": 821.1763916015625, "pos_frac": 0.75, "sample": [382.2322998046875, 564.7433471679688, -145.4468536376953, 522.243896484375, 640.948486328125, -182.04795837402344, 10.492080688476562, 501.40771484375, 152.98748779296875, 653.640869140625, 262.60931396484375, 73.4746322631836, -116.7552719116211, 821.1763916015625, -54.997650146484375, 156.74099731445312, 228.87411499023438, 211.31314086914062, 337.93890380859375, -48.78059387207031, 609.20458984375, -10.033119201660156, 460.4980773925781, -129.49440002441406, -653.5499877929688, 41.90798568725586, 256.8916015625, 98.08145904541016, -82.02313232421875, 507.99090576171875, 247.57113647460938, -213.06411743164062, 621.7537841796875, -272.0946044921875, 323.3077392578125, 12.805538177490234, 355.1455993652344, 753.443115234375, 393.44476318359375, 286.4096374511719, -133.01327514648438, 0.49103355407714844, 272.2619323730469, 685.8927001953125, 106.46698760986328, 53.4269905090332, -32.75666809082031, 206.42153930664062, -32.634525299072266, 202.2921142578125, 5.446535110473633, 261.6312255859375, 152.4219970703125, 8.829147338867188, 306.878662109375, 288.41937255859375, 94.28369903564453, 245.93099975585938, -213.16061401367188, 372.75714111328125, 145.0217742919922, 791.777587890625, -27.42681884765625, 148.37271118164062], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000357.npy"}
{"epoch": 0.5242290748898678, "step": 358, "batch_size": 64, "mean": 200.61288452148438, "std": 297.124267578125, "min": -483.28033447265625, "p10": -97.45706634521484, "median": 131.8130340576172, "p90": 545.4588043212892, "max": 1054.0399169921875, "pos_frac": 0.734375, "sample": [68.9674072265625, 637.0489501953125, 150.3093719482422, 283.67816162109375, 252.8597869873047, 437.4811096191406, 103.41026306152344, 288.7137451171875, 111.14669799804688, 28.301498413085938, 47.98065948486328, -133.64569091796875, -170.47775268554688, -24.714492797851562, -1.7423267364501953, 980.9150390625, -97.62096405029297, 661.8741455078125, 66.08633422851562, 119.0159912109375, -34.614601135253906, 1.4748382568359375, 17.879074096679688, 65.5624008178711, 120.816162109375, -483.28033447265625, 379.0064697265625, -151.73638916015625, 1054.0399169921875, 319.57061767578125, 504.234619140625, 142.80990600585938, 239.46226501464844, -1.1901626586914062, 562.0193481445312, 200.53472900390625, -97.07463836669922, 39.71847915649414, -99.7021713256836, 506.8175354003906, 331.938720703125, 490.4934997558594, -166.4402313232422, 286.995849609375, 364.63818359375, -53.41156005859375, 351.64727783203125, 1030.7037353515625, -83.01075744628906, 105.77607727050781, 495.28070068359375, 192.44906616210938, 110.31741333007812, -34.728858947753906, 472.29656982421875, 859.8685302734375, 90.84490203857422, -23.906837463378906, 174.9496307373047, -64.86428833007812, 171.9783172607422, 251.493896484375, 218.40283203125, 169.57559204101562], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000358.npy"}
{"epoch": 0.5256975036710719, "step": 359, "batch_size": 64, "mean": 234.68478393554688, "std": 316.45904541015625, "min": -390.44635009765625, "p10": -130.3836898803711, "median": 196.75216674804688, "p90": 640.7079162597657, "max": 1117.1475830078125, "pos_frac": 0.75, "sample": [-390.44635009765625, 252.26358032226562, 344.3290710449219, 469.8543395996094, -132.62881469726562, 647.0744018554688, -275.7465515136719, 854.2916259765625, 118.91252899169922, -21.59086799621582, -123.21705627441406, -98.21341705322266, 425.3291015625, 303.1662292480469, -206.86859130859375, -27.152511596679688, 327.749755859375, -118.56951904296875, 519.1019897460938, -218.57394409179688, 257.3908386230469, 198.18759155273438, 487.54205322265625, 76.10506439208984, -31.911632537841797, 147.2193145751953, 28.224802017211914, 545.80029296875, 86.85247802734375, 126.30193328857422, 535.3968505859375, 306.15399169921875, 1117.1475830078125, 430.97125244140625, -125.14506530761719, 22.326120376586914, 195.31674194335938, 102.2241439819336, -330.11077880859375, 801.6263427734375, 100.90306091308594, 486.7994689941406, 398.29461669921875, 365.976318359375, -180.05426025390625, 150.5061798095703, 577.3546142578125, 393.7672119140625, 165.96673583984375, 778.32958984375, 67.41625213623047, -117.22770690917969, -43.77970886230469, 498.6341247558594, 193.1888427734375, 655.8240356445312, 11.663177490234375, 265.2059020996094, 813.7412109375, 551.4153442382812, 243.77743530273438, 625.852783203125, 168.85826110839844, 220.72772216796875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000359.npy"}
{"epoch": 0.527165932452276, "step": 360, "batch_size": 64, "mean": 177.61534118652344, "std": 209.26573181152344, "min": -130.00494384765625, "p10": -77.46424179077147, "median": 160.89430236816406, "p90": 436.9680603027344, "max": 686.1467895507812, "pos_frac": 0.734375, "sample": [61.885955810546875, 527.2092895507812, 66.03865814208984, 181.5145263671875, -30.7463321685791, 307.54864501953125, 9.435096740722656, 217.41226196289062, 299.7159729003906, 238.85926818847656, 128.78646850585938, 487.3758239746094, -124.00604248046875, 302.8584899902344, 635.6585693359375, 168.52342224121094, 37.73137664794922, -24.439315795898438, 288.2164306640625, 119.63748168945312, 435.1532897949219, 196.93295288085938, -58.22148895263672, 247.5701141357422, 77.226806640625, 348.6375732421875, 428.8840026855469, -35.17939758300781, -19.136669158935547, 341.0413818359375, 526.6825561523438, 353.782470703125, 686.1467895507812, -39.30479431152344, 45.03961944580078, 153.2651824951172, 233.32315063476562, 150.85601806640625, 394.041015625, -64.54852294921875, 96.00282287597656, 626.7517700195312, -130.00494384765625, -97.01504516601562, -18.881561279296875, 211.36846923828125, -127.32793426513672, 178.7379913330078, -41.57477569580078, -120.47511291503906, -35.87451171875, 339.1741638183594, 40.740638732910156, 392.8735046386719, 437.7458190917969, 208.79534912109375, 244.18492126464844, -82.99954986572266, 149.91107177734375, 417.8939208984375, 96.63175964355469, -91.40076446533203, 4.847923278808594, 365.8677673339844], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000360.npy"}
{"epoch": 0.5286343612334802, "step": 361, "batch_size": 64, "mean": 171.79086303710938, "std": 257.8415832519531, "min": -451.1902160644531, "p10": -129.8543418884277, "median": 193.81796264648438, "p90": 519.8879516601563, "max": 867.3260498046875, "pos_frac": 0.703125, "sample": [66.0280990600586, 206.27102661132812, 28.339122772216797, 344.24237060546875, -68.13602447509766, -248.79168701171875, -42.802223205566406, 225.5935821533203, 537.5217895507812, 150.4947052001953, 197.8980712890625, 266.3894348144531, -451.1902160644531, 423.1370849609375, -48.55030822753906, -86.86231231689453, 208.94137573242188, 148.6448211669922, -310.2341003417969, -37.701446533203125, 223.23031616210938, -156.2901611328125, -8.545713424682617, -33.22239303588867, 231.9559783935547, 649.0352783203125, 299.76788330078125, 480.37237548828125, 86.92083740234375, 373.6027526855469, -65.02818298339844, -53.12929916381836, 210.7945098876953, 179.84017944335938, 444.3541259765625, 524.8616943359375, -169.4814910888672, 867.3260498046875, 694.1159057617188, 504.55914306640625, 223.82579040527344, 237.66099548339844, 512.3621826171875, 414.51239013671875, 167.86529541015625, 214.00070190429688, 88.15911102294922, 221.72695922851562, -43.492584228515625, -103.52156829833984, 70.10945892333984, -34.58304214477539, 545.9404907226562, 13.199943542480469, 354.63336181640625, 254.8623809814453, 150.98379516601562, -141.1398162841797, 251.74520874023438, 17.597305297851562, 189.73785400390625, -272.7824401855469, 523.11328125, 343.8260498046875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000361.npy"}
{"epoch": 0.5301027900146843, "step": 362, "batch_size": 64, "mean": 268.6226806640625, "std": 263.51806640625, "min": -148.17294311523438, "p10": -24.164958000183102, "median": 216.13208770751953, "p90": 598.0055419921875, "max": 1308.25146484375, "pos_frac": 0.859375, "sample": [102.9381103515625, 151.68783569335938, 530.94287109375, 522.246337890625, 283.195068359375, -30.006309509277344, 1308.25146484375, 284.0094299316406, 115.28115844726562, 642.1492919921875, 506.3959655761719, 106.0864486694336, 35.060401916503906, 120.59439086914062, 291.193115234375, 463.8025817871094, 192.14881896972656, -76.33850860595703, 76.3834228515625, 347.6973876953125, 453.5057373046875, 86.01138305664062, 20.723241806030273, -20.589996337890625, -1.8173065185546875, -25.697084426879883, 133.426513671875, 154.2815399169922, -78.91161346435547, 688.8946533203125, 598.1215209960938, 137.70115661621094, 28.325897216796875, 258.0160827636719, 278.80999755859375, 177.69790649414062, 583.6927490234375, -46.01652526855469, 216.8102264404297, 597.7349243164062, 242.61338806152344, 138.94894409179688, 668.3352661132812, 399.4040222167969, 151.83221435546875, 135.0904541015625, 438.39776611328125, 215.45394897460938, 21.256179809570312, 778.012939453125, 214.1000213623047, 796.3068237304688, 466.0699157714844, 368.93896484375, 217.05416870117188, 446.5879211425781, 493.591552734375, -71.08782958984375, 137.3829803466797, 346.7071838378906, 270.886474609375, 226.66290283203125, 23.038177490234375, -148.17294311523438], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000362.npy"}
{"epoch": 0.5315712187958884, "step": 363, "batch_size": 64, "mean": 278.00701904296875, "std": 271.0914306640625, "min": -245.1898193359375, "p10": -22.188740730285645, "median": 258.03704833984375, "p90": 635.2920654296876, "max": 1309.462646484375, "pos_frac": 0.875, "sample": [-245.1898193359375, 271.68133544921875, 580.9598388671875, 253.7671356201172, 26.902454376220703, 257.5162353515625, 289.004150390625, 254.72604370117188, 216.44187927246094, 109.8335952758789, 334.17938232421875, 263.8804931640625, 324.71466064453125, 12.964149475097656, 349.71575927734375, 813.3045654296875, -48.78142547607422, 615.050537109375, 78.52207946777344, 751.7603149414062, 512.4169921875, 153.26263427734375, 289.5888366699219, 386.7137145996094, 278.5408630371094, 419.476318359375, 1309.462646484375, 530.2628784179688, 20.1275634765625, 641.7979125976562, 287.7205810546875, 252.7956085205078, 64.83038330078125, 258.557861328125, 447.497314453125, 80.51892852783203, 232.68545532226562, -61.29052734375, 643.5728759765625, 448.8782958984375, 417.45159912109375, -140.82363891601562, 40.54875946044922, 444.30078125, -22.52193260192871, 373.27239990234375, 620.1117553710938, -45.2430419921875, 237.83270263671875, 435.5341491699219, -21.411293029785156, 141.15151977539062, -73.30638885498047, 371.41790771484375, 246.87692260742188, 19.27143096923828, 20.188743591308594, 227.8001708984375, 40.69051742553711, 35.23926544189453, 732.0512084960938, 681.6967163085938, 0.19808197021484375, 301.7493896484375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000363.npy"}
{"epoch": 0.5330396475770925, "step": 364, "batch_size": 64, "mean": 203.99966430664062, "std": 275.5115051269531, "min": -543.9165649414062, "p10": -66.60769691467284, "median": 199.3627471923828, "p90": 569.2341003417969, "max": 876.2697143554688, "pos_frac": 0.78125, "sample": [444.0520324707031, 261.2136535644531, -309.00274658203125, 60.512611389160156, -543.9165649414062, 574.3067626953125, -293.29132080078125, 343.6381530761719, 236.41213989257812, 876.2697143554688, 186.47377014160156, 693.20654296875, 433.73651123046875, 338.43316650390625, -29.19860076904297, 70.28167724609375, 51.58732604980469, -229.62704467773438, 293.6925354003906, -45.87254333496094, -57.989681243896484, 360.6628723144531, 221.25387573242188, 430.03564453125, 229.49090576171875, 15.888139724731445, -3.8017311096191406, 489.28228759765625, 212.25172424316406, 235.79754638671875, -70.30113220214844, 637.5771484375, 122.1890640258789, 30.705474853515625, 117.23365020751953, 74.84965515136719, 495.5087890625, 336.037109375, 280.8129577636719, -112.55014038085938, 91.94844818115234, 623.767333984375, 674.7472534179688, 110.69691467285156, 320.7896728515625, -50.70166778564453, -38.78998565673828, 342.6737060546875, 557.3978881835938, 359.0711975097656, 42.52647018432617, 177.7305145263672, 9.37763786315918, 178.65628051757812, 838.46826171875, 68.4395751953125, 285.64324951171875, 319.6827697753906, -40.50739288330078, 259.781494140625, 53.862548828125, 118.25738525390625, 510.26324462890625, -215.64639282226562], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000364.npy"}
{"epoch": 0.5345080763582967, "step": 365, "batch_size": 64, "mean": 214.514892578125, "std": 251.6396026611328, "min": -189.09976196289062, "p10": -68.78699493408199, "median": 141.17066192626953, "p90": 513.7841918945314, "max": 1166.7293701171875, "pos_frac": 0.84375, "sample": [180.21923828125, 125.8780288696289, 127.63079833984375, 100.47383117675781, 103.44476318359375, 131.65673828125, 335.80035400390625, 83.80643463134766, 223.54074096679688, 82.81472778320312, 311.01898193359375, 465.39178466796875, -86.19572448730469, 215.26083374023438, 52.85829162597656, -28.1666259765625, 100.34547424316406, 736.487548828125, 608.3578491210938, 337.7381896972656, 63.783485412597656, 92.05121612548828, 36.035972595214844, 17.2884521484375, 150.68458557128906, -126.527099609375, 126.10317993164062, 42.54678726196289, 181.01881408691406, 195.3592529296875, -87.74885559082031, -18.13051986694336, 95.1554946899414, 37.90364456176758, 365.26416015625, 647.2937622070312, 399.5030212402344, 70.27053833007812, -108.02398681640625, 472.19366455078125, 308.2601318359375, -189.09976196289062, 230.0558319091797, -13.176076889038086, 54.089271545410156, 310.9432373046875, 269.10931396484375, 253.57421875, 339.8978576660156, -141.6556396484375, 93.73767852783203, 403.400146484375, 40.750267028808594, 271.6829833984375, 315.5347900390625, 316.3325500488281, 1166.7293701171875, 531.6087036132812, 442.06524658203125, -182.7610626220703, 781.66259765625, 723.1099853515625, 124.47831726074219, 418.2345886230469], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000365.npy"}
{"epoch": 0.5359765051395007, "step": 366, "batch_size": 64, "mean": 221.3247528076172, "std": 282.0758972167969, "min": -312.0382080078125, "p10": -140.4030334472656, "median": 188.41178131103516, "p90": 567.8844055175782, "max": 1011.2221069335938, "pos_frac": 0.78125, "sample": [-141.99658203125, 215.5010986328125, 333.1438903808594, 103.493896484375, -288.2828063964844, 34.92516326904297, -91.68659973144531, 64.55380249023438, -51.48876953125, 485.635986328125, 93.57648468017578, -204.52572631835938, 489.0885925292969, 648.572509765625, 186.85459899902344, -69.58491516113281, 314.1570739746094, 16.30996322631836, 110.08992767333984, -83.16545104980469, 344.5525817871094, 544.4740600585938, 402.46893310546875, 88.08477783203125, 527.9356689453125, 1011.2221069335938, 361.80377197265625, 110.80419158935547, 528.2357177734375, 223.26951599121094, 189.96896362304688, -46.071197509765625, -156.76992797851562, 573.3897094726562, 353.2142333984375, 409.64031982421875, 850.3015747070312, 350.8778381347656, 180.5595703125, 129.54368591308594, 214.71795654296875, 147.27886962890625, 64.01348876953125, -144.86273193359375, 540.2255859375, -150.16867065429688, 285.58416748046875, 24.372058868408203, 555.0386962890625, 575.1956787109375, 356.376953125, 312.84136962890625, 654.4871826171875, 322.6505432128906, 833.013916015625, 205.81109619140625, -81.92713928222656, 47.40501403808594, -136.68475341796875, 130.01882934570312, 105.60863494873047, 163.90310668945312, 309.2449035644531, -312.0382080078125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000366.npy"}
{"epoch": 0.5374449339207048, "step": 367, "batch_size": 64, "mean": 185.32125854492188, "std": 232.56837463378906, "min": -483.9673767089844, "p10": -57.85321044921874, "median": 135.4007568359375, "p90": 500.54020996093755, "max": 816.1209716796875, "pos_frac": 0.78125, "sample": [111.12901306152344, 153.25006103515625, 464.33917236328125, 42.1469612121582, 599.5347900390625, 7.417173385620117, 368.71478271484375, 106.59024047851562, 506.86328125, 28.732215881347656, 29.571884155273438, -3.2618541717529297, -82.4066162109375, 526.6827392578125, 485.786376953125, 302.2537841796875, 434.30340576171875, 123.34230041503906, 249.6428985595703, -59.571861267089844, 276.8971862792969, 421.8205261230469, -53.84302520751953, -22.05896759033203, 319.5702209472656, 289.9657897949219, 816.1209716796875, -1.9658336639404297, 543.2395629882812, 81.84033203125, 411.135986328125, 171.6451873779297, -0.7887840270996094, 334.5001220703125, 60.98664855957031, 220.1246337890625, 333.32611083984375, 279.3527526855469, 406.6792907714844, 96.41531372070312, 201.91722106933594, -222.50631713867188, -213.5948028564453, -42.942657470703125, -61.76806640625, 33.84800720214844, 71.78170776367188, 386.3418884277344, 127.08811950683594, 89.96331787109375, -159.09982299804688, -483.9673767089844, 109.4235610961914, 120.87849426269531, 240.35906982421875, 352.10455322265625, 435.5814208984375, 315.649169921875, 509.73199462890625, 23.844894409179688, 143.71339416503906, -20.539207458496094, 510.98187255859375, 11.744842529296875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000367.npy"}
{"epoch": 0.5389133627019089, "step": 368, "batch_size": 64, "mean": 286.89935302734375, "std": 218.54974365234375, "min": -179.53814697265625, "p10": -5.869413375854477, "median": 314.87330627441406, "p90": 560.6645690917969, "max": 1025.673583984375, "pos_frac": 0.890625, "sample": [311.6931457519531, 349.4214172363281, 183.58692932128906, 507.4689025878906, 350.4331970214844, 593.6444702148438, 172.39154052734375, 51.44059753417969, 574.1102294921875, 113.23250579833984, -35.47188949584961, 387.42822265625, 472.86578369140625, 48.4956169128418, 278.4524230957031, 373.6201477050781, 323.8178405761719, 320.00177001953125, -30.733558654785156, 392.5878601074219, 226.95065307617188, 293.3478698730469, 571.3525390625, 333.3941955566406, 366.0979309082031, 99.0106201171875, 373.5841064453125, -15.030445098876953, 20.47750473022461, 8.735607147216797, 220.82655334472656, 398.08349609375, 386.78729248046875, 318.053466796875, 396.39251708984375, 553.0391235351562, 388.26995849609375, 23.768404006958008, 188.01454162597656, -16.201805114746094, 344.3726806640625, 537.7549438476562, 415.8512268066406, 563.9326171875, 168.748291015625, 309.6473388671875, 661.751953125, 21.741775512695312, -179.53814697265625, 126.6080322265625, 104.64037322998047, 530.8881225585938, 463.17266845703125, 473.2435607910156, 360.4994201660156, 38.86631774902344, 205.5562744140625, 1025.673583984375, 283.64300537109375, 248.61196899414062, 300.12945556640625, -12.128707885742188, -83.61518859863281, 578.0665893554688], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000368.npy"}
{"epoch": 0.540381791483113, "step": 369, "batch_size": 64, "mean": 151.08335876464844, "std": 273.7601318359375, "min": -610.7976684570312, "p10": -146.59796295166015, "median": 147.53389739990234, "p90": 479.2521728515627, "max": 852.101318359375, "pos_frac": 0.75, "sample": [-194.3966064453125, -126.7971420288086, 852.101318359375, -26.1492977142334, -229.92262268066406, 41.450225830078125, -48.220542907714844, 302.9146728515625, 5.515377044677734, 661.717041015625, 225.544921875, -72.52566528320312, 159.10641479492188, 328.193603515625, 293.09564208984375, 825.6795654296875, 503.8581237792969, 169.46600341796875, 78.56929016113281, 398.5328674316406, 286.59796142578125, 111.96202087402344, 104.02761840820312, 250.7857666015625, 231.92117309570312, 241.97467041015625, -495.76177978515625, 255.17666625976562, 134.86495971679688, 123.63042449951172, 254.37098693847656, 346.49102783203125, 29.64965057373047, 155.34608459472656, 170.21737670898438, 7.714744567871094, 335.4290466308594, 116.28252410888672, -148.9453887939453, 338.25579833984375, 498.5268249511719, -183.1880340576172, 686.7080688476562, -5.4881134033203125, 434.2779846191406, 197.09539794921875, 365.48541259765625, -485.3411560058594, 4.932243347167969, 567.4826049804688, 191.01556396484375, 114.69636535644531, -610.7976684570312, 80.68047332763672, 222.01519775390625, -141.12063598632812, -50.38038635253906, 57.872657775878906, 139.72171020507812, 327.7177429199219, 197.54019165039062, -9.695014953613281, 119.40185546875, -47.549095153808594], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000369.npy"}
{"epoch": 0.5418502202643172, "step": 370, "batch_size": 64, "mean": 146.07177734375, "std": 283.50909423828125, "min": -380.3669738769531, "p10": -227.42162933349607, "median": 141.01141357421875, "p90": 510.4307128906251, "max": 909.771728515625, "pos_frac": 0.703125, "sample": [97.3809585571289, 315.4187927246094, 228.1044158935547, -372.8525390625, 724.1023559570312, 316.02166748046875, 129.184814453125, 278.64129638671875, 43.36735534667969, -125.8652114868164, 452.7470703125, 536.1982421875, 42.57025146484375, 134.82089233398438, -87.50205993652344, 490.778076171875, 197.61764526367188, 147.20193481445312, 224.03839111328125, 909.771728515625, 795.5718994140625, 179.10333251953125, 346.1289978027344, -151.93426513671875, 266.2304382324219, -119.02994537353516, -4.89154052734375, 292.09405517578125, -215.73049926757812, 166.2529296875, 474.8985900878906, 125.18353271484375, -21.370101928710938, 272.3321533203125, -23.49256134033203, 387.177978515625, 235.81936645507812, -186.74139404296875, -347.10302734375, 20.530370712280273, 199.96597290039062, 225.16314697265625, -232.43211364746094, 66.6151123046875, 452.32196044921875, 98.36663818359375, 176.90228271484375, 299.04486083984375, 223.7720489501953, -380.3669738769531, -89.68385314941406, -48.5440673828125, 116.8121337890625, 119.65591430664062, -327.0948791503906, -289.2993469238281, 12.028060913085938, -283.5494079589844, -143.72039794921875, 518.853271484375, 607.296875, 241.98822021484375, 31.18121337890625, 580.5410766601562], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000370.npy"}
{"epoch": 0.5433186490455213, "step": 371, "batch_size": 64, "mean": 212.38922119140625, "std": 302.83489990234375, "min": -508.69110107421875, "p10": -115.76660537719727, "median": 158.53269958496094, "p90": 623.1354614257816, "max": 962.1849975585938, "pos_frac": 0.734375, "sample": [386.0873718261719, 260.5303039550781, 474.2209167480469, 67.67891693115234, -508.69110107421875, 133.61251831054688, 513.3134765625, 187.28543090820312, 803.4796752929688, 291.9500732421875, 477.6279296875, -59.04230499267578, 371.15625, 198.56448364257812, -301.3827819824219, 98.41560363769531, -4.127410888671875, 162.5730743408203, 500.226318359375, -26.994384765625, -55.24412536621094, -249.72898864746094, 658.9542236328125, -109.74844360351562, -194.99212646484375, 141.2025909423828, 942.1770629882812, 717.8143920898438, -194.93467712402344, 40.279083251953125, 697.3329467773438, -24.319198608398438, 371.6253662109375, 287.33099365234375, -118.34581756591797, -37.658477783203125, 539.558349609375, -121.76981353759766, 262.373291015625, 438.61553955078125, 22.369199752807617, 111.29730987548828, 806.6107177734375, 39.659934997558594, 16.116466522216797, 259.83807373046875, 312.0852355957031, 392.1960754394531, 475.62835693359375, -25.629318237304688, 450.47308349609375, 109.39617156982422, 96.68220520019531, 356.17926025390625, 154.49232482910156, 177.29458618164062, 117.16288757324219, 72.29226684570312, -47.40223693847656, 489.7659912109375, 314.4300231933594, 962.1849975585938, -106.83238220214844, 19.613555908203125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000371.npy"}
{"epoch": 0.5447870778267254, "step": 372, "batch_size": 64, "mean": 252.9879608154297, "std": 342.2443542480469, "min": -370.6527099609375, "p10": -129.67422943115233, "median": 193.677978515625, "p90": 733.2213134765625, "max": 1538.96533203125, "pos_frac": 0.765625, "sample": [319.2071533203125, 243.6520538330078, 706.0411987304688, -304.88531494140625, 45.58308410644531, 733.7640380859375, 519.2138061523438, 215.279052734375, 294.82318115234375, -17.559555053710938, 1538.96533203125, 285.1524658203125, -12.42303466796875, 163.54922485351562, -52.792022705078125, 20.30998992919922, 98.07861328125, 79.28411102294922, 757.4049072265625, -193.2516326904297, 105.75241088867188, 234.95846557617188, 767.8304443359375, -101.5607681274414, 484.7006530761719, 131.80592346191406, 113.6801986694336, 216.17462158203125, 184.83181762695312, 149.64059448242188, 164.43618774414062, 334.67138671875, 327.3236083984375, -141.16665649414062, 168.3011474609375, -160.48651123046875, 354.1983947753906, 731.9549560546875, 58.49988555908203, 1034.8824462890625, 590.9509887695312, 359.1621398925781, 186.31301879882812, -198.23849487304688, 468.33782958984375, 507.2619323730469, 726.4539184570312, 262.00042724609375, -11.577896118164062, 349.0947570800781, 95.17135620117188, 274.3427429199219, 121.35832214355469, -62.361167907714844, 767.4404296875, 169.9091033935547, -49.423309326171875, 201.83087158203125, -102.85856628417969, 855.6041870117188, -218.3343048095703, -370.6527099609375, 468.57550048828125, 201.04293823242188], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000372.npy"}
{"epoch": 0.5462555066079295, "step": 373, "batch_size": 64, "mean": 236.47280883789062, "std": 288.06451416015625, "min": -531.0093994140625, "p10": -58.742356872558595, "median": 209.44740295410156, "p90": 620.7612182617188, "max": 839.1682739257812, "pos_frac": 0.796875, "sample": [127.33478546142578, -170.0868682861328, -268.8779296875, 269.92144775390625, 80.66424560546875, 839.1682739257812, 466.7717590332031, -18.064712524414062, 226.93161010742188, 138.0498046875, 620.6068115234375, 182.41990661621094, -58.881805419921875, 258.4645690917969, 404.1795959472656, 24.359994888305664, 25.43777084350586, 816.3776245117188, 320.4146728515625, 32.67839813232422, -376.3194580078125, -28.504058837890625, 243.47918701171875, -531.0093994140625, 117.17779541015625, 54.338050842285156, 149.5743408203125, 148.76681518554688, 633.470947265625, 490.9686279296875, 201.82176208496094, 430.0096435546875, 343.6723937988281, 514.7091674804688, 425.6900634765625, -76.28814697265625, -197.33197021484375, 592.4307250976562, 259.74224853515625, -9.409774780273438, 812.4713745117188, 104.1065902709961, 423.6318359375, 191.33868408203125, 371.8034362792969, 489.64453125, 313.0164794921875, 217.0730438232422, 587.7658081054688, 734.3533935546875, 141.713134765625, -18.530426025390625, 151.5858154296875, 223.45050048828125, -18.407608032226562, 273.13885498046875, 132.5856475830078, 121.93710327148438, 785.0925903320312, 393.75177001953125, 424.2934875488281, 620.827392578125, -58.41697692871094, 11.174308776855469], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000373.npy"}
{"epoch": 0.5477239353891337, "step": 374, "batch_size": 64, "mean": 213.04612731933594, "std": 346.0211486816406, "min": -611.6504516601562, "p10": -120.31813049316403, "median": 140.60107421875, "p90": 551.866259765625, "max": 1236.6290283203125, "pos_frac": 0.71875, "sample": [-150.5777587890625, 142.88772583007812, -8.605422973632812, 1236.6290283203125, 301.3392333984375, 1101.0513916015625, 262.8543701171875, 605.4901123046875, 450.4188232421875, -44.036170959472656, 138.31442260742188, 323.0460205078125, 29.733245849609375, 178.78311157226562, 61.979217529296875, -45.386817932128906, 505.13360595703125, -8.250701904296875, 545.7052001953125, 337.90338134765625, -27.605255126953125, -30.964237213134766, 408.1702880859375, 52.02272033691406, -95.25032043457031, 912.798828125, -10.272634506225586, 115.93925476074219, -174.84642028808594, 338.81646728515625, 252.65908813476562, 288.3068542480469, -59.64935302734375, 982.6369018554688, -510.602783203125, 75.65818786621094, -5.9199981689453125, 554.5067138671875, 93.61920928955078, -36.9796142578125, 156.65077209472656, 72.8725357055664, 81.35309600830078, 99.0028076171875, 282.65997314453125, 164.74441528320312, -149.1544952392578, 469.1519470214844, 359.3336181640625, 422.33709716796875, 104.45283508300781, 545.5850219726562, -131.0614776611328, 429.8280029296875, 278.6512756347656, 17.191383361816406, -188.96487426757812, 244.05047607421875, 4.5701446533203125, 105.52397155761719, 1152.0704345703125, -611.6504516601562, 166.46160888671875, 471.8360900878906], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000374.npy"}
{"epoch": 0.5491923641703378, "step": 375, "batch_size": 64, "mean": 214.07928466796875, "std": 313.1701354980469, "min": -532.3634643554688, "p10": -156.70686492919916, "median": 173.97953033447266, "p90": 566.3610656738282, "max": 1146.4688720703125, "pos_frac": 0.765625, "sample": [732.8019409179688, 336.5022277832031, -19.4268798828125, 308.4554748535156, -72.78767395019531, -186.2640380859375, 507.95892333984375, 222.81158447265625, 304.3125, 31.567089080810547, -16.289438247680664, -87.74012756347656, 24.533958435058594, -532.3634643554688, 152.72222900390625, 944.7613525390625, 354.45965576171875, 46.40391540527344, 338.324951171875, 59.95153045654297, 28.502593994140625, 247.72976684570312, -464.610595703125, 38.123634338378906, 356.4373779296875, -233.65647888183594, 570.1716918945312, 98.15198516845703, 164.141845703125, 71.37091827392578, -297.0049743652344, 95.20555114746094, 558.00537109375, 703.4061889648438, 37.68266677856445, 245.21493530273438, 328.7718811035156, 493.19903564453125, 485.9163818359375, 641.552978515625, 411.0038146972656, 183.8172149658203, 124.5777816772461, -273.76812744140625, 442.7994384765625, 478.4529113769531, 314.3433532714844, -235.50552368164062, 1146.4688720703125, 523.3040161132812, 310.9952087402344, -30.51547622680664, -2.48565673828125, 385.54302978515625, 125.30758666992188, 504.4019775390625, 84.15803527832031, 569.9420776367188, -65.43502044677734, 136.99832153320312, 462.4881591796875, 467.3759460449219, 88.63226318359375, -70.83509826660156], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000375.npy"}
{"epoch": 0.5506607929515418, "step": 376, "batch_size": 64, "mean": 260.70465087890625, "std": 318.838623046875, "min": -510.0660400390625, "p10": -150.60397491455078, "median": 234.13800048828125, "p90": 660.2748474121094, "max": 891.997802734375, "pos_frac": 0.796875, "sample": [-117.84281158447266, 751.7879028320312, 234.04736328125, 54.80537033081055, 770.2265014648438, 18.234848022460938, 487.76177978515625, -291.66741943359375, 46.72996520996094, -56.53321838378906, 459.8074951171875, 0.37986183166503906, 602.1449584960938, 115.88575744628906, -208.12689208984375, 472.65521240234375, 601.1766357421875, -197.10110473632812, -105.96600341796875, 237.32469177246094, 89.19248962402344, 8.507917404174805, -153.495361328125, 842.8666381835938, 777.0648803710938, 147.54031372070312, 372.2142333984375, 296.21099853515625, 517.9632568359375, -143.85740661621094, 640.9762573242188, 563.05224609375, 135.4536895751953, 658.241455078125, 147.83389282226562, 514.54248046875, 616.9083862304688, 438.0726623535156, 477.34808349609375, 275.63629150390625, 249.1291961669922, 503.9288024902344, -162.72691345214844, 234.2286376953125, 551.946044921875, 107.33552551269531, 891.997802734375, -12.863365173339844, -190.53321838378906, 89.69617462158203, 30.098236083984375, 167.19383239746094, 624.4688720703125, 661.1463012695312, 264.156494140625, 700.300048828125, 140.44485473632812, 256.59375, 227.40956115722656, 163.80154418945312, -60.752342224121094, 71.01951599121094, 589.1392822265625, -510.0660400390625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000376.npy"}
{"epoch": 0.5521292217327459, "step": 377, "batch_size": 64, "mean": 246.80404663085938, "std": 297.56488037109375, "min": -398.4268493652344, "p10": -70.39076614379883, "median": 203.16128540039062, "p90": 647.0151123046876, "max": 912.493896484375, "pos_frac": 0.765625, "sample": [347.49896240234375, 912.493896484375, 92.09414672851562, -70.99786376953125, 277.2964782714844, -100.80001831054688, 253.58340454101562, -176.1065673828125, 278.13116455078125, 53.55963897705078, 785.1176147460938, 684.57421875, -35.84514617919922, 900.08642578125, 15.849020004272461, 187.66290283203125, 206.5972137451172, 488.8854064941406, 225.4796142578125, -27.992658615112305, 206.52349853515625, 583.9424438476562, 507.3681335449219, -21.33352279663086, 589.8363037109375, 896.4454956054688, 459.9632263183594, -218.68553161621094, 588.3773803710938, 183.46261596679688, 344.4222412109375, 196.75428771972656, 49.59197235107422, 188.93893432617188, 40.279903411865234, 538.5488891601562, 108.65278625488281, 321.78619384765625, 482.95294189453125, 626.3740844726562, 248.86927795410156, -42.459564208984375, -398.4268493652344, 141.57891845703125, -36.669097900390625, 72.52513885498047, 433.25714111328125, 399.032470703125, -316.42034912109375, 386.9297790527344, 83.70011901855469, 655.8612670898438, -57.541748046875, -45.05438232421875, -87.45056915283203, 463.4706726074219, 129.417724609375, 708.2830810546875, 374.482421875, 5.806694030761719, 411.70928955078125, 199.799072265625, 162.363037109375, -68.97420501708984], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000377.npy"}
{"epoch": 0.55359765051395, "step": 378, "batch_size": 64, "mean": 256.518310546875, "std": 280.1162109375, "min": -485.8323974609375, "p10": -37.66330413818359, "median": 231.22791290283203, "p90": 609.7628051757813, "max": 1065.6484375, "pos_frac": 0.828125, "sample": [663.069091796875, 248.51417541503906, 599.1519775390625, 570.72705078125, 455.6834411621094, 135.22340393066406, 234.16537475585938, 423.494873046875, 95.27099609375, 160.37454223632812, 276.2821044921875, 410.48785400390625, 618.7763671875, 190.9279022216797, 160.53656005859375, -31.232192993164062, 178.459716796875, 713.6201171875, 193.23428344726562, 298.27197265625, 229.97772216796875, -104.88959503173828, -0.4470176696777344, 34.11566925048828, 254.66848754882812, 158.54013061523438, -79.32777404785156, 542.8484497070312, 29.708887100219727, 476.1365661621094, 615.900634765625, 495.6871643066406, 209.32565307617188, 157.263916015625, -27.22203826904297, 474.2053527832031, 146.0811767578125, 1065.6484375, 35.34278869628906, -309.649169921875, 284.97186279296875, -23.575035095214844, 232.4781036376953, 595.1905517578125, 224.9063720703125, -40.41949462890625, 449.5254211425781, 288.80816650390625, 478.7243957519531, 349.33599853515625, 79.23573303222656, 331.21783447265625, 459.58502197265625, 298.3768310546875, -485.8323974609375, 129.1421661376953, 225.66244506835938, 501.4794006347656, -119.8004379272461, 773.7062377929688, 55.07261657714844, -418.35931396484375, 134.47593688964844, 614.310302734375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000378.npy"}
{"epoch": 0.5550660792951542, "step": 379, "batch_size": 64, "mean": 248.47398376464844, "std": 320.48370361328125, "min": -513.1692504882812, "p10": -105.84306488037107, "median": 227.7142333984375, "p90": 586.3983154296875, "max": 1382.24560546875, "pos_frac": 0.8125, "sample": [297.8490295410156, 479.293212890625, -79.91696166992188, 215.7774658203125, -112.99437713623047, 121.24286651611328, 224.153076171875, 585.8343505859375, 268.3016052246094, 88.55126953125, 442.276123046875, 811.4376831054688, 11.177658081054688, 544.4639892578125, 337.18634033203125, 201.10134887695312, -513.1692504882812, 932.7852172851562, 38.91374206542969, 324.182861328125, 507.5833740234375, 386.0173645019531, 297.0601806640625, 8.902149200439453, 112.767578125, -84.80661010742188, -362.5533752441406, 151.57376098632812, 26.997901916503906, 352.85205078125, 730.5045166015625, 355.03607177734375, -271.77691650390625, 86.8153076171875, 466.7105407714844, 766.2265014648438, 340.0589599609375, 360.37408447265625, 568.4425048828125, 112.63397216796875, -89.15666961669922, 586.6400146484375, 231.275390625, -2.615001678466797, 376.51605224609375, -158.3863525390625, 173.95010375976562, 463.2351379394531, 260.2413024902344, 327.50482177734375, -83.65937805175781, 474.02972412109375, 610.5845336914062, 180.9342803955078, 1382.24560546875, 215.68649291992188, 49.22728729248047, 449.0320129394531, 68.42354583740234, -205.42703247070312, 16.710403442382812, 571.3326416015625, -145.89547729492188, 20.038223266601562], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000379.npy"}
{"epoch": 0.5565345080763583, "step": 380, "batch_size": 64, "mean": 285.31103515625, "std": 326.3614196777344, "min": -742.4903564453125, "p10": -77.31346054077147, "median": 289.3478546142578, "p90": 704.3672607421875, "max": 1155.6123046875, "pos_frac": 0.859375, "sample": [137.11697387695312, 173.9821319580078, 76.72773742675781, 1155.6123046875, 20.16375160217285, 102.00176239013672, 157.95091247558594, 371.81768798828125, 381.58758544921875, 368.71893310546875, 178.3133544921875, 382.189697265625, -49.717613220214844, 610.9420776367188, 894.493408203125, 509.15594482421875, 393.19464111328125, 512.5116577148438, 474.48858642578125, 169.85540771484375, 378.7925109863281, -264.8727722167969, 227.11895751953125, 229.64797973632812, 74.23373413085938, -79.5890884399414, 458.2054443359375, 365.02227783203125, 78.26055908203125, 704.1781616210938, 355.3323974609375, 647.4386596679688, 560.3035888671875, 510.217529296875, 812.5908203125, 408.1636657714844, 180.6387481689453, -72.003662109375, 308.0541687011719, 70.6024398803711, 856.24755859375, 63.50966262817383, -272.06988525390625, 181.71102905273438, -230.00604248046875, 211.24298095703125, 257.9271240234375, 704.4483032226562, 61.834999084472656, -322.02691650390625, 419.53118896484375, 65.70785522460938, 797.0576171875, 370.74053955078125, 270.64154052734375, -89.38758087158203, 470.6136474609375, 378.2601318359375, 173.69522094726562, 11.90911865234375, -742.4903564453125, 386.26556396484375, 854.293701171875, 406.80548095703125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000380.npy"}
{"epoch": 0.5580029368575624, "step": 381, "batch_size": 64, "mean": 202.26556396484375, "std": 332.75286865234375, "min": -734.7512817382812, "p10": -173.85981903076168, "median": 188.94288635253906, "p90": 575.8732421875001, "max": 1037.357421875, "pos_frac": 0.734375, "sample": [231.89599609375, 7.036102294921875, 123.45669555664062, 568.3253173828125, 546.5060424804688, 641.1802978515625, 505.24212646484375, -17.744552612304688, 154.18746948242188, 579.7444458007812, 513.4678955078125, 118.08659362792969, 508.12591552734375, 287.13006591796875, 1037.357421875, -182.70538330078125, 437.2630615234375, 400.0556640625, 660.36865234375, 218.80776977539062, 936.7538452148438, 578.4051513671875, -193.98532104492188, -119.99147033691406, -113.1865005493164, 178.00628662109375, 569.9654541015625, 85.42939758300781, -206.73873901367188, -59.19904327392578, 111.66850280761719, 273.4542236328125, 35.13108825683594, -47.883358001708984, 225.0414276123047, -18.71682357788086, 244.48245239257812, 93.0525894165039, 199.87948608398438, -78.68026733398438, 51.55467987060547, 62.788543701171875, 983.3466186523438, 234.44403076171875, 374.9428405761719, 395.8846130371094, 46.8975830078125, -213.19094848632812, 560.00244140625, -153.2201690673828, -27.61783790588379, 396.1361083984375, -94.76321411132812, 241.5759735107422, 67.23064422607422, -208.6966552734375, 123.62744140625, 326.58349609375, 433.4075927734375, 149.27293395996094, 205.65919494628906, -734.7512817382812, -671.170654296875, 364.375732421875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000381.npy"}
{"epoch": 0.5594713656387665, "step": 382, "batch_size": 64, "mean": 238.81100463867188, "std": 258.7862548828125, "min": -282.0169677734375, "p10": -69.88626708984374, "median": 202.76862335205078, "p90": 620.7418762207034, "max": 877.938232421875, "pos_frac": 0.78125, "sample": [877.938232421875, 646.4571533203125, 159.688232421875, 139.14663696289062, 173.6429901123047, 784.577880859375, -105.25066375732422, 104.74190521240234, 325.1597900390625, -105.85533905029297, 169.9374237060547, 248.63470458984375, 383.54571533203125, 529.4429321289062, 434.97412109375, 555.4036865234375, 209.8013153076172, 109.27680969238281, 387.1709899902344, 105.32174682617188, 414.7689514160156, -102.05674743652344, -13.091659545898438, 277.7183532714844, 721.341552734375, -158.06704711914062, 302.03558349609375, -57.534332275390625, 553.2439575195312, 257.34832763671875, 672.720703125, -66.05806732177734, -7.624217987060547, 300.22613525390625, 357.2699279785156, -282.0169677734375, -174.41543579101562, 444.986572265625, -6.7539215087890625, 195.73593139648438, 166.42303466796875, 218.75930786132812, 410.5821533203125, 53.058189392089844, 257.93597412109375, 136.4835662841797, 139.0358428955078, 240.69876098632812, 211.17259216308594, 152.57923889160156, 689.396484375, 560.7395629882812, 259.087158203125, -71.52692413330078, 102.89000701904297, 681.1547241210938, 70.56159973144531, -55.39987564086914, 489.81036376953125, 547.4991455078125, 89.02495574951172, 89.2394027709961, -35.08552551269531, 116.25167846679688], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000382.npy"}
{"epoch": 0.5609397944199707, "step": 383, "batch_size": 64, "mean": 237.33407592773438, "std": 329.57427978515625, "min": -865.0255126953125, "p10": -97.76803665161133, "median": 242.7010040283203, "p90": 668.5015014648437, "max": 937.1829833984375, "pos_frac": 0.734375, "sample": [-51.633018493652344, -92.96844482421875, 166.29751586914062, -260.2835998535156, 526.7637939453125, -245.3559112548828, -96.71117401123047, 580.7591552734375, 312.600830078125, -35.1097526550293, 299.75872802734375, 244.99249267578125, 118.28179931640625, -24.547805786132812, -335.213134765625, 281.34918212890625, 937.1829833984375, 53.711647033691406, 208.60336303710938, 286.2789001464844, 808.7369384765625, -115.3979721069336, -15.240901947021484, 655.7311401367188, 268.48992919921875, -53.198890686035156, 753.0074462890625, 31.280351638793945, -67.05877685546875, 340.45550537109375, 934.8389282226562, 261.054931640625, 662.64404296875, 227.5565185546875, 339.74530029296875, 363.74139404296875, 468.28631591796875, 61.13581848144531, 180.0845184326172, 279.0572814941406, -60.23748779296875, -49.029571533203125, 240.40951538085938, 107.43721008300781, 338.5984191894531, 744.9913940429688, -98.22097778320312, 526.5421752929688, 40.14234924316406, 326.8757019042969, 792.7811279296875, 186.04843139648438, -113.32295227050781, 304.7570495605469, 397.01861572265625, 11.153327941894531, 671.0118408203125, 599.2362060546875, 212.3837890625, 572.2315063476562, -865.0255126953125, 136.2396240234375, 427.3333740234375, 480.3179626464844], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000383.npy"}
{"epoch": 0.5624082232011748, "step": 384, "batch_size": 64, "mean": 191.76878356933594, "std": 221.3711700439453, "min": -314.6159362792969, "p10": -81.28263931274414, "median": 195.32923889160156, "p90": 438.63887634277353, "max": 801.596435546875, "pos_frac": 0.8125, "sample": [215.09469604492188, 22.684478759765625, 801.596435546875, -114.23741149902344, 6.3837890625, 91.27836608886719, -165.43382263183594, -30.147933959960938, 739.224609375, 157.5093994140625, 475.3611145019531, 409.92962646484375, 203.6254119873047, -304.3589172363281, 360.3514709472656, 94.74183654785156, -73.68672180175781, 86.112548828125, 348.86810302734375, 369.12420654296875, 237.906982421875, 186.43606567382812, 112.11845397949219, 334.0179443359375, 303.3480529785156, 219.40513610839844, 158.38397216796875, -170.11920166015625, -314.6159362792969, 120.5921859741211, 332.0909423828125, 127.55763244628906, 48.43318176269531, 179.21881103515625, 187.03306579589844, -61.420936584472656, 396.99114990234375, 295.0014953613281, 384.89837646484375, 65.35458374023438, 1.8703422546386719, 271.34796142578125, 329.6910705566406, 489.5267333984375, 450.9428405761719, 298.570556640625, 274.130126953125, 251.0550537109375, 255.23501586914062, -84.53803253173828, 228.073486328125, -22.07341766357422, 82.93025970458984, 456.41802978515625, 109.40570068359375, -99.49968719482422, -54.895198822021484, 405.8758239746094, 400.4131774902344, 75.06565856933594, 605.1109008789062, 347.3212585449219, 81.23880004882812, 283.33251953125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000384.npy"}
{"epoch": 0.5638766519823789, "step": 385, "batch_size": 64, "mean": 226.03790283203125, "std": 224.1696014404297, "min": -336.81097412109375, "p10": -13.391872406005849, "median": 203.18466186523438, "p90": 538.2667449951174, "max": 709.0056762695312, "pos_frac": 0.859375, "sample": [-0.72869873046875, -18.42211151123047, 75.94353485107422, 252.6343994140625, 415.0740661621094, 301.26641845703125, 433.35986328125, 151.55191040039062, -94.93241119384766, 466.2557373046875, 159.2673797607422, 53.25968933105469, 119.3442153930664, 201.54632568359375, 399.5260314941406, 118.01671600341797, 623.8505859375, 344.06719970703125, 83.17659759521484, 96.12171173095703, 362.71917724609375, 138.24530029296875, -157.6382598876953, 290.1209411621094, -209.21145629882812, 223.57232666015625, 163.5220489501953, 590.47216796875, 76.78398132324219, 77.493896484375, 331.59466552734375, 183.38633728027344, 152.8211669921875, 709.0056762695312, 699.7369384765625, 197.39344787597656, 167.96524047851562, 20.674209594726562, 556.3303833007812, 271.21337890625, -1.6546478271484375, 90.17701721191406, 460.7945861816406, 217.5906219482422, 239.73321533203125, 595.10498046875, 381.28240966796875, -104.958251953125, 496.1182556152344, 302.8909912109375, -336.81097412109375, 62.57814025878906, 165.72305297851562, -243.68731689453125, 245.94091796875, 306.5231018066406, 402.0050354003906, 142.39224243164062, 281.1251525878906, 698.5336303710938, 143.80091857910156, 383.7784423828125, 204.822998046875, 306.2402648925781], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000385.npy"}
{"epoch": 0.5653450807635829, "step": 386, "batch_size": 64, "mean": 255.63485717773438, "std": 307.5469970703125, "min": -396.03118896484375, "p10": -62.46279487609863, "median": 189.84271240234375, "p90": 691.6408935546875, "max": 983.1959838867188, "pos_frac": 0.84375, "sample": [-46.04484939575195, 397.4096984863281, -34.66124725341797, 407.5762634277344, 694.4100341796875, 180.4889678955078, 133.99960327148438, 431.0580749511719, 147.34225463867188, 439.45074462890625, 362.1885070800781, 957.6782836914062, -277.74212646484375, 3.863309860229492, 685.1795654296875, 333.66265869140625, 296.9977722167969, 381.29632568359375, 44.67793273925781, 101.89600372314453, 155.12405395507812, 305.33612060546875, 192.5794677734375, 192.91885375976562, 326.79534912109375, 273.5466613769531, 61.88678741455078, -60.12838363647461, 983.1959838867188, 625.238525390625, -63.4632568359375, 153.69802856445312, 111.02908325195312, -189.24310302734375, 138.9640350341797, 815.621337890625, 977.6901245117188, 343.1566162109375, 747.4774169921875, 167.6893768310547, 207.49227905273438, 632.3150634765625, -278.162109375, 165.9786834716797, 43.506561279296875, 157.1536865234375, 183.4550323486328, 187.10595703125, -157.9666290283203, 406.4811706542969, -396.03118896484375, 825.7384033203125, 76.9419937133789, 62.113426208496094, 68.0382080078125, 144.6688995361328, 495.66107177734375, 344.43988037109375, -379.09466552734375, 354.4255065917969, 569.9104614257812, 268.0577087402344, 315.5722351074219, 162.98895263671875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000386.npy"}
{"epoch": 0.566813509544787, "step": 387, "batch_size": 64, "mean": 208.208984375, "std": 239.58071899414062, "min": -280.9903564453125, "p10": -67.73522377014159, "median": 188.62154388427734, "p90": 491.0324829101563, "max": 855.73046875, "pos_frac": 0.796875, "sample": [309.6308288574219, -26.85643768310547, 105.17674255371094, 137.11924743652344, 260.75213623046875, 855.73046875, 144.68740844726562, 80.10285949707031, 323.0945739746094, 372.53387451171875, -272.7660217285156, 288.5849914550781, 112.72805786132812, 51.2271842956543, -280.9903564453125, 249.1669921875, 386.25067138671875, 571.29248046875, -126.16931915283203, 193.64944458007812, 751.2783813476562, 77.28839111328125, 242.08041381835938, 698.839599609375, 432.8212890625, 207.23648071289062, 92.8737564086914, 177.56350708007812, 106.5713119506836, 155.733642578125, 35.989601135253906, 183.59364318847656, 475.0273742675781, 182.15492248535156, 497.8918151855469, 171.9468994140625, 407.888671875, 87.31953430175781, 308.9553527832031, 373.2250671386719, 44.39702606201172, -12.49451732635498, 624.025634765625, 411.1012268066406, 319.0412902832031, 115.9947509765625, 324.7966613769531, -48.72970199584961, 523.4696655273438, -75.88044738769531, 392.74609375, -206.54257202148438, 100.17931365966797, 328.13055419921875, -189.81558227539062, 219.1117706298828, -10.6929931640625, 385.48291015625, -40.197265625, -27.763534545898438, 280.6649169921875, -233.29501342773438, 270.91485595703125, 429.50494384765625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000387.npy"}
{"epoch": 0.5682819383259912, "step": 388, "batch_size": 64, "mean": 284.5751037597656, "std": 320.5782470703125, "min": -512.4014892578125, "p10": -54.594956970214824, "median": 241.24929809570312, "p90": 660.0697082519533, "max": 1399.8865966796875, "pos_frac": 0.859375, "sample": [1399.8865966796875, 528.29541015625, -87.20310974121094, 576.982666015625, 504.6321716308594, 792.29931640625, 191.37229919433594, -235.72250366210938, 164.20254516601562, 314.07855224609375, 206.46145629882812, 917.8834228515625, 488.5679931640625, 248.1576385498047, -512.4014892578125, 42.29895782470703, 371.427978515625, 194.17599487304688, 493.34356689453125, 30.500099182128906, 138.6138916015625, 680.3287353515625, 181.52444458007812, -90.27213287353516, 287.1270751953125, 266.0971984863281, 83.7738265991211, -0.1522979736328125, 113.11264038085938, 340.00543212890625, 88.60222625732422, 140.1887664794922, 307.0911865234375, 86.60989379882812, 486.71142578125, 184.2117919921875, 120.75566101074219, 37.867164611816406, 546.3635864257812, 97.82828521728516, 489.0520324707031, 69.25831604003906, 382.2873229980469, 174.34237670898438, 208.10028076171875, 65.26834869384766, 374.8919677734375, 309.5664367675781, -205.78121948242188, 378.9822998046875, 335.3297424316406, 1246.399169921875, -121.74763488769531, 399.3768310546875, 234.34095764160156, 27.31829833984375, -63.459197998046875, 520.7503662109375, 706.15673828125, 706.2088623046875, 612.7986450195312, 418.7323303222656, 252.91798400878906, -33.91172790527344], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000388.npy"}
{"epoch": 0.5697503671071953, "step": 389, "batch_size": 64, "mean": 237.3849334716797, "std": 312.6252746582031, "min": -383.2353515625, "p10": -140.81264801025387, "median": 232.60237884521484, "p90": 590.6101684570314, "max": 968.6485595703125, "pos_frac": 0.75, "sample": [79.0054931640625, 542.353271484375, 312.1817321777344, -353.5305480957031, 113.52606201171875, 315.29437255859375, 155.89329528808594, 549.90625, 402.37774658203125, 191.7950439453125, 220.51034545898438, -90.85709381103516, 340.248779296875, -206.52536010742188, 128.30978393554688, 496.1726379394531, 130.6480712890625, 551.9486694335938, 422.7181396484375, -108.62643432617188, -286.3456726074219, -28.655559539794922, 607.1793823242188, 14.616268157958984, 316.399169921875, 628.0242919921875, -383.2353515625, 1.9730072021484375, 124.30413055419922, 147.37721252441406, 352.5298767089844, -72.48703002929688, 143.9564666748047, 306.3285827636719, -283.8428039550781, 325.04766845703125, 216.70887756347656, 524.1915893554688, 414.70501708984375, 968.6485595703125, 467.47222900390625, -65.97975158691406, 422.0852966308594, 426.287353515625, 301.90936279296875, 392.96343994140625, -4.344789505004883, 188.73023986816406, 498.25677490234375, 207.47921752929688, 78.9280776977539, 936.2708740234375, -83.74018859863281, -152.30389404296875, 244.6944122314453, 799.0245971679688, -33.64788818359375, 949.614013671875, 321.09442138671875, -113.99974060058594, 325.3410339355469, -246.92445373535156, 270.3544006347656, 832.2963256835938], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000389.npy"}
{"epoch": 0.5712187958883994, "step": 390, "batch_size": 64, "mean": 235.72113037109375, "std": 371.68963623046875, "min": -541.5992431640625, "p10": -159.7361297607422, "median": 239.5024185180664, "p90": 535.1035217285156, "max": 2138.1357421875, "pos_frac": 0.75, "sample": [195.5382843017578, 306.4551086425781, 623.81298828125, 506.34075927734375, -541.5992431640625, -153.50827026367188, -276.19915771484375, 144.5928192138672, 343.1802673339844, 251.6019287109375, 500.4530944824219, -10.901046752929688, 336.8459167480469, 410.0374450683594, 235.89404296875, 110.24893188476562, -162.40521240234375, 2138.1357421875, -134.69119262695312, 437.70068359375, 143.41201782226562, 500.2030944824219, 536.462646484375, 173.26504516601562, 204.59776306152344, -53.964656829833984, 102.20865631103516, -106.91091918945312, 161.02940368652344, 295.3031005859375, 195.87796020507812, -12.465423583984375, -200.632080078125, -135.02102661132812, 166.75051879882812, 535.9736938476562, 794.4732666015625, 542.8131713867188, -379.47259521484375, 334.881591796875, 533.0731201171875, 656.3880004882812, 395.6952819824219, 116.66002655029297, 477.4468994140625, 224.89454650878906, 157.276611328125, 71.75242614746094, 514.4811401367188, 460.33856201171875, 500.9452209472656, 270.24517822265625, 362.8472595214844, 321.760498046875, 454.8175048828125, -18.203481674194336, -24.437591552734375, -325.57318115234375, -518.9898681640625, 243.1107940673828, 425.40631103515625, 112.72503662109375, 277.57989501953125, 335.5931701660156], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000390.npy"}
{"epoch": 0.5726872246696035, "step": 391, "batch_size": 64, "mean": 137.55014038085938, "std": 277.8065185546875, "min": -696.9934692382812, "p10": -122.54813537597656, "median": 98.12702560424805, "p90": 448.81882934570325, "max": 1021.6116943359375, "pos_frac": 0.71875, "sample": [-20.219989776611328, 10.568473815917969, 65.90535736083984, -67.69882202148438, 149.43191528320312, 261.0348815917969, 27.01590919494629, 99.20438385009766, 34.9020881652832, 389.4248352050781, -101.70699310302734, 158.33131408691406, -228.55474853515625, -55.79192352294922, 97.04966735839844, 43.912200927734375, 368.51654052734375, 119.83572387695312, 46.51805877685547, -117.88134765625, 225.02806091308594, 469.1525573730469, 544.8006591796875, 407.7306213378906, -102.41605377197266, 21.363845825195312, 1021.6116943359375, -212.6084442138672, -113.70761108398438, 296.52825927734375, 282.90362548828125, -81.8349838256836, 81.87556457519531, 25.85123062133789, -69.42949676513672, 185.5280303955078, 113.93470001220703, -5.304435729980469, 464.0474548339844, 150.512451171875, 89.3917465209961, -184.64755249023438, 159.27084350585938, 330.9710388183594, 669.5445556640625, -218.58554077148438, 595.5048217773438, -47.99565887451172, -489.9102783203125, 39.71343231201172, 52.428672790527344, 244.34756469726562, -124.54818725585938, 395.71533203125, 413.2853698730469, 32.45411682128906, 766.4754028320312, 380.61041259765625, 318.392822265625, -696.9934692382812, 271.6602478027344, 254.50360107421875, 323.31744384765625, 242.9373321533203], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000391.npy"}
{"epoch": 0.5741556534508077, "step": 392, "batch_size": 64, "mean": 275.10516357421875, "std": 266.0367126464844, "min": -183.07733154296875, "p10": 0.29868011474611755, "median": 216.0272674560547, "p90": 610.6688415527344, "max": 1100.1568603515625, "pos_frac": 0.890625, "sample": [24.390724182128906, 193.31015014648438, -147.20510864257812, 29.02090072631836, 214.1413116455078, -63.372901916503906, 615.8138427734375, 261.428466796875, 313.5807800292969, 382.77056884765625, 615.5181884765625, -135.3286590576172, 215.8384246826172, 111.59426879882812, 192.22996520996094, 31.056821823120117, 599.3536987304688, 778.95556640625, 484.3140869140625, 195.22750854492188, -101.53893280029297, 369.3927001953125, 480.3710021972656, 58.33348083496094, 330.3720703125, 417.11669921875, 223.23016357421875, 1100.1568603515625, 39.607757568359375, 208.98687744140625, 206.60780334472656, 283.9293212890625, 435.94830322265625, 166.86154174804688, 521.7149047851562, 263.528564453125, 367.55596923828125, 166.28549194335938, 216.2161102294922, 144.239990234375, 342.523193359375, 435.1395568847656, 56.449249267578125, -10.026481628417969, 514.1901245117188, 901.4425048828125, 414.10601806640625, 442.1540222167969, 416.24688720703125, -183.07733154296875, 111.31746673583984, 78.57615661621094, 100.93606567382812, 309.4334716796875, 145.24795532226562, 171.56893920898438, 292.93341064453125, 40.43119430541992, 1029.0184326171875, 88.67420959472656, -69.92131805419922, 77.22830200195312, 372.5332336425781, 718.050537109375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000392.npy"}
{"epoch": 0.5756240822320118, "step": 393, "batch_size": 64, "mean": 217.66339111328125, "std": 344.2980651855469, "min": -293.9657287597656, "p10": -210.83833007812498, "median": 208.16929626464844, "p90": 602.7476257324222, "max": 1594.342529296875, "pos_frac": 0.71875, "sample": [317.13201904296875, 732.8363037109375, -3.8557167053222656, -112.76935577392578, 431.5836181640625, 472.8310546875, -198.62332153320312, 429.08990478515625, 301.59637451171875, 237.22451782226562, 62.4547119140625, 262.05316162109375, 187.67889404296875, 314.6251220703125, 416.3896484375, 71.15258026123047, 1046.9609375, 965.3270874023438, 640.5960693359375, -7.117652893066406, 154.34152221679688, 493.040283203125, -256.9770812988281, -285.22802734375, -94.45140075683594, 21.296710968017578, -222.1145477294922, 212.53652954101562, 335.88824462890625, 287.51123046875, 121.01911926269531, 445.7554931640625, 447.27655029296875, 203.80206298828125, 518.3980102539062, 313.5010986328125, 288.1024169921875, 137.85617065429688, 378.54638671875, -104.58308410644531, 37.41096496582031, 170.13916015625, 656.1229858398438, -223.68392944335938, -257.2740478515625, 485.5238037109375, 306.14910888671875, -216.07333374023438, 40.24895477294922, 192.0826416015625, 81.60822296142578, 251.55235290527344, -167.77076721191406, -124.38089752197266, -108.92550659179688, 349.3776550292969, 421.69647216796875, -293.9657287597656, 299.6165771484375, 57.17247772216797, -88.45874786376953, 1594.342529296875, 638.8974609375, -133.63504028320312], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000393.npy"}
{"epoch": 0.5770925110132159, "step": 394, "batch_size": 64, "mean": 224.81240844726562, "std": 294.61822509765625, "min": -343.3396301269531, "p10": -119.9631233215332, "median": 154.21025848388672, "p90": 596.0541381835938, "max": 1110.05810546875, "pos_frac": 0.796875, "sample": [100.67094421386719, 271.3133544921875, -208.10617065429688, 431.8973693847656, 160.07186889648438, -130.00433349609375, 41.88655471801758, 148.34864807128906, 560.97607421875, 74.50100708007812, 583.9228515625, -48.740028381347656, -343.3396301269531, 98.08256530761719, -38.93321228027344, -128.86520385742188, -119.99817657470703, 738.0104370117188, 597.7022705078125, -205.9281463623047, 491.205078125, -132.12115478515625, 291.98828125, 287.2658386230469, 186.9966583251953, 378.033447265625, 120.68910217285156, 105.93333435058594, -76.34410858154297, 592.20849609375, 572.8153076171875, 731.837158203125, -119.88133239746094, 359.0935363769531, 100.52841186523438, 45.2547607421875, 143.66506958007812, 5.939689636230469, 131.42431640625, 633.873291015625, 32.73999786376953, 466.0400695800781, 17.946613311767578, 240.7670440673828, 77.30278015136719, 54.63671875, 482.9805603027344, 828.7779541015625, 233.85231018066406, 202.54519653320312, 477.4464111328125, 279.867919921875, 1110.05810546875, 951.5376586914062, 314.45550537109375, 48.60466766357422, 83.33757019042969, -110.42828369140625, 141.18667602539062, 422.0383605957031, 170.18911743164062, -17.572221755981445, 178.35968017578125, 267.4484558105469], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000394.npy"}
{"epoch": 0.57856093979442, "step": 395, "batch_size": 64, "mean": 252.47410583496094, "std": 321.2036437988281, "min": -478.4792785644531, "p10": -87.63991851806641, "median": 244.41361236572266, "p90": 721.6834899902345, "max": 1017.582763671875, "pos_frac": 0.765625, "sample": [456.5647888183594, -87.62954711914062, 313.2381591796875, 70.28387451171875, 277.54986572265625, 348.19647216796875, 362.1468811035156, 238.8087158203125, -274.38330078125, 271.0001220703125, -176.56849670410156, 508.146240234375, 371.3469543457031, -28.911643981933594, 181.75173950195312, -56.81447219848633, 671.5513305664062, 312.69036865234375, 919.795166015625, -77.34260559082031, 636.7213134765625, 73.3718032836914, 521.7039184570312, -139.9072723388672, 215.81192016601562, 6.319305419921875, 203.17947387695312, 363.94158935546875, -55.39531707763672, 100.60668182373047, 743.168701171875, 1009.0609741210938, 437.85406494140625, 85.59561157226562, 347.9495544433594, 50.202880859375, 178.4134521484375, 255.30812072753906, 913.0576171875, 196.8304443359375, 488.979736328125, 519.35400390625, -478.4792785644531, -24.522903442382812, -245.54287719726562, 292.4664306640625, -65.56700134277344, 808.8951416015625, 87.73059844970703, -87.64436340332031, 66.27738952636719, -198.72030639648438, 72.73815155029297, 1017.582763671875, 756.9873657226562, 250.0185089111328, 273.5699157714844, 565.7011108398438, 262.89886474609375, 455.09759521484375, 444.8139343261719, 64.92652893066406, 110.55742645263672, -24.99197769165039], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000395.npy"}
{"epoch": 0.580029368575624, "step": 396, "batch_size": 64, "mean": 227.20425415039062, "std": 262.1206359863281, "min": -506.9161376953125, "p10": -31.94180030822752, "median": 187.7626190185547, "p90": 541.2054107666017, "max": 979.0399169921875, "pos_frac": 0.828125, "sample": [154.72764587402344, 668.982177734375, 161.55746459960938, 191.902587890625, 105.4671630859375, 554.8818969726562, 137.56005859375, 207.01629638671875, 183.62265014648438, 293.752197265625, 349.3065490722656, 286.76824951171875, 220.07205200195312, 387.8800964355469, -118.35092163085938, 141.1732635498047, 216.65573120117188, -6.2875823974609375, 68.52531433105469, 168.1968994140625, 82.75052642822266, 407.9421691894531, 885.0584106445312, 298.5067138671875, 119.28457641601562, 509.2936096191406, 169.85362243652344, 122.905517578125, 208.6993408203125, 645.4730224609375, -11.562671661376953, 368.0743103027344, 192.50714111328125, 273.3490905761719, 62.7453498840332, 208.1393280029297, 474.2098388671875, -206.4697265625, 497.713134765625, 696.2078857421875, 399.8137512207031, 1.994873046875, -71.89715576171875, 500.5503234863281, -4.675537109375, -64.77285766601562, 482.937744140625, -40.67571258544922, -8.735811233520508, 250.23745727539062, 881.8126831054688, 106.67681121826172, 57.73822021484375, -122.4429931640625, 37.79106140136719, 312.9552917480469, 295.1003112792969, 165.70803833007812, 202.60971069335938, -506.9161376953125, 97.5684814453125, 979.0399169921875, 60.70440673828125, 149.85885620117188], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000396.npy"}
{"epoch": 0.5814977973568282, "step": 397, "batch_size": 64, "mean": 286.0792541503906, "std": 299.88714599609375, "min": -305.480224609375, "p10": -55.99844245910644, "median": 246.64161682128906, "p90": 717.4257934570313, "max": 1015.45703125, "pos_frac": 0.84375, "sample": [11.41330337524414, 549.9032592773438, -0.67486572265625, 376.7953796386719, 175.31643676757812, 17.277423858642578, 277.6094665527344, 359.158447265625, 503.033935546875, 223.38087463378906, 685.835693359375, 42.53410339355469, 623.7943115234375, -109.759521484375, 108.95941162109375, 334.3002014160156, 183.98590087890625, -208.60147094726562, 474.27288818359375, 752.5478515625, 249.4490966796875, 741.06591796875, 312.34625244140625, 442.1363220214844, 1015.45703125, -16.571212768554688, 700.4664306640625, 477.2279968261719, -92.64016723632812, 144.25169372558594, 565.15673828125, 195.61940002441406, 102.71707153320312, 864.8189697265625, 375.6083068847656, 243.83413696289062, 110.56529235839844, 301.40350341796875, 155.25921630859375, 150.59796142578125, -305.480224609375, -169.84548950195312, 96.0056381225586, -294.178466796875, 619.4061279296875, 173.45704650878906, -58.59215545654297, 141.19912719726562, -49.94644546508789, 482.69415283203125, 29.833660125732422, 724.694091796875, 158.97244262695312, 980.0023803710938, 142.99505615234375, 879.9630126953125, 106.97419738769531, 279.884521484375, 508.49786376953125, 152.60018920898438, 270.85968017578125, 335.46942138671875, 390.9642639160156, 292.78961181640625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000397.npy"}
{"epoch": 0.5829662261380323, "step": 398, "batch_size": 64, "mean": 261.866943359375, "std": 345.701904296875, "min": -455.0228271484375, "p10": -101.20583190917968, "median": 177.7581024169922, "p90": 654.6960144042969, "max": 1256.3289794921875, "pos_frac": 0.765625, "sample": [61.437744140625, 82.01893615722656, 480.2477111816406, 657.3906860351562, -163.38356018066406, 103.53125762939453, 342.8505859375, -358.9023742675781, 109.94914245605469, -90.30596160888672, 515.2340087890625, -378.65582275390625, 638.8750610351562, 381.226318359375, 493.7153625488281, 967.0706787109375, 163.01087951660156, 4.844657897949219, 575.345703125, 317.98095703125, 551.182373046875, 175.24420166015625, 648.408447265625, 482.7845153808594, 1161.681884765625, -10.652557373046875, 274.18804931640625, 216.68980407714844, 751.5993041992188, 391.81610107421875, 98.39126586914062, 197.62539672851562, -106.11871337890625, -37.33363342285156, 307.4895935058594, 90.24861145019531, 70.92491912841797, 399.6468505859375, 46.178504943847656, 457.66204833984375, 159.2539825439453, 442.5589904785156, 136.35043334960938, 92.0022964477539, 137.97116088867188, -49.99976348876953, 374.07122802734375, -54.563018798828125, -79.53233337402344, 76.57571411132812, 135.18133544921875, 1256.3289794921875, 614.3013916015625, 782.6704711914062, -455.0228271484375, 534.2784423828125, 463.47802734375, 516.3206176757812, 817.0726318359375, 180.27200317382812, -48.95744323730469, -33.4658203125, -105.87720489501953, -202.92401123046875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000398.npy"}
{"epoch": 0.5844346549192364, "step": 399, "batch_size": 64, "mean": 231.94464111328125, "std": 346.38043212890625, "min": -570.634521484375, "p10": -211.1583496093749, "median": 204.91522979736328, "p90": 661.6203735351563, "max": 1105.3603515625, "pos_frac": 0.75, "sample": [130.48135375976562, 340.74853515625, 90.22876739501953, -242.98541259765625, 626.422119140625, -78.83934783935547, 668.464599609375, 28.784069061279297, 101.34992218017578, -66.09481811523438, 271.2580871582031, -510.3868103027344, 684.7734375, -64.18804931640625, 645.6505126953125, 193.4720001220703, 320.123291015625, -101.22075653076172, 24.522897720336914, 100.74505615234375, 730.0079345703125, 196.4323272705078, 324.67132568359375, 246.12705993652344, 597.1517333984375, 833.52734375, 213.39813232421875, 399.4041748046875, -297.061767578125, 281.7485046386719, -5.3831634521484375, 417.5249938964844, 184.95742797851562, 151.9457550048828, 423.2783508300781, -260.2622375488281, 1105.3603515625, 518.42724609375, 573.0945434570312, -293.52581787109375, 306.7810363769531, 494.4844055175781, 186.75941467285156, -570.634521484375, -74.21124267578125, 439.453857421875, 96.27357482910156, 921.6849365234375, 77.95854949951172, -136.89520263671875, 422.2339782714844, 1029.53173828125, 496.74224853515625, 131.2593231201172, 519.6102294921875, 215.10426330566406, 183.00375366210938, 242.08395385742188, -347.7770080566406, 245.26318359375, 470.12994384765625, -61.34980773925781, 151.2679443359375, -98.4350814819336], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000399.npy"}
{"epoch": 0.5859030837004405, "step": 400, "batch_size": 64, "mean": 337.75396728515625, "std": 309.8727111816406, "min": -553.495361328125, "p10": -43.59839782714843, "median": 331.25514221191406, "p90": 693.2622497558594, "max": 1119.57080078125, "pos_frac": 0.84375, "sample": [264.0190734863281, 495.4971923828125, 424.91290283203125, 261.18231201171875, 580.3359375, -95.51211547851562, 534.0650024414062, 211.97914123535156, -60.18486785888672, 624.9921264648438, 230.29266357421875, 122.7349624633789, 537.6251220703125, 296.08233642578125, -553.495361328125, 732.205322265625, 694.79248046875, 203.5580596923828, 409.1290283203125, 338.98590087890625, 832.7168579101562, 510.54876708984375, 42.309478759765625, 307.50921630859375, 54.78990936279297, 337.19744873046875, 90.34544372558594, 495.26654052734375, 335.95977783203125, 408.7899169921875, 397.74566650390625, 194.522216796875, -89.52460479736328, 349.7366943359375, 282.4183654785156, -250.90159606933594, 21.242149353027344, 783.347412109375, 326.5505065917969, 540.268798828125, -41.25721740722656, 294.1652526855469, 383.5010681152344, 1022.0303955078125, -0.3023052215576172, 607.1626586914062, 278.05413818359375, 196.69969177246094, 14.546405792236328, 1119.57080078125, 687.34619140625, -20.904754638671875, 249.58587646484375, 479.4346923828125, 293.935302734375, 689.6917114257812, -249.26002502441406, 688.785888671875, 644.6812744140625, 417.27423095703125, 282.03509521484375, 662.1444091796875, 737.8994140625, -44.60176086425781], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000400.npy"}
{"epoch": 0.5873715124816447, "step": 401, "batch_size": 64, "mean": 189.05294799804688, "std": 396.8406066894531, "min": -859.5543823242188, "p10": -248.506233215332, "median": 137.12760162353516, "p90": 830.7562438964846, "max": 1105.3958740234375, "pos_frac": 0.6875, "sample": [138.60987854003906, 511.2022705078125, -38.21860885620117, 212.465087890625, 555.9268188476562, 105.3243637084961, -395.71612548828125, -390.9252014160156, -859.5543823242188, -100.95011901855469, 30.42711639404297, 254.72702026367188, -173.849365234375, 87.7509994506836, -26.380828857421875, -36.75183868408203, 312.10711669921875, 267.955078125, 143.84384155273438, 44.66499328613281, -104.19723510742188, -208.8143768310547, -75.29108428955078, 964.6915283203125, 49.951148986816406, 634.229736328125, 998.4945678710938, 456.6385498046875, 862.1024169921875, 219.28294372558594, 78.24449920654297, 294.29779052734375, -62.622920989990234, 33.782630920410156, -73.56320190429688, -465.4627990722656, 1000.31005859375, -9.280284881591797, 234.54583740234375, 553.098876953125, 193.91310119628906, 555.9521484375, 347.6187438964844, -51.62163162231445, 356.6201171875, 170.44564819335938, 173.2386016845703, -265.51702880859375, 1105.3958740234375, -491.5390625, 595.1256713867188, 507.232666015625, 274.5989685058594, 92.23772430419922, 885.6436767578125, 868.3699951171875, 16.446205139160156, 292.0390319824219, 22.92889404296875, 59.334171295166016, 135.64532470703125, 757.6151733398438, -412.4019470214844, -113.02957153320312], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000401.npy"}
{"epoch": 0.5888399412628488, "step": 402, "batch_size": 64, "mean": 349.14495849609375, "std": 313.59747314453125, "min": -366.16815185546875, "p10": 9.849178314208991, "median": 321.88720703125, "p90": 813.3113952636719, "max": 1334.9197998046875, "pos_frac": 0.90625, "sample": [322.2843017578125, 28.79371452331543, 6.768943786621094, 479.70758056640625, 238.62472534179688, 438.5246276855469, -5.236602783203125, 328.6858825683594, -43.86693572998047, 618.1591186523438, 425.1956787109375, 577.7628173828125, 655.3698120117188, 320.2077331542969, -106.5723876953125, 578.412841796875, 23.063827514648438, 317.1501159667969, 234.21002197265625, 379.1146545410156, 561.9768676757812, 245.75247192382812, 582.9995727539062, -366.16815185546875, 408.1977233886719, 500.19189453125, 23.956695556640625, 917.436279296875, 31.041152954101562, 942.4952392578125, 227.14230346679688, 385.19500732421875, 160.01979064941406, 484.578369140625, 104.59339141845703, 393.8470153808594, 879.2048950195312, 100.83627319335938, 184.79945373535156, 669.6023559570312, 17.036392211914062, -57.81742477416992, 812.6411743164062, 911.6917724609375, 206.32339477539062, 483.3046569824219, 813.5986328125, 166.51168823242188, 340.71075439453125, 1334.9197998046875, 434.17724609375, 557.3523559570312, 50.72370910644531, 899.4443359375, 241.17996215820312, 321.4901123046875, 370.0670166015625, 289.5602111816406, 143.63430786132812, -271.8105163574219, 371.6593933105469, 169.88809204101562, 190.77719116210938, 294.1521301269531], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000402.npy"}
{"epoch": 0.5903083700440529, "step": 403, "batch_size": 64, "mean": 262.4422302246094, "std": 307.4713439941406, "min": -341.9852294921875, "p10": -122.79305953979492, "median": 252.69981384277344, "p90": 659.5537414550784, "max": 1164.4117431640625, "pos_frac": 0.8125, "sample": [254.77627563476562, 47.576637268066406, 90.99420928955078, 208.48577880859375, 326.3131408691406, 55.56563186645508, 250.62335205078125, 400.59716796875, 38.07453155517578, -240.33631896972656, 144.71420288085938, -54.071990966796875, -341.9852294921875, 569.95703125, 198.4319305419922, 802.8333740234375, 263.1698303222656, 18.83839988708496, 473.400146484375, 269.59796142578125, 402.2720947265625, 58.90494918823242, 693.236083984375, -124.01615142822266, -145.97775268554688, 578.6783447265625, 450.35418701171875, 678.724609375, 257.3485412597656, 893.3778076171875, -166.1950225830078, 458.0297546386719, 598.0782470703125, -10.327152252197266, 8.478813171386719, 546.8626708984375, -142.41339111328125, 270.0296325683594, 614.8217163085938, 496.82696533203125, -190.3622283935547, 250.32980346679688, 1164.4117431640625, 3.604604721069336, 541.4100952148438, 49.08811950683594, 422.2756652832031, 780.956787109375, 341.69781494140625, 891.8248291015625, 311.8388977050781, 378.4951171875, 352.9056091308594, 97.00550842285156, -96.61625671386719, 31.246963500976562, -46.70869445800781, 431.4001159667969, 232.3123016357422, 79.32475280761719, 323.23779296875, 217.03158569335938, -119.93917846679688, 154.88038635253906], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000403.npy"}
{"epoch": 0.591776798825257, "step": 404, "batch_size": 64, "mean": 256.16156005859375, "std": 348.86016845703125, "min": -628.1909790039062, "p10": -204.01891784667964, "median": 239.97496795654297, "p90": 674.9394287109376, "max": 1150.8199462890625, "pos_frac": 0.765625, "sample": [-313.0771484375, 287.1291198730469, 38.22539520263672, 643.28271484375, 521.60302734375, 175.6399383544922, 196.1952667236328, 918.246337890625, 142.04833984375, 743.6709594726562, 1150.8199462890625, 229.769775390625, 233.56764221191406, 792.6731567382812, -77.16931915283203, -362.4808349609375, 873.0459594726562, 141.36175537109375, -261.14752197265625, 205.1322479248047, 25.51009750366211, -26.98253631591797, -78.36332702636719, 425.998779296875, 276.20562744140625, 207.53005981445312, -269.0441589355469, 395.3358459472656, 254.62986755371094, 377.3844909667969, 547.604248046875, -224.369140625, 414.11614990234375, 246.38229370117188, 569.593017578125, 91.4800796508789, -147.1136932373047, 189.2926483154297, 376.23468017578125, 553.8394775390625, -628.1909790039062, 399.716796875, 91.56314086914062, 597.2929077148438, 230.72906494140625, -280.55499267578125, -156.20193481445312, 500.7700500488281, 604.4033203125, -130.4222869873047, 345.8269958496094, 31.456321716308594, 400.8211364746094, 392.9874267578125, 566.7149658203125, 688.506591796875, 351.0098571777344, 57.12342834472656, -0.42882728576660156, -156.53506469726562, 842.00927734375, 132.11895751953125, 642.1741943359375, 387.6478271484375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000404.npy"}
{"epoch": 0.593245227606461, "step": 405, "batch_size": 64, "mean": 210.2313995361328, "std": 390.4349670410156, "min": -518.18115234375, "p10": -313.5563232421875, "median": 174.51744079589844, "p90": 770.4191345214844, "max": 1147.056396484375, "pos_frac": 0.6875, "sample": [853.1793823242188, 767.898681640625, 321.0024108886719, -76.48213195800781, -406.02520751953125, -40.537689208984375, -499.01507568359375, 58.342926025390625, 389.3312683105469, -295.15936279296875, 62.43523406982422, 1067.820068359375, 540.5291748046875, 110.3172607421875, 55.436241149902344, 1147.056396484375, -29.423128128051758, -48.421485900878906, 57.145111083984375, -132.98721313476562, 134.69723510742188, 220.7923126220703, 111.33076477050781, 456.1453857421875, 169.01409912109375, 207.79026794433594, -110.18159484863281, 532.9447021484375, -429.35028076171875, -74.6395492553711, 405.8335876464844, 93.27909088134766, 403.22772216796875, -51.133995056152344, 400.85125732421875, 119.53883361816406, 281.25030517578125, -321.44073486328125, -105.47301483154297, 63.738101959228516, 447.3880310058594, 180.02078247070312, 715.15283203125, 368.9501953125, -3.9548683166503906, 181.61090087890625, 620.082275390625, -271.87060546875, 233.35183715820312, 262.0946044921875, 455.17242431640625, -438.468994140625, 194.8556671142578, 969.7763671875, 84.1343765258789, -518.18115234375, 520.1132202148438, 362.55120849609375, -51.293212890625, 948.822265625, 864.9520874023438, -341.997802734375, 489.3907470703125, 771.4993286132812], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000405.npy"}
{"epoch": 0.5947136563876652, "step": 406, "batch_size": 64, "mean": 280.27996826171875, "std": 344.54620361328125, "min": -337.92864990234375, "p10": -107.24825592041014, "median": 180.8355712890625, "p90": 690.5219299316407, "max": 1179.090087890625, "pos_frac": 0.8125, "sample": [570.08447265625, 143.33265686035156, 199.19638061523438, 51.64366149902344, 134.00537109375, 169.5135498046875, 147.23001098632812, -316.73431396484375, 1139.86181640625, 157.1788330078125, 470.1042175292969, 163.6376495361328, 1179.090087890625, 521.0576171875, 312.39471435546875, -59.48857498168945, 568.9527587890625, 316.4449157714844, 507.24761962890625, -139.58729553222656, 857.3562622070312, 357.63763427734375, 315.056640625, -166.4644317626953, 47.300331115722656, 146.7664337158203, 79.36970520019531, -98.69638061523438, 522.2158203125, 19.723602294921875, -110.91334533691406, 276.1129150390625, 584.0430908203125, -322.25653076171875, -156.4305419921875, 487.74957275390625, 845.9351806640625, 168.0736083984375, 186.8231964111328, -14.351287841796875, 174.8479461669922, 144.39990234375, 446.6251220703125, -34.48970031738281, -22.869537353515625, 120.53543853759766, 685.5565185546875, 365.7172546386719, 33.5484619140625, 221.73043823242188, 587.3909912109375, 653.7811279296875, 389.87713623046875, 14.244125366210938, -337.92864990234375, 12.34200668334961, 881.3873901367188, 677.1058959960938, 1106.9615478515625, 376.44427490234375, 309.19805908203125, 692.6499633789062, 119.18232727050781, 59.463661193847656], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000406.npy"}
{"epoch": 0.5961820851688693, "step": 407, "batch_size": 64, "mean": 175.37591552734375, "std": 332.9995422363281, "min": -909.220947265625, "p10": -214.25228576660155, "median": 132.79068756103516, "p90": 582.8771240234375, "max": 851.525390625, "pos_frac": 0.71875, "sample": [-259.1201171875, 583.3631591796875, 428.6889343261719, -392.0909118652344, -338.3951721191406, -68.2612533569336, 280.7760009765625, -206.84048461914062, -217.42877197265625, 256.0814208984375, 457.45904541015625, 113.70828247070312, 354.01873779296875, 767.0881958007812, 658.4876098632812, 26.82523536682129, -68.19100952148438, 511.243408203125, 96.92597198486328, -78.86320495605469, 416.9671325683594, 108.21966552734375, 76.28242492675781, 146.99301147460938, 107.05949401855469, 541.1283569335938, 256.7041931152344, 299.90338134765625, 103.23603820800781, 74.21867370605469, -164.21658325195312, 232.27560424804688, 14.185104370117188, -119.89240264892578, 26.08135223388672, 96.29783630371094, 478.2250671386719, 753.9352416992188, 82.64042663574219, -909.220947265625, 720.0308837890625, 354.0473327636719, 447.15692138671875, -28.683189392089844, 420.60980224609375, 215.3428192138672, 184.400634765625, 851.525390625, 402.7583923339844, 295.55853271484375, 555.4300537109375, -206.06451416015625, -92.03742980957031, 581.7430419921875, -50.22101593017578, 57.58915710449219, 118.58836364746094, 541.09326171875, -192.0841827392578, -358.7498779296875, 658.368408203125, 220.9312286376953, -277.82720947265625, 278.0535888671875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000407.npy"}
{"epoch": 0.5976505139500734, "step": 408, "batch_size": 64, "mean": 196.483642578125, "std": 259.4457702636719, "min": -304.40728759765625, "p10": -147.39761657714843, "median": 179.0909881591797, "p90": 600.0732971191409, "max": 742.9478759765625, "pos_frac": 0.75, "sample": [59.61698913574219, 694.9782104492188, 742.9478759765625, 4.89581298828125, 406.25921630859375, 314.9200134277344, -50.28424072265625, 147.3036651611328, 10.88531494140625, -153.1426239013672, -9.02802848815918, -266.03173828125, 27.936378479003906, -18.864990234375, -5.089805603027344, -304.40728759765625, 61.64219665527344, 303.4225158691406, 353.0231628417969, 51.87760925292969, 208.60597229003906, 272.71484375, -123.84295654296875, 349.1664123535156, -133.9925994873047, 205.29290771484375, 96.27135467529297, 168.0069580078125, 78.7293930053711, 718.357666015625, 308.69873046875, 121.38809204101562, 515.855224609375, -5.728431701660156, -212.3295135498047, -193.44094848632812, 255.1837158203125, 204.78147888183594, -0.5877838134765625, 184.40469360351562, 483.4405212402344, 347.1588439941406, 348.75115966796875, 531.505126953125, 172.86634826660156, -208.93063354492188, 649.8670654296875, 71.9325180053711, -22.848033905029297, 65.53729248046875, 173.77728271484375, 640.1881713867188, 289.4070129394531, -167.99493408203125, 308.44122314453125, 321.3114318847656, 64.87268829345703, 454.81573486328125, 234.26612854003906, 330.8917236328125, 681.7224731445312, 629.4596557617188, 452.0032958984375, 332.11602783203125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000408.npy"}
{"epoch": 0.5991189427312775, "step": 409, "batch_size": 64, "mean": 197.42636108398438, "std": 283.26446533203125, "min": -472.7220153808594, "p10": -85.07880325317382, "median": 161.3213882446289, "p90": 561.5595336914064, "max": 941.704345703125, "pos_frac": 0.765625, "sample": [-36.974639892578125, -345.7773132324219, 300.5571594238281, 226.26422119140625, 195.17774963378906, 407.82440185546875, -45.89502716064453, -89.57293701171875, -7.459381103515625, 54.23765563964844, 487.657470703125, 489.0207824707031, -265.8012390136719, 199.30191040039062, 463.3354797363281, 123.25359344482422, 189.54769897460938, 240.79579162597656, 202.84085083007812, 780.194091796875, 349.5700988769531, -124.4553451538086, 369.1503601074219, 67.66056823730469, 705.5736694335938, -125.66692352294922, 36.43956756591797, 60.98011016845703, -50.101593017578125, 140.6998748779297, 301.49920654296875, 723.8529663085938, 530.6366577148438, -472.7220153808594, 56.79993438720703, 941.704345703125, -9.476890563964844, 94.76347351074219, -371.72222900390625, 163.6593017578125, 520.974609375, -74.59249114990234, 574.8121948242188, 48.5348014831543, 195.31777954101562, 251.23330688476562, 79.31549072265625, 176.49525451660156, 760.6885375976562, 377.54315185546875, -62.21076202392578, 174.9548797607422, -38.99873352050781, 158.9834747314453, 146.70448303222656, 454.15386962890625, 351.1621398925781, 311.639892578125, 73.41781616210938, 112.50917053222656, 739.752685546875, 88.02046203613281, 134.5338134765625, 122.96759796142578], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000409.npy"}
{"epoch": 0.6005873715124816, "step": 410, "batch_size": 64, "mean": 241.77908325195312, "std": 281.7442626953125, "min": -528.0017700195312, "p10": -62.39760360717772, "median": 248.352294921875, "p90": 549.1097045898438, "max": 851.172607421875, "pos_frac": 0.84375, "sample": [202.99530029296875, 264.16986083984375, -68.2034912109375, 48.313514709472656, 501.21337890625, 323.9449768066406, 445.9762268066406, 500.2174987792969, 512.4860229492188, 18.162979125976562, 149.0696563720703, 235.66848754882812, 12.442276000976562, 479.7447509765625, -104.26029205322266, -85.45281982421875, 644.546875, -431.12286376953125, -32.46554946899414, 333.8729248046875, 474.5442199707031, 747.89453125, 1.4975433349609375, 105.30464172363281, 537.905517578125, 599.955810546875, 351.5530700683594, 13.69082260131836, 355.1671142578125, 851.172607421875, 261.0361022949219, 29.122718811035156, 275.2821960449219, 27.76116943359375, 398.8656005859375, 23.225555419921875, -48.85053253173828, 40.234169006347656, 182.47015380859375, -153.9144287109375, 118.18464660644531, 469.61444091796875, 158.98947143554688, 378.35107421875, 196.9508056640625, -528.0017700195312, 379.0189514160156, 232.4464874267578, 836.0469970703125, 321.23516845703125, 103.26068115234375, 127.69766998291016, 434.63348388671875, 553.9114990234375, 283.3742980957031, 772.611572265625, 181.97914123535156, 475.5009765625, -352.74407958984375, 185.5936737060547, 289.3263244628906, -24.663904190063477, 530.865966796875, 324.4391174316406], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000410.npy"}
{"epoch": 0.6020558002936858, "step": 411, "batch_size": 64, "mean": 231.06285095214844, "std": 330.6865539550781, "min": -580.5426635742188, "p10": -40.79407348632812, "median": 165.26168823242188, "p90": 648.6674804687501, "max": 1326.1649169921875, "pos_frac": 0.78125, "sample": [174.75567626953125, -30.197608947753906, -90.78551483154297, 61.851036071777344, 472.6849670410156, 153.58041381835938, 118.39227294921875, 175.9304656982422, 323.1685791015625, 387.275390625, 182.270263671875, -42.967567443847656, 155.85418701171875, -2.9616661071777344, 84.83466339111328, -16.042055130004883, 449.80987548828125, 226.11985778808594, 92.46908569335938, 437.8763122558594, 818.4149780273438, 277.4653015136719, -244.37603759765625, -22.312150955200195, 76.8439712524414, 186.3742218017578, 189.35792541503906, 241.84837341308594, 115.03765869140625, 1121.6942138671875, 130.87908935546875, 380.5769958496094, 929.7384643554688, -35.72258758544922, 528.483642578125, -17.068361282348633, 343.6702880859375, 633.712158203125, 624.0249633789062, 140.65029907226562, 93.80367279052734, 353.69122314453125, 127.08588409423828, 302.5751647949219, 147.88436889648438, 346.3273010253906, 16.212207794189453, -580.5426635742188, 1326.1649169921875, 319.20989990234375, 101.50428771972656, 341.16339111328125, 621.3228759765625, 714.8682861328125, 655.076904296875, -216.92034912109375, -24.44684600830078, -395.3453369140625, 174.669189453125, 3.281248092651367, 741.29443359375, -184.3859100341797, 14.316207885742188, 55.99983596801758], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000411.npy"}
{"epoch": 0.6035242290748899, "step": 412, "batch_size": 64, "mean": 238.66929626464844, "std": 318.6330871582031, "min": -509.2379150390625, "p10": -129.84920806884762, "median": 206.5784912109375, "p90": 627.7627685546875, "max": 1209.3548583984375, "pos_frac": 0.765625, "sample": [-141.48336791992188, 201.46621704101562, 465.59124755859375, 438.2127685546875, 365.78173828125, -49.536033630371094, 625.8040161132812, -46.499183654785156, 187.7457275390625, 554.9964599609375, 155.38995361328125, 341.2074890136719, 53.50384521484375, 364.1913146972656, 27.633893966674805, 628.6022338867188, 369.1965026855469, 110.51081085205078, -478.92828369140625, 211.69076538085938, 52.78827667236328, 2.6766738891601562, 641.0067749023438, 197.0278778076172, 71.92596435546875, 354.9390563964844, -2.642120361328125, 335.1309509277344, -156.63320922851562, 222.30279541015625, -76.77208709716797, 82.3915786743164, 168.27764892578125, 268.7208557128906, 125.73500061035156, -72.06858825683594, -313.99932861328125, -102.70283508300781, 116.49639129638672, 877.8380126953125, 13.90826416015625, -509.2379150390625, 407.896484375, 574.0552368164062, 691.7962646484375, -144.6564483642578, 443.88507080078125, 779.010009765625, -6.229339599609375, 522.353759765625, 354.39447021484375, 30.940275192260742, 122.12608337402344, 614.779296875, 583.9539794921875, 1209.3548583984375, -61.661869049072266, 408.5614318847656, 405.502685546875, -171.12738037109375, 448.3690185546875, 317.41156005859375, 639.92236328125, 422.0083923339844], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000412.npy"}
{"epoch": 0.604992657856094, "step": 413, "batch_size": 64, "mean": 244.07077026367188, "std": 325.5622863769531, "min": -876.3580932617188, "p10": -141.219024658203, "median": 244.57884979248047, "p90": 592.4346130371094, "max": 980.8792114257812, "pos_frac": 0.828125, "sample": [19.268402099609375, 7.249347686767578, 154.09661865234375, 391.87359619140625, 191.427978515625, 166.3169403076172, 168.96426391601562, 200.5269775390625, 93.3434829711914, 215.88272094726562, 551.3377075195312, 944.1611328125, -17.20838165283203, 250.24432373046875, -36.016502380371094, -12.440595626831055, 377.8490905761719, 165.32949829101562, -13.665023803710938, 798.8075561523438, 96.97550964355469, 556.1380004882812, 477.3130798339844, -876.3580932617188, 75.72903442382812, 310.04083251953125, 318.42431640625, 177.32623291015625, 558.4601440429688, 980.8792114257812, 328.80438232421875, 287.0459899902344, 674.593505859375, 388.18896484375, -186.30581665039062, 540.8416137695312, 308.9235534667969, 146.65032958984375, 361.7501525878906, 322.69354248046875, 55.98143005371094, 248.3148956298828, 595.8460693359375, -336.87261962890625, 584.4745483398438, 449.16729736328125, 240.84280395507812, -277.0918273925781, -306.0789794921875, -511.931884765625, 161.3677520751953, 443.78509521484375, 453.10552978515625, 756.7218017578125, 13.53249740600586, 510.4996337890625, 238.1636962890625, 616.8056640625, 164.0904083251953, 489.8782958984375, 103.28216552734375, 286.52301025390625, 420.47064208984375, -245.81207275390625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000413.npy"}
{"epoch": 0.6064610866372981, "step": 414, "batch_size": 64, "mean": 266.1756286621094, "std": 325.603759765625, "min": -883.7804565429688, "p10": -52.174200820922835, "median": 267.05589294433594, "p90": 719.998797607422, "max": 1045.8499755859375, "pos_frac": 0.84375, "sample": [330.5924072265625, 347.7389831542969, 371.2596435546875, 263.8789978027344, -39.03792953491211, 1045.8499755859375, 270.2327880859375, -883.7804565429688, 25.518783569335938, 143.03704833984375, 303.00537109375, 247.34539794921875, 685.2637329101562, 828.17236328125, 312.0235900878906, -57.80403137207031, 425.6103515625, 80.37478637695312, -81.73250579833984, 105.02570343017578, 186.38870239257812, 534.31689453125, -36.09440994262695, 144.51358032226562, 524.5864868164062, 318.321533203125, -197.52224731445312, -13.48735237121582, 491.00445556640625, 27.078445434570312, 764.309326171875, 478.5042724609375, 20.959735870361328, 387.648193359375, 734.88525390625, 94.06913757324219, 361.04638671875, -59.56715393066406, -307.45550537109375, 236.51744079589844, 278.2039794921875, 296.07781982421875, 627.6044921875, -266.1809387207031, 96.16148376464844, 489.9726867675781, 30.19598388671875, 584.756591796875, 183.29754638671875, 34.11079025268555, 219.6238250732422, 163.99192810058594, 165.44827270507812, 70.12991333007812, 270.8958740234375, 307.63653564453125, 821.689697265625, 26.499420166015625, 439.4817199707031, 944.2708129882812, 948.1436767578125, 425.68212890625, 135.93923950195312, 329.0085754394531], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000414.npy"}
{"epoch": 0.6079295154185022, "step": 415, "batch_size": 64, "mean": 198.95370483398438, "std": 306.0118713378906, "min": -789.0570068359375, "p10": -114.93735122680664, "median": 174.26956176757812, "p90": 638.7980712890629, "max": 958.973876953125, "pos_frac": 0.75, "sample": [245.50271606445312, -99.54232788085938, 369.5544128417969, 789.2943725585938, -30.536178588867188, 718.663818359375, 521.0176391601562, 126.4485092163086, -55.798667907714844, 152.54591369628906, 521.746826171875, 319.7212219238281, 432.68609619140625, 414.8548583984375, -187.75015258789062, -37.45823287963867, 228.27871704101562, -171.52963256835938, 335.11627197265625, 229.90406799316406, 6.793388366699219, 53.67010498046875, 7.172943115234375, 790.1512451171875, -139.41366577148438, 151.62509155273438, 688.962890625, -112.17154693603516, 111.81946563720703, 394.224609375, -31.094329833984375, 340.3221435546875, 254.1396484375, 35.73980712890625, 958.973876953125, -308.07037353515625, -116.12269592285156, 119.15260314941406, 482.79937744140625, 733.5068359375, 183.10000610351562, 342.0047302246094, 144.54327392578125, 48.04052734375, 158.6290283203125, 230.92823791503906, 30.15459632873535, 302.24774169921875, 455.72430419921875, -73.39777374267578, 206.7037353515625, 403.6095886230469, 205.8202667236328, 10.42724609375, 395.03948974609375, -2.586362838745117, 165.43911743164062, -19.921112060546875, 401.4276428222656, -380.1174011230469, 725.7767333984375, 62.29533004760742, -789.0570068359375, 281.3040466308594], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000415.npy"}
{"epoch": 0.6093979441997063, "step": 416, "batch_size": 64, "mean": 308.59735107421875, "std": 286.38604736328125, "min": -151.015869140625, "p10": -55.71486778259277, "median": 267.10589599609375, "p90": 697.0240051269532, "max": 965.1353759765625, "pos_frac": 0.8125, "sample": [774.6727905273438, 100.08606719970703, 151.8172607421875, 403.994873046875, -72.84566497802734, 607.0196533203125, 181.19253540039062, 110.9906234741211, 511.750244140625, 684.5873413085938, -99.61930084228516, -77.49020385742188, 53.463905334472656, 329.79998779296875, 234.32260131835938, 513.3463745117188, 539.7689819335938, 3.9121932983398438, 599.6174926757812, 169.51426696777344, 414.8570861816406, 441.0513916015625, 137.7615203857422, 182.38577270507812, 580.8973999023438, 236.98321533203125, 301.10247802734375, 726.7959594726562, 89.84194946289062, 702.35400390625, 273.2950439453125, 557.4732666015625, 965.1353759765625, -151.015869140625, -116.4752197265625, 60.219871520996094, 559.7897338867188, 475.6614990234375, -53.43799591064453, 833.619384765625, -4.383308410644531, 964.7872924804688, 263.1200866699219, 237.84715270996094, 462.55706787109375, 482.04473876953125, 387.29632568359375, 621.4483032226562, 236.19313049316406, 33.074562072753906, -13.624408721923828, 215.3594207763672, -86.67135620117188, 761.0205688476562, 383.6983337402344, -51.184844970703125, 340.33331298828125, -56.690670013427734, 171.4405975341797, -22.951583862304688, 114.6578598022461, 271.0917053222656, 441.4151916503906, 660.1526489257812], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000416.npy"}
{"epoch": 0.6108663729809104, "step": 417, "batch_size": 64, "mean": 267.00482177734375, "std": 271.95977783203125, "min": -352.7342224121094, "p10": -62.49158096313476, "median": 240.86483001708984, "p90": 672.2350524902345, "max": 922.901123046875, "pos_frac": 0.84375, "sample": [51.02405548095703, 18.043655395507812, -16.438817977905273, 322.39105224609375, 3.983989715576172, 477.7626953125, 79.05580139160156, 360.4866943359375, -127.0533676147461, 676.3372192382812, 24.60336685180664, 573.302734375, 340.44989013671875, 78.11585998535156, 246.3627166748047, -65.79518127441406, -140.10269165039062, 720.4628295898438, 687.437744140625, 73.40658569335938, 662.663330078125, 206.33633422851562, 791.9161987304688, 235.366943359375, 660.36328125, 922.901123046875, 357.3431091308594, 317.57635498046875, 196.86502075195312, 357.1894836425781, 128.78204345703125, 200.70619201660156, -74.17374420166016, 125.91654205322266, 353.8196105957031, 594.2181396484375, 53.62820816040039, 409.2265625, 417.6870422363281, 173.26161193847656, 131.71958923339844, 343.57904052734375, 223.1887664794922, 299.7948303222656, -79.66021728515625, 319.74200439453125, 90.87761688232422, 132.66592407226562, 496.30859375, 283.20404052734375, 440.73553466796875, -352.7342224121094, 254.20498657226562, 387.5096130371094, -21.191551208496094, -241.7379150390625, 180.59075927734375, 123.39452362060547, 606.267333984375, 414.4764099121094, 690.6397094726562, 156.9276580810547, 787.1580810546875, -54.783180236816406], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000417.npy"}
{"epoch": 0.6123348017621145, "step": 418, "batch_size": 64, "mean": 267.85089111328125, "std": 376.42333984375, "min": -407.4979248046875, "p10": -187.93696441650388, "median": 194.14337921142578, "p90": 777.9847351074221, "max": 1276.6951904296875, "pos_frac": 0.734375, "sample": [214.04086303710938, -176.93679809570312, 171.04571533203125, -56.78279495239258, 144.3920440673828, 955.2317504882812, 66.96027374267578, 78.326171875, 720.5958862304688, 589.0531005859375, -14.71282958984375, -31.351104736328125, 521.2084350585938, -63.00023651123047, 1143.3724365234375, 623.9398803710938, 404.078369140625, -192.6513214111328, 430.6601257324219, 31.5418701171875, -271.8824768066406, 802.5799560546875, -164.37445068359375, 451.2493591308594, 226.60897827148438, 97.49296569824219, 295.40435791015625, 635.9270629882812, 372.82763671875, 218.09292602539062, 549.9320068359375, 65.44886779785156, 132.72811889648438, 837.687744140625, -407.4979248046875, -199.43756103515625, -204.62911987304688, 435.3873291015625, 243.18919372558594, 541.3570556640625, 1276.6951904296875, -54.48802947998047, 54.264373779296875, 132.69869995117188, 321.8414306640625, 174.2458953857422, 141.09783935546875, 144.8293914794922, -319.7139892578125, -313.006103515625, 367.78094482421875, -12.817092895507812, 575.7000732421875, -63.93901062011719, 583.7924194335938, 269.2185363769531, 609.6849975585938, 1155.0689697265625, 121.8960952758789, 37.24106979370117, 541.8699951171875, 917.5262451171875, 321.0559997558594, -57.19158935546875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000418.npy"}
{"epoch": 0.6138032305433186, "step": 419, "batch_size": 64, "mean": 294.68603515625, "std": 317.0032958984375, "min": -454.5220947265625, "p10": -72.68016967773437, "median": 298.9014892578125, "p90": 715.9300415039063, "max": 940.69873046875, "pos_frac": 0.828125, "sample": [823.9533081054688, -364.6188659667969, 333.5043640136719, 589.5771484375, 98.03827667236328, 735.0540771484375, -72.66085815429688, -35.42226028442383, 26.35345458984375, 685.6383666992188, 908.3314208984375, 161.07455444335938, 310.62237548828125, 540.6019287109375, 495.8589782714844, 522.3831787109375, 156.69818115234375, 160.0624237060547, -454.5220947265625, 51.995628356933594, -90.39434051513672, 713.735107421875, -246.78868103027344, 285.8938293457031, 498.88763427734375, -263.9625244140625, 153.937255859375, 178.92393493652344, 518.5474853515625, 857.0205688476562, 389.2344970703125, 63.7694091796875, 456.27886962890625, 330.2181396484375, 425.540283203125, 110.9740219116211, 146.40980529785156, 67.85263061523438, 489.5542297363281, 378.2091064453125, 653.9918212890625, 183.29489135742188, 648.9381103515625, 46.24876022338867, 97.50328826904297, 83.80424499511719, -36.39373779296875, -72.68844604492188, 716.8707275390625, 581.66845703125, 608.550048828125, 448.52923583984375, 82.22122192382812, 940.69873046875, 398.9360656738281, 413.1546936035156, 468.14654541015625, 287.18060302734375, 138.70309448242188, 717.8302612304688, -159.99131774902344, -27.168304443359375, 156.27645874023438, 347.2375183105469], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000419.npy"}
{"epoch": 0.6152716593245228, "step": 420, "batch_size": 64, "mean": 178.9881591796875, "std": 379.40399169921875, "min": -810.14697265625, "p10": -255.35082397460937, "median": 115.44184112548828, "p90": 603.7057495117188, "max": 1176.0589599609375, "pos_frac": 0.671875, "sample": [-246.38433837890625, 212.46273803710938, 48.894248962402344, -3.8896827697753906, 602.3338623046875, -91.85994720458984, -259.193603515625, -22.3970947265625, 237.1442413330078, 604.293701171875, -39.06990051269531, -110.87999725341797, -41.34932327270508, 160.35740661621094, -57.9639892578125, 90.24568939208984, 551.29296875, 331.8228454589844, 88.81443786621094, 287.1098327636719, 64.25750732421875, 252.31375122070312, -64.28921508789062, 396.6509094238281, -99.63265991210938, 121.60455322265625, 34.60289001464844, 456.66619873046875, 109.68352508544922, -118.10260009765625, 504.7649230957031, 447.5986328125, 414.6241455078125, -288.96697998046875, 121.20015716552734, 247.21829223632812, -337.89678955078125, -10.254417419433594, 687.9579467773438, 760.1005859375, -27.377614974975586, -810.14697265625, 622.3203125, 398.6975402832031, 589.9427490234375, 143.0848846435547, 499.0873107910156, 409.6041564941406, 434.62347412109375, 335.8251037597656, 59.605735778808594, -515.0216064453125, 205.4874725341797, 42.53726577758789, 47.20026779174805, 1134.2744140625, -596.7621459960938, 56.36806106567383, 105.52751159667969, 370.1923828125, 1176.0589599609375, -50.52336883544922, -384.6477355957031, 1167.3980712890625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000420.npy"}
{"epoch": 0.6167400881057269, "step": 421, "batch_size": 64, "mean": 207.74978637695312, "std": 287.7579345703125, "min": -755.8883056640625, "p10": -151.31679077148436, "median": 255.62740325927734, "p90": 525.1832885742188, "max": 811.5947265625, "pos_frac": 0.78125, "sample": [301.3463439941406, 64.01364135742188, 137.03456115722656, -154.5386962890625, 257.2454833984375, 504.0718994140625, 736.5586547851562, -36.007171630859375, 136.1851348876953, 100.29609680175781, 284.6795654296875, 22.19411849975586, 587.886962890625, 676.8580322265625, 496.458984375, 527.432373046875, 308.76605224609375, -135.9488067626953, 139.55067443847656, 221.1181640625, 735.4571533203125, 506.8077392578125, 62.81488037109375, -209.62344360351562, 253.4047088623047, 222.96949768066406, 407.84368896484375, 280.66046142578125, 35.928749084472656, 254.0093231201172, 519.9354248046875, 310.9141845703125, 599.2933349609375, 314.2663879394531, 354.980712890625, -249.92755126953125, 368.6976013183594, 345.29998779296875, -384.7272033691406, 508.7304382324219, 285.56451416015625, -18.589468002319336, 355.0543212890625, 811.5947265625, 411.4552917480469, 301.13037109375, 201.68209838867188, 18.771467208862305, 12.490701675415039, 384.72454833984375, 96.77296447753906, -17.43675994873047, 319.41583251953125, -755.8883056640625, -185.55052185058594, 105.30982971191406, -12.154296875, -143.79901123046875, -131.64862060546875, 265.541259765625, 294.4447937011719, -285.5397644042969, 214.45999145507812, 355.2421875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000421.npy"}
{"epoch": 0.618208516886931, "step": 422, "batch_size": 64, "mean": 230.2337646484375, "std": 313.5883483886719, "min": -756.7940673828125, "p10": -71.43351440429687, "median": 221.06221771240234, "p90": 613.918505859375, "max": 1125.3262939453125, "pos_frac": 0.78125, "sample": [262.3699951171875, 648.362060546875, 815.3049926757812, 504.80865478515625, 291.38604736328125, -201.3013458251953, -420.88067626953125, 610.97119140625, -37.129844665527344, 1125.3262939453125, -66.47175598144531, 176.31968688964844, 183.68577575683594, 43.39788818359375, 363.4736022949219, 179.72244262695312, 42.381065368652344, -60.868492126464844, 6.085422515869141, 231.8047637939453, 43.239990234375, 752.4805908203125, 326.8604736328125, 67.46817016601562, 438.3871765136719, 193.00006103515625, 122.57977294921875, 604.6358642578125, 242.68606567382812, 615.181640625, 726.0272216796875, 365.765380859375, 324.6454772949219, 174.4174041748047, 354.51629638671875, 339.92425537109375, 308.99810791015625, -293.72674560546875, 330.0157165527344, -15.812164306640625, -46.72298049926758, 378.92718505859375, 210.31967163085938, -756.7940673828125, -58.72335433959961, 544.907958984375, 458.8896484375, 109.84913635253906, 658.52880859375, -33.47520446777344, 393.5148010253906, 193.1118927001953, 82.31072998046875, 48.55888748168945, 432.36517333984375, 442.6853332519531, 245.12640380859375, -73.55998229980469, 397.665283203125, 503.8457946777344, -153.7437744140625, 167.0352020263672, 200.67837524414062, -330.37884521484375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000422.npy"}
{"epoch": 0.6196769456681351, "step": 423, "batch_size": 64, "mean": 256.3762512207031, "std": 304.4154357910156, "min": -498.33880615234375, "p10": -69.86052169799804, "median": 204.15470123291016, "p90": 682.1188110351562, "max": 1057.743408203125, "pos_frac": 0.78125, "sample": [227.94821166992188, 137.70632934570312, 169.57350158691406, 47.04095458984375, -106.333740234375, -321.5530700683594, 835.1232299804688, 243.06658935546875, 629.6881103515625, 452.8621520996094, 684.0331420898438, -75.6524887084961, 881.277099609375, 197.076904296875, 495.2583923339844, -28.086963653564453, 784.2783203125, 814.343994140625, 712.9874267578125, 677.6520385742188, -50.87457275390625, -9.412353515625, 373.802001953125, 180.32470703125, 576.9441528320312, 153.15069580078125, 28.264429092407227, 27.753387451171875, -498.33880615234375, -163.67855834960938, 370.7271728515625, -88.11982727050781, 315.9173278808594, 312.2222595214844, 642.9381103515625, 249.24185180664062, -7.349748611450195, 399.36737060546875, -179.35374450683594, 150.75271606445312, 283.99786376953125, 498.8349609375, 234.89675903320312, 266.6310729980469, 1057.743408203125, 142.20692443847656, 382.56854248046875, -30.055633544921875, 463.41064453125, 19.586471557617188, 116.66041564941406, 112.25303649902344, 463.22552490234375, 409.5602722167969, 175.538330078125, 210.6759490966797, -56.34593200683594, 197.63345336914062, 143.73922729492188, -49.269622802734375, 551.1071166992188, 116.13748931884766, 142.1865234375, 312.5891418457031], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000423.npy"}
{"epoch": 0.6211453744493393, "step": 424, "batch_size": 64, "mean": 247.9236602783203, "std": 335.3094787597656, "min": -300.5739440917969, "p10": -194.3860900878906, "median": 240.60821533203125, "p90": 653.5700134277346, "max": 1175.8975830078125, "pos_frac": 0.765625, "sample": [-300.5739440917969, 370.8070068359375, 38.50718688964844, -90.03791809082031, -171.64373779296875, 498.5238037109375, 402.1526794433594, 311.88555908203125, 52.773597717285156, -204.1328125, 283.869873046875, 269.1526184082031, -54.83622741699219, -230.9601593017578, 236.48838806152344, 672.3949584960938, 436.961181640625, 879.6279296875, 121.67256164550781, 448.4465026855469, 536.4755249023438, 258.6002502441406, 609.6451416015625, 549.5900268554688, 319.6345520019531, -120.82062530517578, 219.99710083007812, -17.940879821777344, -213.248779296875, -125.49285888671875, 137.13291931152344, 1161.98046875, -217.95370483398438, 287.9710998535156, 319.59783935546875, -64.9306640625, 334.02801513671875, -244.96046447753906, 204.69924926757812, 141.16989135742188, -87.5241928100586, 104.23416900634766, -278.4765930175781, 33.814395904541016, 294.91693115234375, 329.39398193359375, 232.59219360351562, 91.46212005615234, 302.1771240234375, 1175.8975830078125, 404.1610107421875, 767.0736083984375, 396.7101135253906, 244.72804260253906, 374.79425048828125, 95.88348388671875, 849.2341918945312, 209.25942993164062, 193.37721252441406, 1105.8836669921875, 498.3559875488281, 260.3297424316406, 156.94496154785156, 65.6378173828125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000424.npy"}
{"epoch": 0.6226138032305433, "step": 425, "batch_size": 64, "mean": 196.43971252441406, "std": 309.2083435058594, "min": -638.4942626953125, "p10": -94.68398590087891, "median": 196.0566864013672, "p90": 568.1730590820314, "max": 906.617919921875, "pos_frac": 0.796875, "sample": [-91.62030029296875, 10.68342399597168, 451.6860656738281, -423.88751220703125, -563.5529174804688, 214.50888061523438, 69.85484313964844, 310.3718566894531, 91.0257568359375, 239.7809600830078, 126.35718536376953, 542.4719848632812, -638.4942626953125, 39.712215423583984, 98.85688781738281, 118.65045166015625, 29.105812072753906, 91.51654052734375, 613.8031616210938, 86.08946228027344, 310.6634521484375, 142.0594482421875, -6.654445648193359, 283.72705078125, 674.8543090820312, -0.35002899169921875, 495.84796142578125, 52.25025939941406, -53.7989387512207, 776.5465087890625, 130.37911987304688, 579.1878051757812, 638.6250610351562, -156.04429626464844, 177.6044921875, 293.0345153808594, 527.5301513671875, 164.9380340576172, 43.53631591796875, 698.58544921875, -549.3533935546875, 376.2015380859375, 162.0109405517578, -353.1730041503906, 462.2133483886719, 348.762451171875, 410.69140625, 221.7054443359375, 6.6105804443359375, 361.9599914550781, 227.0958251953125, 242.94363403320312, -95.99699401855469, -80.29168701171875, 290.3275146484375, 350.01617431640625, 480.63494873046875, 906.617919921875, 520.295166015625, 279.4952392578125, 480.05029296875, -25.031089782714844, 268.1135559082031, 90.79854583740234], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000425.npy"}
{"epoch": 0.6240822320117474, "step": 426, "batch_size": 64, "mean": 245.40122985839844, "std": 301.22686767578125, "min": -602.857177734375, "p10": -43.348996734619135, "median": 194.6224822998047, "p90": 690.2280700683594, "max": 903.5559692382812, "pos_frac": 0.8125, "sample": [269.9084167480469, 101.7535171508789, 91.69718170166016, 152.87222290039062, 192.2606201171875, 58.85731506347656, 97.60089111328125, 196.98434448242188, 791.9276733398438, 126.64430236816406, 169.66197204589844, 242.81988525390625, 1.6802940368652344, -16.683828353881836, 669.1780395507812, -210.97869873046875, 79.40040588378906, 244.96682739257812, 489.6110534667969, 199.81253051757812, 5.531455993652344, 727.020751953125, -127.8437728881836, 258.46514892578125, 118.19514465332031, 122.31561279296875, -126.87535095214844, 354.78814697265625, 809.8961181640625, 554.4771118164062, 293.4164123535156, 599.9024658203125, 64.16390228271484, 372.3013610839844, 160.44850158691406, 786.5035400390625, 92.01458740234375, -315.02801513671875, 169.24765014648438, 405.9249267578125, -32.171142578125, 347.3392333984375, 903.5559692382812, -4.515205383300781, 431.18524169921875, 25.666954040527344, 313.18377685546875, 544.5094604492188, -36.4578857421875, 161.5616912841797, -278.7994384765625, 295.59613037109375, -8.899524688720703, 409.3904724121094, 570.7952270507812, 311.55859375, -46.302330017089844, 468.94091796875, 776.8004150390625, -602.857177734375, 574.4166870117188, 699.24951171875, 65.27255249023438, 541.818115234375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000426.npy"}
{"epoch": 0.6255506607929515, "step": 427, "batch_size": 64, "mean": 181.67835998535156, "std": 274.5413513183594, "min": -386.97662353515625, "p10": -170.5698760986328, "median": 153.9450912475586, "p90": 471.9296966552738, "max": 938.0718994140625, "pos_frac": 0.75, "sample": [309.4147644042969, 93.15013122558594, 294.30487060546875, 369.0985412597656, 769.5361328125, 99.67064666748047, -19.589136123657227, -29.703636169433594, 360.2727355957031, 232.47567749023438, -26.302703857421875, 14.632247924804688, 99.59640502929688, 149.26161193847656, 130.74488830566406, 325.88690185546875, 270.3330993652344, 858.3824462890625, 27.79706573486328, -179.44830322265625, 30.98925018310547, 123.54632568359375, 310.51983642578125, 374.89154052734375, 158.62857055664062, 306.7181396484375, 314.8094177246094, 899.6793823242188, 37.15187072753906, 193.19198608398438, -179.58087158203125, 512.55908203125, 319.835693359375, -140.65725708007812, -88.64799499511719, 302.28839111328125, 298.0032958984375, -39.177242279052734, 551.7120361328125, 203.916748046875, 307.7861328125, 127.40473937988281, 98.9165267944336, 139.22508239746094, -386.97662353515625, 300.3101806640625, 351.439453125, 380.41900634765625, -346.9888916015625, -214.23007202148438, 508.85455322265625, 223.64743041992188, 385.7716979980469, 33.31175994873047, 938.0718994140625, -149.85354614257812, 57.87828063964844, 317.9048156738281, 375.8778991699219, 114.69461059570312, -90.0173568725586, -244.5433807373047, -24.511741638183594, -216.87059020996094], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000427.npy"}
{"epoch": 0.6270190895741556, "step": 428, "batch_size": 64, "mean": 245.36434936523438, "std": 383.0762939453125, "min": -766.1964111328125, "p10": -199.02544403076172, "median": 249.25514221191406, "p90": 684.5253295898438, "max": 1221.668701171875, "pos_frac": 0.75, "sample": [-74.96995544433594, -251.23532104492188, 39.670982360839844, 435.0060729980469, 875.4212646484375, 436.91131591796875, 125.29579162597656, 623.4058837890625, 419.35723876953125, 1221.668701171875, -259.3076477050781, 114.06842041015625, 513.572509765625, 131.25094604492188, 433.3609924316406, -83.21343994140625, -551.2911376953125, 688.34619140625, 356.0789794921875, 610.3126220703125, 484.927001953125, 347.6197509765625, -99.25814819335938, 1.3219871520996094, 508.5746765136719, 751.5360717773438, 323.87689208984375, 251.5888671875, -110.47705078125, 282.1159973144531, -59.15636444091797, 334.65484619140625, 718.6783447265625, -94.95507049560547, 246.92141723632812, 79.39726257324219, 630.8229370117188, 549.2900390625, 38.26091384887695, 101.2938232421875, 55.417789459228516, 149.4976806640625, -24.46649932861328, -201.40432739257812, -766.1964111328125, -179.87550354003906, 37.25245666503906, 154.35372924804688, 1094.5537109375, 169.10540771484375, 572.0960083007812, 436.58990478515625, 584.47412109375, -661.7639770507812, 526.3536987304688, 289.919921875, 644.4425048828125, 675.6099853515625, 379.58349609375, 226.2949676513672, 699.2735595703125, -193.47471618652344, 148.6671600341797, -203.73129272460938], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000428.npy"}
{"epoch": 0.6284875183553598, "step": 429, "batch_size": 64, "mean": 194.86798095703125, "std": 350.1412658691406, "min": -491.51031494140625, "p10": -269.6597030639648, "median": 197.79464721679688, "p90": 611.7719604492188, "max": 1131.892822265625, "pos_frac": 0.71875, "sample": [93.98738861083984, 205.72787475585938, -418.7008056640625, 321.8895263671875, -130.41241455078125, 699.3735961914062, -172.9110107421875, 145.03265380859375, 142.37823486328125, 399.62066650390625, -436.6837158203125, 174.96554565429688, 78.2051010131836, 146.7133331298828, -282.82196044921875, 349.6371765136719, 125.41067504882812, -358.0054931640625, 731.83935546875, -238.94776916503906, 1131.892822265625, 899.876953125, -17.1285400390625, 463.5923156738281, 189.86141967773438, 23.69711685180664, 315.31170654296875, 40.13154220581055, 585.9496459960938, 612.1424560546875, 237.05810546875, 307.4229736328125, 399.6075439453125, -491.51031494140625, -173.33786010742188, -196.5927276611328, -82.9073715209961, 235.42552185058594, 224.61856079101562, -131.1206817626953, 610.907470703125, 386.22314453125, 246.48452758789062, 4.95391845703125, 857.7662963867188, 327.1673278808594, 515.3139038085938, -199.94589233398438, 108.52180480957031, -327.1919250488281, 247.22817993164062, 164.23304748535156, 501.96466064453125, 271.28350830078125, -78.21137237548828, -395.6521911621094, -6.5465240478515625, 263.1466064453125, 44.844154357910156, 587.931884765625, 439.6214904785156, 764.757568359375, 515.7303466796875, 470.73077392578125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000429.npy"}
{"epoch": 0.6299559471365639, "step": 430, "batch_size": 64, "mean": 205.6468505859375, "std": 282.02313232421875, "min": -287.77392578125, "p10": -80.53417739868163, "median": 151.93719482421875, "p90": 630.3252014160157, "max": 1069.782958984375, "pos_frac": 0.75, "sample": [223.43988037109375, 190.4602508544922, -65.73226165771484, -72.77053833007812, 685.3973388671875, 363.7509460449219, -28.084381103515625, 243.95526123046875, 219.2303466796875, 511.10015869140625, 103.10441589355469, -31.465797424316406, 50.82892608642578, -81.25174713134766, 198.5780487060547, 636.5925903320312, -287.77392578125, 404.2548828125, 76.26817321777344, 59.88869857788086, 153.30421447753906, 100.5702896118164, 180.3518524169922, 102.32286834716797, -71.68891143798828, 142.5924530029297, 308.556884765625, 396.1211242675781, -75.35549926757812, 161.43348693847656, 103.27058410644531, -157.5066375732422, 1069.782958984375, -17.777603149414062, 118.81513214111328, 550.5946044921875, 2.0060958862304688, -275.143798828125, 20.866750717163086, 240.3965606689453, 400.899169921875, 736.19140625, 469.8955993652344, 140.85797119140625, 259.81298828125, 718.6763305664062, -132.3826446533203, -144.45458984375, 615.7012939453125, 114.26057434082031, 122.47657775878906, -78.85984802246094, 462.9036560058594, 241.7022247314453, -12.937816619873047, 376.01947021484375, 342.82904052734375, -253.69207763671875, 331.8798522949219, 799.8310546875, 765.9197387695312, 233.15496826171875, 46.85834503173828, 150.57017517089844], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000430.npy"}
{"epoch": 0.631424375917768, "step": 431, "batch_size": 64, "mean": 294.536865234375, "std": 326.7882385253906, "min": -534.6610107421875, "p10": -104.31574478149412, "median": 294.99786376953125, "p90": 662.0416992187501, "max": 1032.3841552734375, "pos_frac": 0.828125, "sample": [-327.04656982421875, 176.73878479003906, -162.44468688964844, 476.5483703613281, 272.8796081542969, 116.00055694580078, 848.128662109375, 191.3655548095703, 420.5711364746094, 542.7560424804688, 243.82766723632812, 480.9305114746094, 54.442596435546875, -84.80491638183594, -112.67752838134766, 143.47019958496094, 29.683929443359375, -433.1270751953125, 553.6602783203125, -216.63731384277344, 650.4139404296875, 535.1871337890625, 1032.3841552734375, 881.9671630859375, 125.11933898925781, 614.5711669921875, 391.3472595214844, 380.9810485839844, 296.36669921875, 368.3150634765625, 754.2197265625, 298.1630554199219, 317.1400146484375, -9.787708282470703, 425.620361328125, -534.6610107421875, 925.3548583984375, 9.922134399414062, 235.99197387695312, -44.178245544433594, 534.181640625, 194.6242218017578, 975.9655151367188, -19.333663940429688, 111.42701721191406, 249.0721893310547, 468.4508056640625, 24.09208106994629, 233.21446228027344, 296.4442138671875, 406.1252746582031, 667.0250244140625, 547.9964599609375, 360.05767822265625, 542.5597534179688, 466.2340087890625, 178.94549560546875, 127.00638580322266, -210.2593994140625, 222.30833435058594, 628.990478515625, 293.6290283203125, 148.29212951660156, 534.6078491210938], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000431.npy"}
{"epoch": 0.6328928046989721, "step": 432, "batch_size": 64, "mean": 290.38677978515625, "std": 307.05523681640625, "min": -277.8949890136719, "p10": -14.568104171752926, "median": 249.69974517822266, "p90": 735.7304992675782, "max": 1351.1097412109375, "pos_frac": 0.859375, "sample": [32.17063903808594, 243.08428955078125, 267.55859375, 257.23040771484375, 256.7061767578125, 236.5428924560547, 127.37696075439453, -10.971324920654297, 172.98516845703125, 92.97773742675781, 20.946796417236328, 27.384021759033203, 557.4024658203125, 462.509765625, -93.99708557128906, 1351.1097412109375, 828.1991577148438, 201.50790405273438, -7.2919769287109375, 152.6768798828125, 111.04791259765625, 148.78524780273438, 254.25192260742188, 50.033416748046875, 502.36761474609375, 260.1048583984375, 744.1990356445312, 166.2352752685547, 425.443115234375, 73.56134033203125, 320.8701477050781, -16.109580993652344, -256.5851745605469, 245.14756774902344, -148.612060546875, 572.593017578125, 128.35289001464844, -116.40895080566406, 356.8293762207031, 95.22398376464844, 237.8609619140625, -173.05221557617188, 346.331298828125, 453.50469970703125, 315.76666259765625, 414.9752502441406, 678.904541015625, 439.1500244140625, 415.35052490234375, 438.16790771484375, 147.47291564941406, 451.62347412109375, 46.78340148925781, 829.5875244140625, 281.8720397949219, 134.64773559570312, 755.4334106445312, 443.1611022949219, 708.7521362304688, 930.8267822265625, -277.8949890136719, 754.6315307617188, 1.4856643676757812, 715.9705810546875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000432.npy"}
{"epoch": 0.6343612334801763, "step": 433, "batch_size": 64, "mean": 196.98556518554688, "std": 346.1278991699219, "min": -864.6849365234375, "p10": -196.83184204101562, "median": 238.92959594726562, "p90": 582.4096618652344, "max": 779.1370849609375, "pos_frac": 0.71875, "sample": [22.088287353515625, 506.99127197265625, 484.5827331542969, 521.6681518554688, 443.37664794921875, -170.0580596923828, 319.2345275878906, -618.4719848632812, 470.5585632324219, 705.0670166015625, -864.6849365234375, -13.236099243164062, 631.30126953125, 395.5535583496094, 134.327392578125, -165.5002899169922, 103.83433532714844, 305.82183837890625, 682.458740234375, -833.8828735351562, -17.80664825439453, 590.3157348632812, 656.6168212890625, 292.2978515625, 172.43817138671875, 196.46653747558594, 34.58155822753906, 213.63906860351562, -40.32670593261719, 480.9739074707031, 117.1064453125, 779.1370849609375, -319.7008972167969, 499.4287109375, 391.03680419921875, 468.5751953125, 400.82940673828125, -234.1248779296875, 377.3697509765625, 3.1413040161132812, 703.382080078125, 563.484619140625, 239.463623046875, -75.12480926513672, 271.28118896484375, 87.40323638916016, 395.3137512207031, -5.715415954589844, -200.7820587158203, -208.71356201171875, 563.962158203125, 455.2153625488281, 238.39556884765625, 34.37837600708008, 112.21856689453125, -187.6146697998047, -41.10454559326172, 393.8143615722656, -61.188385009765625, -26.529396057128906, 344.0299072265625, 486.02288818359375, 159.84852600097656, 242.61000061035156], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000433.npy"}
{"epoch": 0.6358296622613803, "step": 434, "batch_size": 64, "mean": 238.1170654296875, "std": 401.5022888183594, "min": -463.2184143066406, "p10": -122.11861267089841, "median": 162.93578338623047, "p90": 745.8773193359377, "max": 2300.85302734375, "pos_frac": 0.71875, "sample": [471.5173034667969, 164.42970275878906, 2300.85302734375, 220.9578399658203, -99.88053894042969, 85.08981323242188, 182.52297973632812, 573.8995361328125, 45.44782257080078, -69.39402770996094, 244.81199645996094, -80.93302917480469, 91.50615692138672, 774.2747802734375, -33.1779899597168, 115.867431640625, -160.69317626953125, -463.2184143066406, 496.7054748535156, 679.6165771484375, 816.5738525390625, -180.64889526367188, 146.56362915039062, 257.62188720703125, 472.099609375, 184.48736572265625, -74.79781341552734, -65.64312744140625, 220.6269989013672, 265.3147277832031, 27.4443359375, 161.44186401367188, 453.9423828125, -131.6492156982422, -0.3421478271484375, 423.71246337890625, 100.77494812011719, 44.7967529296875, 251.0909423828125, 177.61557006835938, -16.82403564453125, 461.6485900878906, 626.8482666015625, 81.00682067871094, 122.1258316040039, 225.888916015625, 403.1839294433594, 81.08946228027344, 178.24411010742188, 874.2803955078125, 259.29486083984375, -84.34832763671875, 877.98779296875, -160.02540588378906, 140.1361846923828, 42.419097900390625, 891.0062866210938, 165.2479248046875, -0.581817626953125, -259.5763244628906, 923.8901977539062, 541.676025390625, -8.606063842773438, -217.7500457763672], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000434.npy"}
{"epoch": 0.6372980910425844, "step": 435, "batch_size": 64, "mean": 270.16436767578125, "std": 339.4571533203125, "min": -722.23583984375, "p10": -67.42284545898437, "median": 229.8694839477539, "p90": 710.281219482422, "max": 1188.97998046875, "pos_frac": 0.828125, "sample": [1188.97998046875, 511.6412353515625, -722.23583984375, 425.0024108886719, 563.5513916015625, 200.17269897460938, 317.7635192871094, 526.06591796875, 326.6473083496094, 139.4259796142578, 587.2698364257812, 98.45527648925781, 48.792503356933594, 245.3038330078125, -80.73564147949219, 643.0918579101562, 988.9891967773438, 819.3453369140625, 261.93963623046875, -67.68138122558594, 69.30368041992188, 7.3353271484375, -17.404006958007812, 227.01466369628906, -144.82550048828125, 128.65988159179688, 245.9534454345703, 107.62551879882812, -187.8428955078125, 232.72430419921875, 292.74359130859375, 118.92039489746094, 220.7205810546875, 562.2547607421875, 540.5562133789062, 371.30706787109375, 395.8670654296875, -494.5867919921875, 441.2245788574219, -312.15093994140625, 167.99423217773438, 313.1996765136719, 734.8662109375, 21.955841064453125, 94.1463394165039, -66.81959533691406, 717.7452392578125, 667.2833862304688, 51.219791412353516, -65.39776611328125, 896.115478515625, 46.29698181152344, 317.7273864746094, 84.61288452148438, 165.27407836914062, 692.8651733398438, 785.6552734375, 389.5457763671875, 210.0553741455078, 567.6301879882812, 123.16035461425781, -28.907913208007812, 380.96087646484375, 196.14865112304688], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000435.npy"}
{"epoch": 0.6387665198237885, "step": 436, "batch_size": 64, "mean": 226.842529296875, "std": 308.173828125, "min": -647.267333984375, "p10": -139.89419631958006, "median": 190.78108978271484, "p90": 585.8865539550782, "max": 1006.951416015625, "pos_frac": 0.796875, "sample": [-60.29301452636719, 158.07350158691406, -119.98257446289062, 76.3781967163086, 509.1080627441406, 269.94140625, 390.6153259277344, 763.692626953125, 585.3119506835938, -168.23452758789062, 83.53825378417969, 911.3179931640625, -42.437225341796875, -145.53501892089844, 298.03192138671875, 239.53457641601562, -164.0255889892578, 44.978973388671875, 947.1324462890625, -282.15802001953125, 440.59765625, 536.9252319335938, 323.2906494140625, 372.86981201171875, 482.5967102050781, 101.91802978515625, 419.1480407714844, 590.7529296875, 49.90586853027344, 74.66828918457031, 1006.951416015625, 60.48102569580078, 508.9169616699219, 151.9499053955078, 286.8920593261719, 177.25579833984375, -41.56660461425781, 32.915992736816406, 278.84954833984375, 123.89849853515625, 383.0018310546875, 45.72269058227539, 207.45790100097656, 167.63604736328125, 187.40586853027344, 333.4112854003906, 269.2981262207031, -160.07928466796875, 180.81838989257812, 194.15631103515625, 250.22171020507812, 208.21319580078125, 440.1135559082031, 586.1328125, 957.0067749023438, -5.8584747314453125, 132.343505859375, 27.952804565429688, -647.267333984375, 273.5115966796875, -126.7322769165039, 76.96246337890625, 445.7332763671875, -183.44699096679688], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000436.npy"}
{"epoch": 0.6402349486049926, "step": 437, "batch_size": 64, "mean": 227.85081481933594, "std": 309.9287109375, "min": -639.5846557617188, "p10": -92.21544952392577, "median": 218.5354995727539, "p90": 604.6912963867189, "max": 964.5463256835938, "pos_frac": 0.75, "sample": [105.54203796386719, -307.6271057128906, 284.25030517578125, 640.0574340820312, 320.6492614746094, 2.834524154663086, 571.1180419921875, 142.63848876953125, 355.5374755859375, -77.29983520507812, -639.5846557617188, 203.1998748779297, -328.51373291015625, 528.5936279296875, -21.335479736328125, 177.87046813964844, 224.96597290039062, -301.2021484375, 141.33712768554688, 650.7516479492188, 49.701622009277344, 534.7747192382812, 212.1050262451172, -106.63594818115234, -202.23193359375, 724.741455078125, 148.077392578125, 94.74706268310547, 373.9688720703125, 463.95611572265625, 822.8692626953125, 431.45391845703125, 862.96240234375, -72.14317321777344, 964.5463256835938, -85.65048217773438, -70.16161346435547, 0.4647502899169922, 232.1632080078125, -12.998992919921875, 19.656517028808594, 14.773590087890625, 415.724853515625, 321.38525390625, 461.55322265625, 131.13339233398438, 385.6488037109375, 501.75897216796875, 566.3342895507812, 534.551513671875, 166.34974670410156, 477.9098205566406, 361.71173095703125, -60.57672119140625, -83.31636810302734, 619.079833984375, 349.47509765625, 375.83502197265625, -95.02900695800781, 268.32379150390625, 85.40838623046875, 439.93548583984375, 301.0068359375, -16.67479705810547], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000437.npy"}
{"epoch": 0.6417033773861968, "step": 438, "batch_size": 64, "mean": 249.1588134765625, "std": 322.45452880859375, "min": -602.0903930664062, "p10": -124.97240295410154, "median": 214.99293518066406, "p90": 638.0232421875, "max": 1022.1152954101562, "pos_frac": 0.765625, "sample": [90.92312622070312, -44.739524841308594, 432.19769287109375, -182.34507751464844, 262.1372375488281, 135.46646118164062, 887.6534423828125, -52.12236022949219, 173.47799682617188, -29.15240478515625, 285.0523986816406, 430.8840026855469, -1.1506385803222656, 123.38964080810547, 433.95941162109375, 516.33056640625, 289.238037109375, 1022.1152954101562, 84.55805969238281, 180.8814697265625, 208.87115478515625, 602.989013671875, 435.4967041015625, 561.6388549804688, 156.74465942382812, 221.11471557617188, 39.78845977783203, 328.2561340332031, -399.9762878417969, -55.46868896484375, -80.64151000976562, -32.04618835449219, 456.0242614746094, 742.9342651367188, 639.19921875, 451.3997497558594, 283.22076416015625, -602.0903930664062, 570.2298583984375, 296.8535461425781, 776.7662353515625, 162.22625732421875, -176.7940216064453, 818.8966674804688, -132.23776245117188, 370.5480651855469, 510.05902099609375, 314.8679504394531, 518.517578125, 50.94316864013672, 114.1724853515625, 199.28091430664062, 785.9578857421875, 84.78392028808594, -108.0198974609375, 113.12799072265625, 142.4305419921875, -417.0672607421875, -165.71728515625, 574.366943359375, 160.7838897705078, 635.279296875, 311.00177001953125, 438.6961975097656], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000438.npy"}
{"epoch": 0.6431718061674009, "step": 439, "batch_size": 64, "mean": 216.2918701171875, "std": 358.6029968261719, "min": -482.3184814453125, "p10": -151.85403442382812, "median": 154.43020629882812, "p90": 751.4056518554688, "max": 1183.0576171875, "pos_frac": 0.734375, "sample": [255.87673950195312, 752.8081665039062, 105.21650695800781, 329.49273681640625, 105.5570068359375, -61.436737060546875, 748.1331176757812, 27.602218627929688, 140.85256958007812, 41.38182830810547, -317.908203125, 800.7509155273438, 83.93144226074219, -52.94524383544922, -135.55673217773438, 337.936767578125, 338.3647766113281, 675.4752197265625, 687.3038330078125, 205.51760864257812, 98.45777893066406, 1030.1221923828125, 278.5931701660156, -93.00723266601562, 223.55633544921875, -205.87689208984375, 129.34524536132812, -144.8704376220703, -3.6537017822265625, 20.12343406677246, 330.8461608886719, -264.1727294921875, 343.8562927246094, 494.36212158203125, 1183.0576171875, 357.1219787597656, 552.0478515625, 227.24319458007812, 12.939224243164062, 305.4791259765625, 124.74120330810547, -42.955589294433594, -482.3184814453125, -48.92425537109375, -52.865440368652344, 238.08270263671875, 156.45980834960938, 241.9249267578125, -456.04168701171875, 582.6803588867188, 766.1925659179688, 20.593395233154297, 176.81980895996094, 788.8661499023438, 152.40060424804688, -144.93382263183594, 462.9522705078125, 346.9645690917969, 76.89666748046875, 83.83256530761719, 1078.8721923828125, 401.19854736328125, -417.86456298828125, -154.81983947753906], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000439.npy"}
{"epoch": 0.644640234948605, "step": 440, "batch_size": 64, "mean": 259.22357177734375, "std": 372.509033203125, "min": -252.5276641845703, "p10": -121.57080688476559, "median": 165.93637084960938, "p90": 757.0916015625002, "max": 1683.328125, "pos_frac": 0.796875, "sample": [356.0694885253906, 322.226806640625, 539.306396484375, 387.79925537109375, -199.471435546875, 84.98326873779297, 689.9705810546875, 150.93243408203125, 70.26222229003906, 859.2089233398438, 51.01799774169922, 49.327606201171875, 209.2564697265625, 209.59649658203125, 771.5370483398438, -184.11550903320312, 153.46139526367188, -252.5276641845703, 72.75862121582031, 7.861869812011719, 191.87179565429688, 661.4151000976562, 66.16099548339844, 65.3799819946289, 386.1298828125, 215.0579071044922, -138.14840698242188, 262.77996826171875, 399.99627685546875, 182.61427307128906, 143.22036743164062, 723.3855590820312, -53.279693603515625, 259.61346435546875, 12.727479934692383, -82.88973999023438, 1328.8427734375, -250.60604858398438, 239.45269775390625, 50.43250274658203, 882.1743774414062, 245.4105224609375, 1036.0543212890625, 598.7376708984375, 204.56906127929688, 61.645668029785156, -180.043212890625, 102.41831970214844, 359.2935485839844, 546.70947265625, 170.18475341796875, 792.033203125, -29.42804718017578, 127.79410552978516, 17.746318817138672, 502.885009765625, 47.54581069946289, -40.398162841796875, -71.97257232666016, 161.68798828125, 1683.328125, 560.6361694335938, -199.75225830078125, -2.5712890625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000440.npy"}
{"epoch": 0.6461086637298091, "step": 441, "batch_size": 64, "mean": 240.76620483398438, "std": 449.72418212890625, "min": -704.8296508789062, "p10": -274.24784545898433, "median": 214.5010757446289, "p90": 758.3546020507813, "max": 2054.329345703125, "pos_frac": 0.703125, "sample": [284.0030517578125, 164.99610900878906, 225.92230224609375, 533.8573608398438, 529.912841796875, 736.6396484375, -56.50030517578125, 28.759475708007812, 138.24722290039062, -382.6339416503906, -296.1722717285156, -71.81526947021484, 722.1728515625, 977.3470458984375, 334.2001953125, -517.3765869140625, 116.75563049316406, 774.525146484375, 86.37554931640625, -30.482757568359375, 39.03984832763672, 116.77870178222656, -40.752532958984375, 767.6610107421875, 785.2916870117188, 18.483123779296875, 234.5542449951172, 446.62188720703125, 397.0751953125, 2054.329345703125, 20.33087730407715, -553.6114501953125, 336.02569580078125, 529.61328125, -430.7911376953125, -166.10763549804688, 150.63238525390625, 292.3895263671875, 564.0826416015625, -443.3299560546875, -86.68438720703125, 503.0731201171875, -92.07670593261719, -704.8296508789062, 372.92926025390625, 389.44793701171875, 346.37646484375, 437.1180419921875, 408.7207336425781, -105.238037109375, 203.07984924316406, -223.09085083007812, -105.7677001953125, 469.230712890625, 359.3533935546875, 66.72344970703125, 729.8168334960938, 359.99737548828125, 616.4473876953125, 1224.13037109375, -10.34689712524414, -116.6705551147461, 123.95258331298828, 826.29443359375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000441.npy"}
{"epoch": 0.6475770925110133, "step": 442, "batch_size": 64, "mean": 281.21954345703125, "std": 379.4697265625, "min": -482.519287109375, "p10": -45.18935508728027, "median": 168.4522476196289, "p90": 804.0425537109375, "max": 1211.29638671875, "pos_frac": 0.734375, "sample": [-101.41243743896484, 172.286865234375, 543.7725830078125, -8.387451171875, 305.2149963378906, 140.2667236328125, -42.69939422607422, 258.3354797363281, 64.06880187988281, -8.645572662353516, -11.590778350830078, 424.1211853027344, -32.84162139892578, 440.4083251953125, 389.28289794921875, 698.5195922851562, 961.0360717773438, 321.23297119140625, -2.635528564453125, 43.59208297729492, 140.30889892578125, 671.7191772460938, 106.55888366699219, 254.595947265625, -224.3927459716797, 336.6635437011719, 709.0393676757812, 300.59136962890625, 905.106201171875, 220.220947265625, 18.04193115234375, -482.519287109375, 129.89370727539062, -32.04880905151367, -5.208137512207031, 573.0106811523438, -46.2564811706543, 455.2419738769531, 76.43114471435547, 1159.1058349609375, -272.2974853515625, 413.7610778808594, 804.154541015625, 61.539573669433594, 65.01129150390625, 1144.6629638671875, 741.7315673828125, 256.32745361328125, 549.0516357421875, 82.3232650756836, 71.79486083984375, -279.6299743652344, 19.2667236328125, 95.81990051269531, -10.297073364257812, -228.7417755126953, 306.4396667480469, 415.64410400390625, 1211.29638671875, 597.87158203125, 1165.6556396484375, 803.78125, 164.6176300048828, -1.7626190185546875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000442.npy"}
{"epoch": 0.6490455212922174, "step": 443, "batch_size": 64, "mean": 174.6428680419922, "std": 411.2413024902344, "min": -903.1358642578125, "p10": -235.03449249267575, "median": 151.5550994873047, "p90": 670.5789855957034, "max": 1332.5810546875, "pos_frac": 0.75, "sample": [1191.4364013671875, 112.73506164550781, 900.656494140625, 20.869644165039062, 281.0279846191406, 128.72593688964844, -91.58914947509766, -77.99827575683594, -402.81048583984375, 124.18770599365234, 29.873855590820312, 41.84715270996094, 152.2931365966797, 203.495361328125, 18.781951904296875, -715.3743896484375, 28.551834106445312, -157.0271759033203, -160.58676147460938, -27.213258743286133, 1230.5067138671875, 340.52154541015625, 593.43701171875, 305.15386962890625, -656.79296875, -621.4890747070312, 258.8396301269531, 225.73765563964844, 703.6398315429688, -903.1358642578125, 376.7673645019531, 366.9481201171875, 207.57225036621094, 150.8170623779297, 139.03517150878906, -100.81372833251953, 148.038330078125, 444.60552978515625, 448.220458984375, 192.86997985839844, -246.4744415283203, -37.96940612792969, 333.0578918457031, -208.34127807617188, 359.88427734375, 82.68617248535156, 317.77545166015625, 1332.5810546875, 252.9254913330078, 100.4991226196289, 555.577880859375, 84.0592041015625, 184.8182830810547, 121.84333801269531, -179.5193328857422, 260.8696594238281, -251.18797302246094, 208.12657165527344, 144.0528564453125, 189.65887451171875, 857.7828369140625, 839.259521484375, 211.94271850585938, 210.87289428710938], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000443.npy"}
{"epoch": 0.6505139500734214, "step": 444, "batch_size": 64, "mean": 290.8316345214844, "std": 389.5690612792969, "min": -622.3632202148438, "p10": -107.28206481933593, "median": 269.7771759033203, "p90": 732.8353759765629, "max": 1448.8765869140625, "pos_frac": 0.765625, "sample": [340.58380126953125, -227.05084228515625, 83.6739273071289, 563.8295288085938, -24.43625831604004, 347.3070068359375, -187.5489044189453, 1436.8314208984375, 882.2132568359375, -33.28361892700195, -98.92489624023438, 257.4082336425781, 282.1461181640625, 529.8892822265625, 599.0187377929688, 157.0745849609375, 769.1983642578125, 470.57574462890625, -277.2077331542969, 165.4876251220703, -53.21807098388672, -45.008277893066406, 392.4023132324219, 799.8712768554688, 112.18086242675781, 380.1043701171875, 324.5820617675781, -34.818511962890625, 305.2016296386719, -379.595703125, -20.84234046936035, 1448.8765869140625, 324.23272705078125, 301.0619812011719, 337.15728759765625, 440.07684326171875, 1150.423583984375, 380.1468505859375, 481.31927490234375, 236.79132080078125, -110.86370849609375, 1075.1851806640625, 596.683837890625, 81.34920501708984, 539.5372314453125, 499.7281188964844, 618.1687622070312, 38.820350646972656, 166.3419952392578, 156.2361297607422, 228.1758575439453, 235.30101013183594, 336.7399597167969, 223.58697509765625, 413.4866638183594, 199.753662109375, 8.786275863647461, -26.167129516601562, -384.3555603027344, 44.532989501953125, 647.9884033203125, -622.3632202148438, 251.082275390625, 477.757080078125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000444.npy"}
{"epoch": 0.6519823788546255, "step": 445, "batch_size": 64, "mean": 267.3776550292969, "std": 340.0048828125, "min": -477.68829345703125, "p10": -141.38472442626946, "median": 235.15777587890625, "p90": 648.2677307128907, "max": 1255.9417724609375, "pos_frac": 0.796875, "sample": [225.08892822265625, 67.4103775024414, 21.17784881591797, 907.7657470703125, 24.597671508789062, 482.2359924316406, 425.77337646484375, 170.2359619140625, 89.52532958984375, 207.3899383544922, 100.0718994140625, 569.6201171875, 12.359634399414062, 391.0618896484375, 100.07682037353516, -477.68829345703125, 117.40673828125, 480.0368347167969, 352.0198974609375, 182.79270935058594, 400.84100341796875, 353.6242980957031, 509.924560546875, 707.1943359375, -78.48118591308594, 128.42352294921875, 1255.9417724609375, 470.4971618652344, 450.252197265625, 624.0849609375, 655.1736450195312, 573.9603271484375, 173.65916442871094, 740.9489135742188, 67.55009460449219, -239.26849365234375, 251.18344116210938, 358.9388122558594, -168.3433837890625, 463.0802001953125, -1.3301773071289062, 1007.9171142578125, 459.1531982421875, -222.35951232910156, 632.1539306640625, 374.932861328125, 216.27890014648438, -174.938720703125, 2.4542617797851562, -42.43302917480469, 262.5657653808594, 5.377189636230469, -68.23393249511719, 418.9073181152344, 204.64242553710938, 282.0130615234375, -30.035614013671875, -199.72689819335938, 1185.305419921875, -55.32170104980469, 489.48638916015625, 245.22662353515625, 272.104736328125, -300.1141662597656], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000445.npy"}
{"epoch": 0.6534508076358296, "step": 446, "batch_size": 64, "mean": 284.8059997558594, "std": 306.0168151855469, "min": -595.2217407226562, "p10": -34.55018386840818, "median": 265.48277282714844, "p90": 701.9154052734376, "max": 1069.6536865234375, "pos_frac": 0.875, "sample": [10.367988586425781, 285.44281005859375, 12.455093383789062, 710.483642578125, 772.5632934570312, 354.0826721191406, 33.91170120239258, -595.2217407226562, 194.06776428222656, 675.8060913085938, 858.7572631835938, 385.71539306640625, -43.562156677246094, 681.9228515625, 562.666259765625, 592.3915405273438, 578.7349853515625, 350.5726623535156, 201.13494873046875, 370.52581787109375, -143.86318969726562, 179.06195068359375, 110.13694763183594, 210.36817932128906, 224.37794494628906, -315.7470703125, 119.75868225097656, 138.17654418945312, 486.11541748046875, 250.9583740234375, 581.871826171875, -89.74519348144531, 39.559844970703125, 190.65931701660156, 12.40971565246582, 140.01510620117188, -13.522247314453125, 404.86077880859375, 284.5324401855469, 757.085205078125, 310.56427001953125, 186.4521942138672, 681.1781005859375, 1069.6536865234375, 426.3345947265625, 10.293067932128906, 787.6488037109375, 450.9944152832031, 282.55670166015625, 280.0071716308594, 335.8122863769531, 2.2316131591796875, -98.93927001953125, 168.72610473632812, 77.3703842163086, 286.6029052734375, 99.18161010742188, 371.4710693359375, 768.8236694335938, 211.994140625, -235.68890380859375, 652.5182495117188, 249.02633666992188, 292.8808288574219], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000446.npy"}
{"epoch": 0.6549192364170338, "step": 447, "batch_size": 64, "mean": 294.4344482421875, "std": 370.6329040527344, "min": -479.6954345703125, "p10": -129.55921020507813, "median": 258.5492248535156, "p90": 730.1938049316408, "max": 1903.661865234375, "pos_frac": 0.84375, "sample": [269.24468994140625, 102.32447052001953, 339.2470703125, 293.0570373535156, 22.635162353515625, -479.6954345703125, 493.4407653808594, 174.47775268554688, 273.0560302734375, -139.16329956054688, 50.30986404418945, 754.7871704101562, 257.2489318847656, 198.9334259033203, 669.34716796875, 551.3610229492188, 770.7936401367188, 51.41276550292969, 1903.661865234375, 810.4113159179688, 220.83737182617188, 1015.345703125, 483.24029541015625, 395.0020751953125, -243.1293487548828, 203.12921142578125, 468.675537109375, -125.01617431640625, 259.8495178222656, 148.3334503173828, -142.03195190429688, 8.624404907226562, 22.208168029785156, -117.39682006835938, 405.48583984375, -131.5062255859375, 247.77529907226562, 521.1676025390625, 88.89910888671875, 31.666847229003906, 521.7535400390625, 139.79324340820312, 128.0285186767578, -335.5257263183594, 745.177490234375, 524.0136108398438, 427.7277526855469, -191.86767578125, 239.87521362304688, 29.215051651000977, 452.3721923828125, 338.5481262207031, 377.747802734375, 324.5599060058594, 161.93429565429688, 593.9139404296875, 301.0660095214844, 118.11639404296875, 996.8037109375, 589.112548828125, 46.34923553466797, 695.2318725585938, 538.0455322265625, -46.239105224609375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000447.npy"}
{"epoch": 0.6563876651982379, "step": 448, "batch_size": 64, "mean": 284.32830810546875, "std": 343.540283203125, "min": -666.3853149414062, "p10": -113.7600936889648, "median": 237.86071014404297, "p90": 651.7819091796875, "max": 1287.3712158203125, "pos_frac": 0.84375, "sample": [9.756889343261719, 87.08102416992188, -313.1786193847656, 154.43341064453125, 617.12841796875, 25.89630126953125, 132.03053283691406, -58.11090850830078, 355.40716552734375, -666.3853149414062, 477.89947509765625, 171.94537353515625, 653.75732421875, 66.3236083984375, 211.4278564453125, 502.875244140625, 112.51262664794922, 32.35276794433594, 596.609619140625, -141.86062622070312, -132.21055603027344, 156.0302734375, 853.9375, 423.300537109375, 231.20680236816406, 249.07064819335938, 244.51461791992188, 146.69921875, 463.536865234375, 831.2904052734375, 327.4167175292969, 573.7705688476562, -15.852396011352539, 327.93487548828125, 1287.3712158203125, 32.37208557128906, 134.73548889160156, 216.7979278564453, 24.178077697753906, 359.621826171875, 616.1295166015625, 595.410400390625, 282.4679260253906, -207.47341918945312, 1094.8126220703125, 273.3472900390625, 158.566162109375, -70.70901489257812, 385.45379638671875, 170.99179077148438, 305.6435241699219, -183.48936462402344, 1024.969482421875, -141.046630859375, 599.1404418945312, 818.7510375976562, 59.3504638671875, 340.521728515625, 125.73963165283203, 262.60650634765625, 559.81005859375, 647.172607421875, 498.7467041015625, 216.47247314453125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000448.npy"}
{"epoch": 0.657856093979442, "step": 449, "batch_size": 64, "mean": 288.65020751953125, "std": 425.3541259765625, "min": -875.581298828125, "p10": -188.23087005615233, "median": 241.99139404296875, "p90": 735.8935241699219, "max": 2185.8525390625, "pos_frac": 0.78125, "sample": [168.3135223388672, 83.5773696899414, 433.71783447265625, 77.12992858886719, 701.0665893554688, 704.0482177734375, 237.4854736328125, 67.51769256591797, 246.497314453125, 248.74887084960938, 827.9854736328125, -223.12294006347656, -1.8252029418945312, 214.55889892578125, 585.8402099609375, 27.276012420654297, 450.73284912109375, 169.699951171875, 111.41911315917969, -263.21600341796875, 776.8653564453125, 274.96246337890625, 528.3338012695312, 740.3392333984375, 319.90765380859375, 725.5202026367188, -290.3553771972656, 65.63020324707031, 326.5245361328125, 433.45794677734375, 195.31890869140625, 183.56338500976562, 16.29150390625, 563.994140625, 538.3242797851562, 669.9561157226562, -119.3697738647461, 131.0701141357422, -183.948486328125, -203.18052673339844, 2185.8525390625, 633.5552368164062, -190.06617736816406, 363.0982971191406, 363.1927490234375, 172.46078491210938, -49.27227783203125, -875.581298828125, -54.287357330322266, 1004.2689819335938, 280.1026306152344, 852.5073852539062, 612.075439453125, -205.81434631347656, 440.80560302734375, 198.76422119140625, 425.982421875, 1033.330078125, 292.0753479003906, -49.07600402832031, -136.4521026611328, 11.5198974609375, 169.04124450683594, 434.8732604980469], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000449.npy"}
{"epoch": 0.6593245227606461, "step": 450, "batch_size": 64, "mean": 307.92584228515625, "std": 309.13720703125, "min": -680.7169799804688, "p10": -11.205527496337865, "median": 266.4678497314453, "p90": 681.785986328125, "max": 1152.163818359375, "pos_frac": 0.890625, "sample": [387.9178161621094, 598.0779418945312, 775.6473388671875, 410.8598327636719, 42.5390625, 668.8629150390625, 1152.163818359375, -64.56803894042969, 317.5708312988281, -25.53392791748047, 597.98388671875, 464.5213623046875, 683.3471069335938, 340.5336608886719, 396.2177734375, 678.1433715820312, 97.73220825195312, 95.908935546875, 227.93963623046875, 39.52597427368164, 370.05072021484375, 661.4407348632812, 466.41107177734375, 690.42822265625, 203.8726348876953, -43.96889877319336, 77.40260314941406, 904.266845703125, 399.298828125, 636.4541625976562, 92.80091857910156, 372.751220703125, 248.5380096435547, 229.68084716796875, 266.7939453125, 14.313491821289062, 251.81517028808594, 378.4147644042969, 229.54464721679688, 125.98695373535156, 884.8560791015625, -217.7402801513672, 162.4349365234375, 412.9396057128906, 154.8289794921875, 320.6177673339844, 52.858421325683594, 646.1485595703125, 189.25991821289062, 17.959026336669922, 458.838623046875, -74.46627807617188, 556.6565551757812, 281.25628662109375, 26.9798641204834, -680.7169799804688, 297.0003662109375, 266.1417541503906, 41.23261260986328, 195.59226989746094, 180.25668334960938, 970.6494750976562, -22.142250061035156, 124.12408447265625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000450.npy"}
{"epoch": 0.6607929515418502, "step": 451, "batch_size": 64, "mean": 233.21820068359375, "std": 423.9560241699219, "min": -1168.837890625, "p10": -229.33618316650387, "median": 267.2328567504883, "p90": 696.4185607910158, "max": 1160.11572265625, "pos_frac": 0.796875, "sample": [99.10079956054688, -98.37734985351562, 31.986602783203125, 306.5462951660156, 509.17999267578125, 285.8721618652344, 430.4622802734375, 518.4022216796875, -19.607994079589844, 309.87347412109375, 445.8159484863281, 1134.42138671875, 496.18975830078125, 851.8388671875, 156.03692626953125, 242.80758666992188, 101.23323822021484, -1168.837890625, -752.457275390625, -152.53546142578125, 105.39989471435547, 40.47229766845703, 501.7970275878906, 526.4849853515625, 649.0292358398438, 251.70091247558594, 419.55535888671875, 190.17759704589844, 346.8501281738281, 559.73095703125, 177.77651977539062, 1160.11572265625, 1013.1573486328125, 291.77716064453125, 474.9632263183594, 576.8635864257812, 443.464599609375, -298.923095703125, 308.6811828613281, 316.41650390625, -39.22027587890625, -150.96072387695312, 431.1593322753906, -1049.5416259765625, 141.05284118652344, 134.2325439453125, 182.95516967773438, 167.05615234375, 282.7648010253906, 112.06299591064453, 737.7037963867188, -272.15863037109375, -180.10116577148438, 827.365478515625, 558.9719848632812, 121.614990234375, 412.2271728515625, -431.4040832519531, 173.01832580566406, 9.10698127746582, 716.728271484375, -250.43690490722656, 334.58807373046875, 173.73580932617188], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000451.npy"}
{"epoch": 0.6622613803230544, "step": 452, "batch_size": 64, "mean": 281.4200439453125, "std": 279.9942626953125, "min": -364.0804748535156, "p10": -71.02135696411132, "median": 279.8431854248047, "p90": 680.0461608886719, "max": 919.8273315429688, "pos_frac": 0.84375, "sample": [831.1229248046875, -219.73123168945312, 509.3836975097656, 600.2515869140625, -98.5259017944336, 510.97705078125, 413.1509094238281, 510.2973937988281, 532.458251953125, -41.426780700683594, 490.750244140625, 279.35662841796875, 134.5442352294922, 177.27687072753906, 349.68017578125, 280.3297424316406, 710.4971923828125, 295.2258605957031, 453.78472900390625, 19.32581901550293, 384.1221618652344, 8.280067443847656, 429.50390625, 149.64923095703125, 298.60504150390625, 671.2000732421875, 173.7406463623047, -264.2775573730469, 522.5908203125, 194.79824829101562, -63.234283447265625, 434.58160400390625, 114.04959106445312, 190.49920654296875, 728.7426147460938, 104.16265869140625, 286.9875793457031, 641.2525634765625, 74.32048797607422, 192.16641235351562, 240.58822631835938, -129.7978515625, 214.24826049804688, 736.7596435546875, 203.066650390625, 398.4496154785156, 121.415771484375, 740.8780517578125, 919.8273315429688, 103.14115905761719, 112.42112731933594, 358.78192138671875, 358.89105224609375, 45.085140228271484, -364.0804748535156, 315.339111328125, -73.02761840820312, 485.0277404785156, 424.7917175292969, 683.8373413085938, -66.34008026123047, 241.7803955078125, -86.28413391113281, 15.613580703735352], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000452.npy"}
{"epoch": 0.6637298091042585, "step": 453, "batch_size": 64, "mean": 260.94769287109375, "std": 348.0165710449219, "min": -282.7266845703125, "p10": -129.96163101196288, "median": 180.85411071777344, "p90": 725.9584045410159, "max": 1566.005615234375, "pos_frac": 0.765625, "sample": [598.1887817382812, 524.5545654296875, -96.03630828857422, 112.88640594482422, 220.45877075195312, 109.83851623535156, 281.5459899902344, 131.62586975097656, 145.85226440429688, 47.52862548828125, 210.94192504882812, 588.6975708007812, 484.5709533691406, 606.6255493164062, 30.877445220947266, 47.38612365722656, 313.939697265625, 193.05267333984375, -92.46086120605469, 80.34364318847656, 594.838623046875, -185.64361572265625, 223.550048828125, 285.8758850097656, 668.0548095703125, -246.55914306640625, -60.16410827636719, 790.6900634765625, -282.7266845703125, 345.716552734375, -81.49986267089844, 148.94467163085938, 481.77886962890625, 870.9159545898438, 750.7742309570312, 7.238636016845703, -45.88883972167969, 152.40069580078125, 1566.005615234375, 784.4629516601562, 363.7234191894531, -134.4895477294922, 851.0762329101562, 14.909385681152344, 262.078857421875, 94.57955932617188, 585.6936645507812, 240.7255859375, 447.91455078125, 430.890380859375, -174.39222717285156, 455.94659423828125, -147.32772827148438, 513.84814453125, 41.97900390625, -70.2298355102539, -179.5679168701172, 154.21002197265625, 971.9261474609375, 511.1957092285156, -27.212360382080078, -119.39649200439453, 134.73184204101562, 168.65554809570312], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000453.npy"}
{"epoch": 0.6651982378854625, "step": 454, "batch_size": 64, "mean": 296.76806640625, "std": 364.30523681640625, "min": -682.17822265625, "p10": -41.038256835937496, "median": 264.19361877441406, "p90": 725.604333496094, "max": 1417.7197265625, "pos_frac": 0.84375, "sample": [223.49557495117188, 2.7869873046875, 947.1454467773438, 313.634033203125, 525.0897827148438, 55.52803039550781, 668.1383056640625, 357.55267333984375, 41.63486099243164, 312.54766845703125, -555.28466796875, 517.231201171875, 357.9873962402344, 299.7896728515625, 191.233642578125, 261.64300537109375, 395.1107177734375, 405.43157958984375, -56.97698211669922, -682.17822265625, 25.970064163208008, 534.7734985351562, -36.69256591796875, 753.5322875976562, -325.4265441894531, -42.90069580078125, 750.154052734375, 99.4739761352539, 37.57112121582031, 245.98028564453125, 537.613525390625, 138.6621551513672, 314.93914794921875, 108.91387176513672, 428.14691162109375, 144.09878540039062, 213.80178833007812, 180.23724365234375, 391.82489013671875, 180.29563903808594, 410.24462890625, 460.5915222167969, 95.50459289550781, 397.699951171875, -4.322906494140625, 668.3216552734375, 1417.7197265625, 352.3404541015625, -57.220848083496094, 157.212158203125, 1394.0010986328125, 1033.8924560546875, 266.7442321777344, 501.003173828125, 68.37981414794922, 474.4200134277344, 609.9024658203125, -93.40371704101562, 462.6368713378906, -16.659957885742188, 187.3912811279297, 87.39129638671875, 770.1121826171875, 86.74278259277344], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000454.npy"}
{"epoch": 0.6666666666666666, "step": 455, "batch_size": 64, "mean": 339.77191162109375, "std": 392.5646667480469, "min": -661.0161743164062, "p10": -70.04964141845696, "median": 234.4259796142578, "p90": 1006.1561950683598, "max": 1285.82421875, "pos_frac": 0.875, "sample": [144.89466857910156, 130.7767333984375, 756.6123046875, 1055.3984375, -438.8948669433594, 356.1557922363281, 649.176025390625, -97.58489990234375, 1098.916259765625, -190.74642944335938, 1049.2958984375, 221.0980987548828, 905.4968872070312, -222.11148071289062, 1091.013916015625, -107.7318344116211, 196.17486572265625, 600.591796875, 225.23751831054688, 210.02159118652344, 71.74191284179688, 462.7081604003906, 195.9842987060547, 32.733009338378906, 355.64752197265625, 1123.613525390625, 684.6720581054688, -98.35521697998047, 274.7523193359375, 373.0635986328125, 212.83236694335938, 497.4844970703125, 243.61444091796875, 199.01644897460938, 151.15890502929688, 202.88885498046875, 213.56466674804688, 317.1557922363281, 70.6422119140625, -661.0161743164062, 423.3482971191406, 64.62722778320312, 267.302001953125, 569.348388671875, 44.00335693359375, 93.0674819946289, 22.339723587036133, 431.43359375, 810.3949584960938, 201.40341186523438, 632.7294921875, 132.31890869140625, 449.643798828125, 444.0935363769531, 64.4164810180664, 110.20758056640625, -5.800701141357422, 803.5689697265625, 435.23785400390625, 393.50634765625, 1183.662841796875, 280.2123107910156, 50.8166618347168, 1285.82421875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000455.npy"}
{"epoch": 0.6681350954478708, "step": 456, "batch_size": 64, "mean": 335.6260986328125, "std": 334.8710021972656, "min": -163.71560668945312, "p10": -5.770201110839842, "median": 262.1735076904297, "p90": 867.4423156738284, "max": 1351.2314453125, "pos_frac": 0.875, "sample": [149.946533203125, 223.8936004638672, 774.4845581054688, 94.8070068359375, -19.92523193359375, 314.57757568359375, 41.58419418334961, -4.221038818359375, 220.22161865234375, 734.2832641601562, 451.6693115234375, 35.855812072753906, 116.58192443847656, 1351.2314453125, 280.6156311035156, -71.05524444580078, -6.4341278076171875, 229.25546264648438, 305.1508483886719, 422.8179016113281, 284.9873046875, 61.01171112060547, -54.184814453125, 1019.14208984375, -84.8541259765625, 406.27447509765625, 1210.7984619140625, 411.6781921386719, 569.0597534179688, 46.23822021484375, 340.60986328125, 504.4579772949219, 649.18896484375, 185.04037475585938, 162.638671875, 230.86976623535156, 77.31370544433594, 433.88671875, 973.4115600585938, 92.24972534179688, 893.6228637695312, 433.73046875, 276.1412353515625, 88.87824249267578, -163.71560668945312, 406.8709716796875, 52.88666534423828, 9.653717041015625, 806.3543701171875, 70.33959197998047, 900.2452392578125, 977.7168579101562, 342.7752990722656, -8.50341796875, 281.5700988769531, 84.58335876464844, 365.1856384277344, 61.800445556640625, 423.17974853515625, 216.18182373046875, 248.20578002929688, 592.274169921875, 725.994873046875, 228.93936157226562], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000456.npy"}
{"epoch": 0.6696035242290749, "step": 457, "batch_size": 64, "mean": 333.8603820800781, "std": 331.0036315917969, "min": -319.67547607421875, "p10": -49.333857727050756, "median": 385.1793670654297, "p90": 737.5216247558594, "max": 1273.76904296875, "pos_frac": 0.8125, "sample": [-19.84355926513672, 426.2017822265625, 424.31817626953125, 376.49530029296875, 550.5745239257812, 715.6124877929688, 746.9112548828125, 847.2694091796875, 77.37911224365234, 51.568634033203125, 1078.939697265625, 397.21246337890625, 38.64670181274414, 195.32862854003906, 75.95405578613281, 221.5631866455078, 496.7751770019531, 222.8820343017578, -61.972557067871094, 534.6405029296875, 699.616943359375, -13.505931854248047, 127.25294494628906, 506.0806884765625, 469.9258117675781, 956.6845092773438, -183.38609313964844, -163.2112274169922, 601.0107421875, 618.970458984375, 49.12120056152344, 537.9254150390625, 443.8893737792969, 527.4114990234375, 292.39324951171875, -78.04066467285156, 662.942626953125, 425.2742004394531, 77.08963012695312, -9.3468017578125, 491.181396484375, 788.2315673828125, 436.10296630859375, 113.22035217285156, 72.0147705078125, 131.7793731689453, 519.8319091796875, -17.24340057373047, 503.94354248046875, 382.8060607910156, -191.04727172851562, 692.8871459960938, 309.96966552734375, 796.0662841796875, -319.67547607421875, 421.6435546875, 57.76872253417969, -147.29244995117188, 387.55267333984375, -13.862316131591797, 192.450439453125, 477.09814453125, 63.311607360839844, 1273.76904296875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000457.npy"}
{"epoch": 0.671071953010279, "step": 458, "batch_size": 64, "mean": 295.9541015625, "std": 361.4006652832031, "min": -496.7081604003906, "p10": -119.02403717041015, "median": 290.33331298828125, "p90": 660.5240966796875, "max": 1600.3056640625, "pos_frac": 0.796875, "sample": [-496.7081604003906, 797.4224853515625, 463.19622802734375, 292.3165283203125, 513.9425659179688, -108.05668640136719, 1600.3056640625, -167.17454528808594, 288.35009765625, 529.33935546875, 190.66302490234375, 255.45294189453125, 463.96563720703125, 299.4197082519531, 722.137451171875, -167.16986083984375, -113.43116760253906, -7.238748550415039, 191.82350158691406, 623.4927368164062, 459.9421691894531, 508.2313537597656, 102.25606536865234, 530.9157104492188, 320.2283935546875, 349.31048583984375, 345.46075439453125, 11.24700927734375, 199.86204528808594, 410.28076171875, 279.09539794921875, 669.6607666015625, 545.04931640625, 788.5296020507812, 186.751708984375, -4.151275634765625, 370.4878845214844, 73.881591796875, 440.28143310546875, 292.9224853515625, 698.4449462890625, 334.6448669433594, -121.28426361083984, -266.2160339355469, 124.63880157470703, -296.60546875, 206.04205322265625, -113.75017547607422, 238.04017639160156, 1554.087646484375, 558.4490966796875, 127.3943862915039, 276.6898498535156, 81.80387878417969, 200.514404296875, 639.2052001953125, 336.53594970703125, 407.7079772949219, 242.7762908935547, 329.24920654296875, -88.21131134033203, 437.6187438964844, -254.58615112304688, 235.57948303222656], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000458.npy"}
{"epoch": 0.6725403817914831, "step": 459, "batch_size": 64, "mean": 325.8946533203125, "std": 406.16046142578125, "min": -686.7733154296875, "p10": -192.3835266113281, "median": 290.6429443359375, "p90": 850.702557373047, "max": 1427.71728515625, "pos_frac": 0.796875, "sample": [69.73692321777344, 77.25323486328125, 451.2447509765625, 247.41534423828125, 208.05361938476562, 376.337890625, -208.41253662109375, 146.0804901123047, 648.237548828125, -287.9101257324219, 505.8873596191406, 388.79058837890625, 486.6911315917969, 615.6749267578125, -8.744279861450195, 427.68524169921875, -56.1483154296875, 679.967041015625, 308.2978515625, 881.5768432617188, 31.33258056640625, 854.6188354492188, 622.0416870117188, -121.09471130371094, 302.1663513183594, -239.33750915527344, 794.2293701171875, -187.6001434326172, 45.9213981628418, 16.7436580657959, 247.09703063964844, -34.75286102294922, 841.5645751953125, -686.7733154296875, 1427.71728515625, 295.6646423339844, 1132.3936767578125, 554.22509765625, 245.04417419433594, -292.3236389160156, 174.43768310546875, 253.59628295898438, 413.1688232421875, 221.30804443359375, 102.60113525390625, 285.6212463378906, -443.6672058105469, 192.49746704101562, 436.3224182128906, 649.3535766601562, 736.60498046875, 102.00791931152344, 321.55865478515625, 1210.756591796875, 265.3121032714844, 126.25505828857422, 945.1149291992188, -194.4335479736328, 775.3173217773438, 752.909423828125, 866.2874145507812, 308.529296875, -19.89333724975586, 569.0972900390625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000459.npy"}
{"epoch": 0.6740088105726872, "step": 460, "batch_size": 64, "mean": 348.79052734375, "std": 385.83587646484375, "min": -424.14697265625, "p10": -112.0334159851074, "median": 301.4751739501953, "p90": 856.2886230468752, "max": 1406.7431640625, "pos_frac": 0.828125, "sample": [893.9622802734375, 749.5989379882812, 533.4935913085938, -122.35993957519531, 108.31895446777344, 813.147705078125, 874.777587890625, 296.6470031738281, 35.020286560058594, 236.4975128173828, -142.62933349609375, 682.8460693359375, 109.31291198730469, 551.40283203125, 199.9698944091797, 172.02435302734375, 278.0039367675781, -424.14697265625, 177.50123596191406, 1278.31884765625, 1338.5206298828125, 237.4061279296875, 501.3993225097656, 38.03131103515625, 1011.8409423828125, 306.3033447265625, 1406.7431640625, 398.71136474609375, -73.2499008178711, 25.32489013671875, 289.0975036621094, -51.036582946777344, 556.8731079101562, -177.79737854003906, 518.0272216796875, 531.1602783203125, -87.93819427490234, 408.48883056640625, 224.41610717773438, -49.61363983154297, 795.5137939453125, 588.4287109375, 535.4551391601562, 375.6474304199219, 471.4515380859375, 110.39053344726562, 95.94859313964844, 127.69535827636719, 349.7993469238281, 231.3647918701172, -237.43296813964844, 76.4581527709961, 120.85363006591797, 372.1017150878906, -250.2199249267578, 455.01068115234375, 898.9122314453125, 545.566162109375, -216.31460571289062, 773.3795776367188, 543.4063720703125, 404.6824645996094, 101.75328826904297, 398.324951171875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000460.npy"}
{"epoch": 0.6754772393538914, "step": 461, "batch_size": 64, "mean": 281.6998291015625, "std": 390.4303283691406, "min": -534.334228515625, "p10": -145.88890609741208, "median": 206.44406127929688, "p90": 756.9691284179688, "max": 1506.2635498046875, "pos_frac": 0.765625, "sample": [108.73062133789062, 54.44755554199219, 769.469482421875, 401.2553405761719, -156.98809814453125, 568.9447021484375, -534.334228515625, 1226.5771484375, 81.67984008789062, 665.22998046875, 158.55429077148438, -57.26197052001953, 306.15325927734375, 577.7156372070312, 1014.310791015625, 437.42041015625, 620.396728515625, -59.364501953125, 190.87940979003906, 335.671142578125, 326.6722412109375, 679.4774780273438, 727.8016357421875, 963.1094970703125, -177.1687469482422, -50.505706787109375, 1142.996337890625, 1506.2635498046875, 234.703125, 284.7835693359375, 493.2734069824219, 171.86680603027344, 207.96246337890625, 14.481597900390625, 114.466796875, 268.8045654296875, -221.15673828125, -51.60943603515625, -98.20675659179688, 99.94680786132812, 818.7493286132812, -65.88025665283203, 616.1630859375, 651.9356079101562, -247.5519256591797, 311.9516296386719, 220.20726013183594, 577.849365234375, 21.434326171875, -105.7666015625, 558.498291015625, 94.1664047241211, 100.69729614257812, 189.20654296875, 427.98553466796875, -119.99079132080078, -261.2665100097656, 136.89553833007812, 204.9256591796875, 162.43722534179688, 118.82848358154297, -294.78704833984375, 351.25177001953125, 213.39776611328125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000461.npy"}
{"epoch": 0.6769456681350955, "step": 462, "batch_size": 64, "mean": 227.35406494140625, "std": 388.22882080078125, "min": -472.55902099609375, "p10": -210.4788040161133, "median": 176.86573791503906, "p90": 590.3112854003907, "max": 1343.0986328125, "pos_frac": 0.6875, "sample": [383.7762451171875, 40.666847229003906, 24.676307678222656, -171.8871307373047, 385.6463928222656, 598.0819702148438, 32.884307861328125, 481.1377868652344, 547.9685668945312, 850.8118286132812, 464.522216796875, -231.55322265625, -328.67138671875, 43.35433578491211, 158.28500366210938, 485.9671630859375, 514.052734375, 182.58578491210938, -202.63470458984375, 652.5537109375, -192.82223510742188, 150.55905151367188, 344.655517578125, -472.55902099609375, -138.78614807128906, -12.868377685546875, 565.9341430664062, -361.55084228515625, 22.030303955078125, -42.0588493347168, 1179.0277099609375, 420.1370544433594, -267.1337890625, 216.22132873535156, 523.977294921875, 986.3202514648438, -213.84056091308594, -58.195777893066406, 1343.0986328125, 277.93896484375, 107.31535339355469, -20.704757690429688, -83.23719787597656, 171.14569091796875, -199.87478637695312, -84.13990783691406, 359.6274719238281, 494.94415283203125, -279.9922790527344, 187.21902465820312, -122.39614868164062, 415.6549072265625, 1313.306884765625, 457.4498596191406, 425.2539367675781, -75.67132568359375, 208.11859130859375, 495.5195007324219, 572.1796875, 534.7815551757812, 38.20436096191406, 131.84915161132812, 106.46229553222656, 215.3343505859375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000462.npy"}
{"epoch": 0.6784140969162996, "step": 463, "batch_size": 64, "mean": 309.1751708984375, "std": 426.23394775390625, "min": -489.5069580078125, "p10": -204.36580963134764, "median": 296.496337890625, "p90": 797.9018005371094, "max": 1511.686767578125, "pos_frac": 0.78125, "sample": [56.94239807128906, 574.3338623046875, -45.48218536376953, 251.17416381835938, -132.24945068359375, 366.2579345703125, -145.16082763671875, 65.49791717529297, 440.3701171875, 11.703262329101562, 563.9224243164062, -205.10922241210938, 606.5587158203125, 653.4063720703125, 143.50277709960938, 350.8948059082031, 383.0308837890625, 519.8645629882812, 659.6767578125, 24.860118865966797, 60.428253173828125, 1511.686767578125, 497.8439636230469, 250.86996459960938, 1193.9659423828125, 722.0750122070312, 141.03512573242188, 301.9068298339844, -32.099884033203125, 316.0548400878906, -275.7122497558594, -50.176918029785156, -489.5069580078125, 109.72937774658203, 795.5274658203125, -372.1500549316406, 24.287765502929688, -109.53314208984375, 411.0664367675781, -202.6311798095703, 579.5001831054688, 302.8410339355469, 657.3768920898438, 313.5599365234375, 798.9193725585938, 1037.6533203125, 20.145111083984375, 263.5483093261719, 874.5300903320312, -370.82366943359375, 214.38563537597656, 1244.2611083984375, 257.16357421875, 773.6446533203125, -269.8756103515625, 29.012298583984375, 383.6756591796875, 291.0858459472656, -461.5316162109375, 408.4035949707031, 524.1567993164062, 552.569091796875, 1241.88623046875, 172.46018981933594], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000463.npy"}
{"epoch": 0.6798825256975036, "step": 464, "batch_size": 64, "mean": 467.7832946777344, "std": 461.0181579589844, "min": -474.6652526855469, "p10": -50.896622848510724, "median": 450.5513610839844, "p90": 969.1621582031255, "max": 2082.009765625, "pos_frac": 0.875, "sample": [-260.67095947265625, 849.95263671875, 366.5598449707031, 278.9507141113281, 9.245849609375, 1016.455810546875, 662.8565063476562, 210.85133361816406, 107.68203735351562, 458.72906494140625, 730.2760620117188, 609.6806640625, -419.8581237792969, 704.5029296875, 323.5332946777344, 578.9888305664062, 712.223876953125, 212.8396453857422, 459.27752685546875, 711.3062133789062, 793.8959350585938, 442.3736572265625, 716.0361328125, -156.45582580566406, 1100.103515625, 231.32675170898438, 556.5731811523438, 485.74017333984375, -474.6652526855469, 224.78387451171875, 814.3583984375, 247.58026123046875, 523.283935546875, 39.033634185791016, 355.9790954589844, 647.057861328125, 836.1574096679688, 244.76490783691406, 371.5576171875, 243.43927001953125, 193.625244140625, -34.493953704833984, 858.810302734375, 581.1002197265625, 77.12916564941406, 413.0518493652344, -73.70465850830078, 256.75897216796875, 15.198112487792969, 671.6370849609375, 541.2676391601562, 1406.9974365234375, 2082.009765625, 1188.6025390625, 439.2510986328125, 1045.2728271484375, 778.8295288085938, -57.92633819580078, 1674.8095703125, 710.8699340820312, 543.0147705078125, -400.2937927246094, 348.84930419921875, 111.15555572509766], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000464.npy"}
{"epoch": 0.6813509544787077, "step": 465, "batch_size": 64, "mean": 374.6343994140625, "std": 486.353515625, "min": -1406.984619140625, "p10": -163.8538040161133, "median": 436.73260498046875, "p90": 866.5635681152344, "max": 1637.0076904296875, "pos_frac": 0.859375, "sample": [674.2877197265625, 1270.843017578125, 562.16015625, 252.03697204589844, -245.15695190429688, 62.372650146484375, 622.0498657226562, 350.3622741699219, 99.9910659790039, 22.332855224609375, 818.4105224609375, 294.14923095703125, 326.1282653808594, 152.3858642578125, 603.868408203125, 671.2483520507812, 461.678955078125, 542.6985473632812, 246.12049865722656, 73.00762176513672, -772.5938110351562, 957.5255737304688, 508.87322998046875, 228.83291625976562, 1637.0076904296875, -157.69679260253906, 377.31134033203125, 138.5921630859375, 357.125244140625, 449.0498046875, 719.5338134765625, 424.4154052734375, -166.49252319335938, 175.19635009765625, 302.5182189941406, 529.8834838867188, 34.777565002441406, 290.853759765625, -372.8384094238281, 691.8067626953125, 890.74072265625, 227.28439331054688, 508.1732177734375, -901.2391357421875, 767.454345703125, 756.7369384765625, 152.34120178222656, 775.5089111328125, 180.5768585205078, 894.4576416015625, 839.2431640625, -175.07188415527344, 1011.6734619140625, -1406.984619140625, 806.8248291015625, 868.80322265625, -78.95106506347656, 449.07537841796875, 624.0860595703125, 486.7645568847656, 58.45951843261719, 472.2843017578125, 861.3377075195312, 692.3634033203125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000465.npy"}
{"epoch": 0.6828193832599119, "step": 466, "batch_size": 64, "mean": 328.0135803222656, "std": 444.7925720214844, "min": -875.6861572265625, "p10": -263.84342498779296, "median": 315.18505859375, "p90": 920.2305847167971, "max": 1325.5023193359375, "pos_frac": 0.796875, "sample": [608.399169921875, -230.8904571533203, 247.28765869140625, -277.96612548828125, 747.369140625, 349.233154296875, 584.6082153320312, 142.0958709716797, 652.1259765625, 147.1271209716797, 206.721435546875, 861.8048095703125, -415.4640197753906, 824.913330078125, -60.34786605834961, 183.51089477539062, 373.71240234375, 15.706501007080078, 350.7829895019531, 178.12994384765625, 526.243896484375, 209.58804321289062, 471.215576171875, 346.65313720703125, 817.7503662109375, 150.21029663085938, 73.5483169555664, 731.26904296875, 184.1256561279297, 509.5791015625, 471.6787109375, 162.53787231445312, 678.13037109375, 1325.5023193359375, -875.6861572265625, 945.2702026367188, 396.3043518066406, 800.387939453125, 1082.7548828125, 396.3870849609375, 947.9293823242188, 4.703563690185547, 217.94342041015625, 198.65243530273438, 537.0467529296875, 1095.2530517578125, -168.65274047851562, -419.1794128417969, -30.766517639160156, 42.352294921875, 1144.840576171875, 297.2964172363281, 783.9793701171875, -27.919166564941406, 841.2684326171875, 445.0283203125, -420.7079162597656, 6.9942779541015625, 960.4994506835938, -323.74072265625, 333.0736999511719, -172.9033203125, -341.3260192871094, 148.89346313476562], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000466.npy"}
{"epoch": 0.684287812041116, "step": 467, "batch_size": 64, "mean": 352.9036865234375, "std": 633.1687622070312, "min": -1352.634033203125, "p10": -413.4305572509765, "median": 355.9839782714844, "p90": 1140.6565551757812, "max": 1802.892333984375, "pos_frac": 0.765625, "sample": [796.4140014648438, 1515.6632080078125, 712.3358764648438, 1261.7919921875, -632.4076538085938, -1352.634033203125, 571.8480224609375, 557.4580078125, 496.1584167480469, 315.1688232421875, 195.81631469726562, 415.4884338378906, 1758.92333984375, 137.37586975097656, 443.70294189453125, 487.6895751953125, 103.28321075439453, 1082.1678466796875, 1396.4520263671875, 533.5526733398438, 428.743408203125, 11.517864227294922, 75.65008544921875, 836.2576904296875, -353.875244140625, -376.8265686035156, -555.7681274414062, 584.5450439453125, 1124.219970703125, 427.443603515625, 977.5697021484375, 142.2228546142578, 431.832763671875, -490.4761657714844, 1092.11181640625, 54.457427978515625, 1174.7203369140625, 168.03326416015625, -43.736690521240234, 855.007568359375, 225.1270751953125, -930.8302612304688, 143.3294677734375, -247.30804443359375, 94.44013977050781, 396.79913330078125, -79.2867431640625, -317.79681396484375, -307.74298095703125, 698.6162109375, 889.6032104492188, 870.060791015625, 1147.7008056640625, 269.8639831542969, 829.978759765625, 5.348808288574219, 644.0126342773438, -23.310577392578125, -429.11798095703125, 1802.892333984375, -880.0995483398438, 5.654018402099609, 103.90276336669922, 314.099609375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000467.npy"}
{"epoch": 0.6857562408223201, "step": 468, "batch_size": 64, "mean": 428.48065185546875, "std": 537.4736328125, "min": -899.2742919921875, "p10": -103.53534088134762, "median": 382.4371032714844, "p90": 1305.2560058593751, "max": 2002.823486328125, "pos_frac": 0.859375, "sample": [417.00872802734375, 60.30406951904297, 1320.600830078125, 575.7454223632812, 1334.0238037109375, 958.9634399414062, 532.1350708007812, 718.0250244140625, 434.8605041503906, 506.3005065917969, 347.99493408203125, 672.5465087890625, 140.563720703125, 441.1595153808594, 757.1124267578125, 102.47747802734375, 246.94996643066406, -181.03121948242188, 400.0439453125, 246.76022338867188, 139.13128662109375, 10.518352508544922, 30.61363983154297, 379.3857727050781, 385.4884338378906, 1365.8770751953125, 448.07183837890625, 1514.777099609375, 894.1790771484375, 110.33688354492188, -422.2697448730469, 19.497207641601562, 646.2584838867188, 320.06866455078125, 501.4671325683594, 544.63525390625, 115.96338653564453, 94.26889038085938, 615.0931396484375, -899.2742919921875, 1681.0443115234375, 81.72908782958984, 511.97650146484375, 459.2400817871094, 329.53839111328125, 102.32113647460938, -269.54931640625, 365.1847229003906, 7.8925018310546875, 1269.451416015625, 262.8056945800781, 1864.5389404296875, 234.90850830078125, 630.0097045898438, -28.81279754638672, 469.317626953125, 588.2154541015625, 2002.823486328125, 532.3997802734375, -116.13177490234375, 289.33453369140625, -404.9918212890625, -74.14366149902344, -212.97332763671875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000468.npy"}
{"epoch": 0.6872246696035242, "step": 469, "batch_size": 64, "mean": 446.3761291503906, "std": 513.00390625, "min": -642.806396484375, "p10": -136.19218139648433, "median": 428.6153869628906, "p90": 1154.516442871094, "max": 1990.6435546875, "pos_frac": 0.828125, "sample": [1042.8623046875, 599.5115966796875, 105.08943939208984, 525.6157836914062, 1990.6435546875, 657.518310546875, 1145.6407470703125, -2.4993362426757812, -96.41648864746094, 264.8538818359375, 560.26953125, 149.5532684326172, -407.94512939453125, 51.812042236328125, 1249.421875, 63.41020965576172, 438.0976257324219, 544.4813232421875, 469.140380859375, 491.3076477050781, 1109.5291748046875, 276.1105041503906, 152.26902770996094, 171.2018280029297, 1207.1552734375, 0.5108642578125, 916.8128051757812, 419.1331481933594, 1129.79931640625, -315.4508056640625, 663.0359497070312, -153.23890686035156, 856.866943359375, 222.66773986816406, 626.2730712890625, 261.80218505859375, -21.267494201660156, -185.11703491210938, 6.512298583984375, -365.25164794921875, 516.1451416015625, 827.3694458007812, 1249.1265869140625, 55.62739181518555, -398.28326416015625, 174.49484252929688, 286.6414489746094, 990.0358276367188, 338.41107177734375, 98.52295684814453, 1158.3203125, 924.6610717773438, 486.6835021972656, 1196.24853515625, 1310.84765625, 505.9706115722656, 500.1940612792969, 294.75860595703125, 50.08323669433594, -47.622962951660156, 178.02496337890625, 946.5147705078125, 746.3812866210938, -642.806396484375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000469.npy"}
{"epoch": 0.6886930983847284, "step": 470, "batch_size": 64, "mean": 383.54962158203125, "std": 515.0982055664062, "min": -1167.6683349609375, "p10": -260.52339172363276, "median": 428.9376220703125, "p90": 943.2962036132814, "max": 1677.6656494140625, "pos_frac": 0.75, "sample": [428.55426025390625, 492.14794921875, -151.84893798828125, 75.25641632080078, 251.72073364257812, -79.23583221435547, 286.59619140625, 110.39482879638672, -46.950927734375, -712.8250122070312, 511.70635986328125, 769.6736450195312, 530.392822265625, 400.6957702636719, 685.4332275390625, 950.5279541015625, -155.11167907714844, 722.6602172851562, -1167.6683349609375, 769.2279663085938, -118.60745239257812, 663.678466796875, 194.27468872070312, -359.07244873046875, 280.47467041015625, 216.62789916992188, -273.5247497558594, 429.32098388671875, 491.84405517578125, 391.84869384765625, 683.899658203125, -297.51171875, 1371.6563720703125, 132.9535369873047, -18.19476890563965, 707.4066162109375, 457.62261962890625, 577.05908203125, 1677.6656494140625, 636.102783203125, 1349.180419921875, 103.37255859375, 782.4152221679688, 885.4893798828125, -94.31329345703125, 1298.5159912109375, 543.0040893554688, 604.5916137695312, 679.4977416992188, 925.1948852539062, 360.9752197265625, -305.3970947265625, -230.1868896484375, 244.38320922851562, 507.06695556640625, 926.422119140625, -349.49285888671875, 1280.7239990234375, 753.516357421875, -115.39649963378906, 958.8696899414062, 108.43936157226562, 307.9537353515625, 505.4790954589844], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000470.npy"}
{"epoch": 0.6901615271659325, "step": 471, "batch_size": 64, "mean": 319.95709228515625, "std": 519.2809448242188, "min": -746.6712646484375, "p10": -283.28920593261716, "median": 210.4449005126953, "p90": 867.1086853027344, "max": 1962.0516357421875, "pos_frac": 0.75, "sample": [1107.1363525390625, -21.248947143554688, 1.349365234375, 697.9239501953125, 360.812744140625, 687.4669799804688, 786.8800659179688, 1962.0516357421875, 860.3231201171875, 290.2113037109375, -76.17060852050781, 349.6761474609375, -12.408525466918945, 70.53433227539062, 347.6020202636719, -4.053897857666016, -290.32257080078125, -174.9770965576172, 171.3658447265625, 2.7907371520996094, 1637.3857421875, -401.15423583984375, 272.6110534667969, 266.7439270019531, 756.6810302734375, -8.249883651733398, -373.6131286621094, -6.025999069213867, -427.79364013671875, 510.6388244628906, -266.8780212402344, 1006.58642578125, 643.5092163085938, 871.16015625, 462.1439514160156, 623.7174072265625, 779.070556640625, 166.67018127441406, 121.49015808105469, 564.7333374023438, 131.29141235351562, 249.52395629882812, 99.96575164794922, 1.3936882019042969, 759.18505859375, 1796.9620361328125, 95.82916259765625, -746.6712646484375, 791.478759765625, 31.780502319335938, -468.11700439453125, 166.25927734375, 40.207069396972656, 375.00482177734375, 71.79020690917969, 610.3624877929688, 14.842477798461914, -182.4779510498047, 458.12774658203125, 138.33511352539062, 714.02880859375, 870.0167846679688, -395.4408874511719, 537.2069702148438], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000471.npy"}
{"epoch": 0.6916299559471366, "step": 472, "batch_size": 64, "mean": 392.803466796875, "std": 648.0316162109375, "min": -1288.2791748046875, "p10": -216.10407867431636, "median": 242.6477813720703, "p90": 1306.460278320313, "max": 2316.820556640625, "pos_frac": 0.796875, "sample": [-1288.2791748046875, -484.14837646484375, 839.60791015625, -409.2907409667969, 223.5039520263672, 179.45223999023438, 654.2771606445312, -154.83499145507812, -128.13507080078125, -833.1857299804688, 287.5569152832031, 133.95361328125, 133.79629516601562, 470.0271911621094, 59.848480224609375, 942.1380615234375, -30.848129272460938, 481.0748291015625, 279.67626953125, 33.275146484375, 1202.5576171875, 91.92852783203125, 56.1256103515625, 14.501907348632812, 195.55084228515625, 527.6369018554688, 296.3749694824219, -271.3421325683594, 243.08343505859375, 787.447509765625, 36.34480285644531, 854.2474365234375, 212.73471069335938, 1010.611572265625, 2222.512451171875, 1794.448486328125, 106.15726470947266, 1463.674072265625, 411.52947998046875, 501.9674377441406, 411.30340576171875, 1038.01708984375, 94.71499633789062, 264.17779541015625, -242.3622589111328, 1350.989990234375, 1685.110595703125, 164.99269104003906, 1391.8350830078125, 66.11366271972656, 2316.820556640625, -105.01530456542969, -123.69435119628906, -77.04911041259766, 1066.54052734375, 791.1436767578125, 640.5950317382812, 242.21212768554688, 332.39422607421875, -253.4601287841797, 287.2849426269531, 255.7615203857422, 200.15994262695312, 193.2766571044922], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000472.npy"}
{"epoch": 0.6930983847283406, "step": 473, "batch_size": 64, "mean": 190.32020568847656, "std": 778.18505859375, "min": -1915.14013671875, "p10": -509.73521423339844, "median": 188.84033966064453, "p90": 1027.3034912109379, "max": 3589.12890625, "pos_frac": 0.671875, "sample": [-83.6274185180664, 846.1576538085938, 367.86065673828125, -231.86842346191406, -488.3495178222656, 1110.4083251953125, -78.83416748046875, 172.56744384765625, 139.04200744628906, 321.2157287597656, 1262.1629638671875, 591.0650024414062, -87.79164123535156, 177.88934326171875, -518.9005126953125, -443.52935791015625, 25.414886474609375, -12.409164428710938, 213.9590606689453, 1312.4315185546875, -285.7987365722656, -49.16096878051758, 540.4074096679688, 299.2899475097656, 138.77456665039062, 161.72799682617188, 369.12359619140625, 314.34063720703125, 198.74893188476562, 153.42752075195312, -1915.14013671875, 37.211280822753906, -538.95654296875, 234.53517150878906, -1392.7921142578125, -1817.0126953125, 3589.12890625, 249.40850830078125, 511.712646484375, 1184.58203125, 195.4479217529297, 255.86795043945312, 721.140869140625, 808.1805419921875, 163.24635314941406, 618.774658203125, -539.864501953125, -388.9672546386719, 235.4202423095703, 951.4471435546875, 535.8629150390625, 1059.8133544921875, -382.59149169921875, -171.40386962890625, 132.50631713867188, -119.98026275634766, 726.8338623046875, -1461.7581787109375, 528.5185546875, 182.23275756835938, -301.83831787109375, 332.2593994140625, 213.65463256835938, 1307.2672119140625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000473.npy"}
{"epoch": 0.6945668135095447, "step": 474, "batch_size": 64, "mean": 331.4249572753906, "std": 588.0437622070312, "min": -1474.12939453125, "p10": -242.27708129882808, "median": 321.61888122558594, "p90": 935.6385742187501, "max": 2225.93896484375, "pos_frac": 0.765625, "sample": [-1177.6187744140625, 327.8024597167969, 180.26113891601562, 943.957275390625, 850.210693359375, -497.8804626464844, 916.228271484375, 151.7746124267578, 262.9344177246094, 551.6367797851562, 1008.6739501953125, 2225.93896484375, 27.667882919311523, 363.4789123535156, 62.943199157714844, 802.5830078125, 324.3713684082031, -18.533042907714844, -400.666015625, 415.26611328125, 25.518814086914062, 257.4167785644531, -204.579833984375, 158.52243041992188, 408.8233947753906, 675.1612548828125, 738.66259765625, 216.03607177734375, 522.0457763671875, 473.71185302734375, 560.5962524414062, -48.695674896240234, 43.53484344482422, 312.74578857421875, 547.668701171875, 275.93701171875, 693.0926513671875, 112.16314697265625, -41.08006286621094, -258.43304443359375, 733.2509765625, -71.51834106445312, 651.220703125, 464.8729248046875, 166.8115234375, 313.0508117675781, 1296.861572265625, 1052.1265869140625, -58.15459060668945, -655.597900390625, -57.04474639892578, 847.6221313476562, 400.9407043457031, 614.0891723632812, 503.7605895996094, 233.53199768066406, 318.86639404296875, -803.0840454101562, 702.8466186523438, -99.64334106445312, 373.73406982421875, 1281.7030029296875, -1474.12939453125, 1685.2015380859375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000474.npy"}
{"epoch": 0.6960352422907489, "step": 475, "batch_size": 64, "mean": 419.064697265625, "std": 435.9761047363281, "min": -265.4236755371094, "p10": -153.9347702026367, "median": 373.5669708251953, "p90": 985.0767456054693, "max": 1512.3641357421875, "pos_frac": 0.78125, "sample": [75.05133056640625, -45.5650749206543, 399.1905822753906, 163.79931640625, -161.92724609375, 598.5637817382812, 1512.3641357421875, 348.9840393066406, 1188.7567138671875, 1108.4345703125, 311.3083190917969, 264.2481384277344, 499.611572265625, -24.01031494140625, 600.905517578125, -265.4236755371094, 692.5805053710938, -4.302289962768555, 794.032470703125, -180.38671875, 810.831298828125, 664.15625, 753.4560546875, 249.04632568359375, 112.16488647460938, 318.1699523925781, 439.2918701171875, 458.7112731933594, 33.1400260925293, 535.6246337890625, 282.60076904296875, 428.5205383300781, -135.28565979003906, 349.8357238769531, 696.7107543945312, 828.0338134765625, 390.5372619628906, 714.6383056640625, -184.3798370361328, 1250.9613037109375, 468.34844970703125, -19.475387573242188, 236.70101928710938, -231.6770477294922, 779.1961669921875, 86.65394592285156, 555.99267578125, -110.00508117675781, 348.72857666015625, 695.128173828125, -38.68408203125, 783.9515380859375, 600.4107666015625, 26.726043701171875, 1474.131103515625, -214.98854064941406, 195.7569122314453, 1387.4527587890625, 635.91015625, 1052.380859375, 340.1115417480469, 760.6363525390625, -222.82431030273438, 356.5966796875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000475.npy"}
{"epoch": 0.697503671071953, "step": 476, "batch_size": 64, "mean": 322.09075927734375, "std": 435.8434143066406, "min": -512.6300048828125, "p10": -182.4406539916992, "median": 251.15115356445312, "p90": 852.7464843750003, "max": 1941.1072998046875, "pos_frac": 0.796875, "sample": [799.15478515625, 83.69306182861328, -105.46642303466797, 658.2239990234375, 273.2589111328125, 509.8533020019531, 1255.856201171875, 176.12069702148438, 157.43991088867188, -225.02481079101562, 929.7504272460938, 151.800537109375, 168.5361785888672, 495.75689697265625, 759.1378173828125, 774.3154296875, 629.83642578125, 102.2068099975586, 254.38992309570312, 132.77114868164062, 434.6037292480469, 230.1209716796875, -29.998977661132812, 1197.0511474609375, 485.4378356933594, 552.8759155273438, -457.42340087890625, -239.1263427734375, 485.41650390625, 82.16924285888672, 6.604560852050781, 161.89569091796875, -215.90029907226562, -180.49069213867188, 471.046630859375, -157.2300262451172, -112.99340057373047, -440.2347717285156, 685.63818359375, 179.3260498046875, 228.0693359375, 635.2022094726562, 247.91238403320312, -183.27635192871094, 142.8413543701172, 197.00511169433594, 709.6984252929688, 1053.05419921875, 1941.1072998046875, 875.71435546875, 318.950439453125, -19.134567260742188, 286.0666809082031, 266.3393859863281, 165.9750518798828, 105.85626220703125, 449.3967590332031, 455.7801513671875, 132.50729370117188, 396.4590759277344, 266.206298828125, 995.7581787109375, 338.548583984375, -512.6300048828125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000476.npy"}
{"epoch": 0.6989720998531571, "step": 477, "batch_size": 64, "mean": 330.65277099609375, "std": 538.8142700195312, "min": -1508.5948486328125, "p10": -229.27567443847656, "median": 282.86756896972656, "p90": 996.2602355957033, "max": 1961.3470458984375, "pos_frac": 0.734375, "sample": [-519.9694213867188, 447.7869567871094, 941.2898559570312, -455.0318603515625, 1370.576416015625, 258.165283203125, 290.2372131347656, 870.010986328125, 808.3936767578125, 890.1416625976562, 50.528297424316406, 132.38137817382812, 229.1656951904297, 750.4095458984375, 1019.8189697265625, -91.41658782958984, 651.1187133789062, 405.6667785644531, 1025.3096923828125, -158.44476318359375, 483.8690490722656, 117.5362548828125, -9.065643310546875, 202.9567413330078, -236.14193725585938, 643.46044921875, 378.4495544433594, 154.65798950195312, -37.259674072265625, 7.313417434692383, -478.5591125488281, 468.2229309082031, -1508.5948486328125, -709.498046875, 1219.715087890625, 227.58084106445312, 284.2394714355469, 243.75054931640625, 491.5838623046875, 107.16342163085938, 579.5335693359375, -107.18878173828125, 504.10980224609375, 1078.345703125, 280.8980407714844, 363.133544921875, 323.4304504394531, 860.3954467773438, 528.1719970703125, 208.14508056640625, 1045.5423583984375, -241.10055541992188, -41.342308044433594, 281.49566650390625, -4.8980712890625, 797.6768798828125, 692.173583984375, -165.88357543945312, -10.416107177734375, -213.25439453125, 121.13802337646484, 726.8683471679688, 1961.3470458984375, 625.9373779296875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000477.npy"}
{"epoch": 0.7004405286343612, "step": 478, "batch_size": 64, "mean": 384.75927734375, "std": 478.1852722167969, "min": -718.9363403320312, "p10": -177.3697692871093, "median": 367.1163635253906, "p90": 1132.9776000976565, "max": 1603.18310546875, "pos_frac": 0.796875, "sample": [-203.3673858642578, 219.91334533691406, 653.298095703125, 74.62397003173828, 30.335289001464844, 419.70892333984375, 300.21685791015625, -331.17266845703125, 427.1461181640625, -0.563201904296875, 384.19677734375, 502.34161376953125, -591.5623168945312, -310.93756103515625, 296.4892578125, 906.3037719726562, 732.48974609375, 1603.18310546875, 443.53082275390625, 164.92501831054688, -219.9306182861328, -62.291412353515625, -369.95269775390625, 255.53402709960938, 403.8348388671875, 1068.0596923828125, 419.39093017578125, 425.9359130859375, -92.23757934570312, 288.2078857421875, 1391.5960693359375, 1035.599609375, 325.51019287109375, 254.161376953125, 475.57037353515625, 315.2519836425781, 1220.648681640625, 1160.799560546875, 645.509521484375, -116.70866394042969, 564.988037109375, -61.136253356933594, 116.822021484375, -91.92813873291016, 613.67529296875, 466.5879821777344, 118.89127349853516, 1227.875732421875, 226.95310974121094, 417.2169189453125, 565.0118408203125, 36.406700134277344, 350.03594970703125, 721.1148071289062, 296.29510498046875, 288.5532531738281, 1277.2310791015625, 599.9103393554688, 1340.1383056640625, -718.9363403320312, 580.7769775390625, 398.2337341308594, 734.8432006835938, 9.442352294921875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000478.npy"}
{"epoch": 0.7019089574155654, "step": 479, "batch_size": 64, "mean": 352.7756042480469, "std": 586.9920043945312, "min": -1471.77099609375, "p10": -245.61933593749998, "median": 281.94525146484375, "p90": 1044.1874389648438, "max": 2694.943603515625, "pos_frac": 0.71875, "sample": [111.31400299072266, 145.31344604492188, 1037.02978515625, 630.83935546875, 931.7460327148438, 995.697265625, -431.7451171875, -223.46914672851562, 261.38824462890625, 47.34869384765625, 774.1842041015625, -157.31744384765625, -255.11227416992188, 302.50225830078125, 758.4493408203125, 256.54803466796875, 187.30247497558594, -111.1199951171875, 1082.906005859375, 486.4950256347656, 219.99984741210938, 456.04376220703125, -136.54615783691406, 434.6670227050781, 2694.943603515625, 1049.27099609375, 635.8587036132812, 683.0149536132812, 686.2799072265625, 505.93267822265625, 1050.5283203125, 239.7235107421875, 229.97845458984375, 847.302978515625, -545.9059448242188, 1521.010498046875, -27.174407958984375, 564.9934692382812, 472.4776306152344, 119.11204528808594, 407.1432189941406, 1047.2550048828125, 620.924560546875, 443.176025390625, -135.99290466308594, -1471.77099609375, 380.04815673828125, -134.60040283203125, -319.0004577636719, -22.017183303833008, 156.09414672851562, -55.395606994628906, -366.7701416015625, -452.98736572265625, 141.17311096191406, -77.3906478881836, 516.9154663085938, -9.50421142578125, 119.3636474609375, 1308.615966796875, 408.01226806640625, 897.7960205078125, 125.83787536621094, 518.9007568359375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000479.npy"}
{"epoch": 0.7033773861967695, "step": 480, "batch_size": 64, "mean": 287.37847900390625, "std": 561.8329467773438, "min": -761.0377807617188, "p10": -442.79411926269523, "median": 318.15386962890625, "p90": 908.5329406738282, "max": 2294.395751953125, "pos_frac": 0.6875, "sample": [-466.3822326660156, 349.85382080078125, -175.79721069335938, 127.41256713867188, 372.8850402832031, 19.607444763183594, 232.69943237304688, 916.9863891601562, 552.1033935546875, 829.7000122070312, 488.6444091796875, 1325.850830078125, 443.3355712890625, -342.44757080078125, -540.6177368164062, -634.501708984375, 661.9155883789062, 760.4163818359375, -88.67123413085938, 369.1019287109375, 23.308616638183594, -199.6055145263672, 17.577041625976562, 450.64556884765625, 428.1086120605469, 361.022705078125, 2294.395751953125, 289.2551574707031, 563.520263671875, -198.18263244628906, 874.781005859375, 884.5740356445312, 888.8082275390625, -236.64256286621094, -527.517578125, 217.35940551757812, 424.6439514160156, -254.87144470214844, 441.20648193359375, -761.0377807617188, -645.352294921875, -624.6075439453125, 597.7823486328125, 1100.616455078125, 1418.20068359375, -154.97219848632812, 347.0525817871094, 559.9180297851562, -24.560420989990234, 423.5220642089844, -8.564376831054688, -387.75518798828125, 184.58734130859375, 177.55258178710938, 138.63888549804688, 8.166034698486328, 157.77880859375, -3.6735572814941406, 634.0112915039062, 1056.8792724609375, 497.38421630859375, -135.8189239501953, 1099.545654296875, 792.4458618164062], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000480.npy"}
{"epoch": 0.7048458149779736, "step": 481, "batch_size": 64, "mean": 454.84710693359375, "std": 539.8869018554688, "min": -758.95361328125, "p10": -184.0277114868164, "median": 404.01060485839844, "p90": 1022.0268798828126, "max": 2057.349609375, "pos_frac": 0.78125, "sample": [2057.349609375, -758.95361328125, 576.0216674804688, 391.21551513671875, -160.8537139892578, 48.85444641113281, -103.82337951660156, 303.85052490234375, 487.2286376953125, 1443.6722412109375, 109.15373992919922, -235.17642211914062, 124.32606506347656, -319.85931396484375, 562.9163208007812, 862.4343872070312, -82.06951904296875, 674.6885986328125, 1371.54736328125, -37.07206726074219, -21.74908447265625, 791.423095703125, 298.1247253417969, 805.3588256835938, 437.2148132324219, 193.0997314453125, 215.46954345703125, 1039.4239501953125, 695.3944091796875, -296.0534973144531, 111.21502685546875, 794.6838989257812, 358.73297119140625, -194.7491455078125, 683.8098754882812, 824.876708984375, -233.9378662109375, 93.25602722167969, 981.4337158203125, 514.1109008789062, -189.66595458984375, 164.66200256347656, 878.7568359375, 876.4664306640625, 55.402198791503906, 155.190673828125, 1711.2540283203125, 266.5793762207031, 1546.0928955078125, 964.3021240234375, 324.68328857421875, -8.582015991210938, 416.8056945800781, 618.7555541992188, 454.94256591796875, 448.583740234375, 167.93942260742188, 861.3040771484375, 799.6134643554688, 717.822998046875, 1501.6021728515625, -170.87181091308594, 309.29949951171875, 832.6859741210938], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000481.npy"}
{"epoch": 0.7063142437591777, "step": 482, "batch_size": 64, "mean": 201.93572998046875, "std": 574.8825073242188, "min": -2012.982666015625, "p10": -464.9453002929687, "median": 254.10411071777344, "p90": 884.7923706054693, "max": 1604.280517578125, "pos_frac": 0.734375, "sample": [201.85794067382812, -46.66172790527344, 358.3811950683594, -808.2818603515625, 381.9813232421875, 539.5897216796875, -1050.113525390625, 549.8828125, 612.7811279296875, 414.7802429199219, 362.8612976074219, 585.1524047851562, -10.487930297851562, 268.423828125, 762.7069702148438, 1003.9684448242188, 239.78439331054688, -419.55792236328125, 361.5860595703125, 408.99151611328125, 2.756237030029297, 414.1716003417969, 283.8587646484375, 941.0927124023438, -911.51416015625, 299.46966552734375, 147.78924560546875, 943.1541748046875, 279.1290283203125, 397.524658203125, 594.212646484375, 635.404296875, 723.52734375, 180.36727905273438, -347.9298400878906, 180.53244018554688, 937.1146850585938, -488.6416931152344, 1291.2486572265625, -290.0213623046875, 678.1039428710938, -42.40727996826172, -178.47552490234375, 37.822547912597656, 169.41151428222656, 76.70126342773438, -2012.982666015625, 342.85528564453125, 1604.280517578125, 29.49573516845703, 26.39129638671875, 16.235092163085938, -1.1961669921875, 420.32073974609375, -797.3622436523438, 361.21636962890625, 132.19781494140625, 1129.2569580078125, -121.3306884765625, 72.29325866699219, -198.27426147460938, 1.7289047241210938, 731.1287841796875, -484.39703369140625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000482.npy"}
{"epoch": 0.7077826725403817, "step": 483, "batch_size": 64, "mean": 463.0313415527344, "std": 571.2857666015625, "min": -546.8448486328125, "p10": -255.9297119140625, "median": 374.3201904296875, "p90": 1178.6426513671877, "max": 2914.45361328125, "pos_frac": 0.828125, "sample": [-176.13526916503906, 34.405426025390625, 1285.283935546875, 779.45947265625, -257.7547607421875, 736.2674560546875, 383.27978515625, 345.2235107421875, 706.673828125, 643.5546875, 381.32696533203125, 789.3057861328125, 480.46612548828125, 289.3272705078125, 293.1533508300781, 213.12644958496094, 932.3329467773438, 968.9888916015625, -251.6712646484375, 515.8341674804688, 297.5370788574219, -55.23125457763672, 635.560546875, 750.3938598632812, 22.436172485351562, -344.6595764160156, 1194.6767578125, 685.5549926757812, 51.91264343261719, 1041.778076171875, -303.8094177246094, 1378.48291015625, 26.541893005371094, 952.7754516601562, 28.573532104492188, 1304.0008544921875, 253.31539916992188, 934.713623046875, 185.9971923828125, 1034.2490234375, 711.4893798828125, 677.9942626953125, 498.21673583984375, -454.152587890625, 208.69873046875, 367.31341552734375, 485.4806823730469, 310.07110595703125, 1141.229736328125, 116.47264099121094, 158.8228759765625, -546.8448486328125, 169.8406982421875, 2914.45361328125, -286.71484375, 218.05027770996094, 1256.7344970703125, 241.7806396484375, 548.8933715820312, -283.1527404785156, 648.4583740234375, 1268.8297119140625, 261.75982666015625, -166.96868896484375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000483.npy"}
{"epoch": 0.7092511013215859, "step": 484, "batch_size": 64, "mean": 437.65692138671875, "std": 519.9096069335938, "min": -568.4465942382812, "p10": -79.81718673706054, "median": 369.64207458496094, "p90": 1121.0143676757814, "max": 1753.03955078125, "pos_frac": 0.78125, "sample": [374.7911376953125, -55.43864440917969, 1753.03955078125, 131.1138458251953, -80.89247131347656, -273.5234375, 581.6028442382812, 57.17357635498047, 1683.5413818359375, 247.17840576171875, 88.25276184082031, 502.98199462890625, 554.3893432617188, 636.217041015625, 1129.12451171875, 534.2454223632812, 415.5638122558594, -35.362545013427734, -134.77166748046875, -568.4465942382812, 439.7524719238281, 21.363330841064453, 990.9281005859375, 895.7495727539062, 1736.771484375, 39.36729431152344, 421.07891845703125, 807.5756225585938, 8.552852630615234, -70.59909057617188, -70.49136352539062, -75.6451416015625, 821.815185546875, 1196.0203857421875, 64.6624526977539, 236.0233154296875, 326.754638671875, 141.85157775878906, 163.0188751220703, -190.8256072998047, -77.30818939208984, 293.4895935058594, 869.6912231445312, 393.8911437988281, 1252.60791015625, 53.669189453125, 506.3015441894531, 1102.0906982421875, 406.5600891113281, -194.23104858398438, -54.50553894042969, 1048.012451171875, 1089.9085693359375, 741.2157592773438, 88.03215026855469, 1213.925537109375, 994.80810546875, 364.4930114746094, 536.0364379882812, 143.51715087890625, 886.1005249023438, 65.82347106933594, -134.91053771972656, 976.3196411132812], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000484.npy"}
{"epoch": 0.71071953010279, "step": 485, "batch_size": 64, "mean": 397.8016357421875, "std": 571.6553955078125, "min": -913.4813842773438, "p10": -160.60449905395504, "median": 267.341796875, "p90": 1095.3114379882813, "max": 2097.721923828125, "pos_frac": 0.78125, "sample": [623.5458374023438, 681.9373779296875, -50.98442077636719, 226.50970458984375, 419.35638427734375, 76.02518463134766, 1113.1260986328125, 199.71859741210938, 425.7276611328125, 481.6974182128906, 165.16049194335938, -77.32141876220703, 1523.155029296875, 1279.0302734375, 161.27886962890625, 1053.743896484375, -627.4046630859375, -10.059335708618164, 972.0137939453125, 1013.0303955078125, 996.81640625, 1189.5560302734375, 134.73196411132812, 595.9534912109375, 2097.721923828125, 1156.3441162109375, -35.18657684326172, 10.102106094360352, 662.0007934570312, -114.86312103271484, 1.2436752319335938, -51.44110107421875, -601.7845458984375, 156.3525390625, 231.4989013671875, 919.581787109375, 468.2005615234375, 122.58112335205078, 173.57925415039062, 747.8643798828125, -99.67645263671875, 135.2779541015625, 1904.6597900390625, 509.75042724609375, -180.20794677734375, 364.3299865722656, 387.9840087890625, 602.2899780273438, 336.2763671875, -558.0059204101562, 989.5169067382812, 972.70849609375, 51.01102066040039, -209.50930786132812, 53.61982727050781, -200.9530487060547, 914.1300048828125, 279.1737976074219, 325.4147033691406, 178.4888916015625, 13.708625793457031, -913.4813842773438, 837.1452026367188, 255.50979614257812], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000485.npy"}
{"epoch": 0.7121879588839941, "step": 486, "batch_size": 64, "mean": 461.8829345703125, "std": 668.54931640625, "min": -1106.2528076171875, "p10": -344.7062194824219, "median": 420.14320373535156, "p90": 1356.4958007812502, "max": 2784.16796875, "pos_frac": 0.78125, "sample": [1001.4332275390625, 569.0947875976562, 10.989761352539062, 611.250244140625, 362.6068420410156, -799.9442138671875, 94.19448852539062, 257.9485168457031, 975.9182739257812, -166.10415649414062, 525.8878784179688, 1413.5623779296875, 613.4795532226562, -732.2930297851562, -194.9620361328125, 1401.20166015625, 958.80029296875, 1028.4044189453125, -335.78228759765625, 1011.7744140625, 1297.4310302734375, -371.9197998046875, 1378.763671875, 1370.486083984375, 325.98187255859375, -200.89132690429688, 469.57794189453125, 405.0371398925781, 75.22396850585938, 1472.6195068359375, 1044.197265625, 442.45721435546875, -348.53076171875, -685.4366455078125, 365.14300537109375, 381.93731689453125, 146.458251953125, 1416.3160400390625, 892.7124633789062, 820.9486083984375, 254.61691284179688, 805.9544067382812, 435.249267578125, -148.69927978515625, 573.1900024414062, 854.514404296875, 908.0281982421875, 648.3021850585938, 1018.3622436523438, 1323.851806640625, 178.79940795898438, 356.974609375, 34.824554443359375, 294.2249755859375, 719.6261596679688, 90.20387268066406, 21.428564071655273, 474.2726135253906, -1106.2528076171875, -4.3893280029296875, 369.4547424316406, -52.64935302734375, -579.5220947265625, 2784.16796875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000486.npy"}
{"epoch": 0.7136563876651982, "step": 487, "batch_size": 64, "mean": 327.4232177734375, "std": 535.3898315429688, "min": -1771.17333984375, "p10": -255.52739410400386, "median": 278.14068603515625, "p90": 1008.5490905761719, "max": 1800.8363037109375, "pos_frac": 0.765625, "sample": [365.7568359375, 1134.9862060546875, 197.74371337890625, -278.6972351074219, 952.349609375, 1002.1861572265625, 724.6268310546875, 449.054443359375, 1547.1229248046875, -111.16979217529297, 183.3931884765625, -501.0140380859375, 1055.032470703125, -44.902442932128906, 246.0203857421875, 1800.8363037109375, 273.14837646484375, -313.6291809082031, 786.0761108398438, 658.3468627929688, 389.02911376953125, 603.9383544921875, -1771.17333984375, 99.30204010009766, -64.13060760498047, 701.814453125, -21.514564514160156, 1105.39892578125, 1245.8526611328125, -29.35929298400879, 172.6682586669922, 362.9296569824219, 338.7606201171875, 435.8677978515625, 210.33889770507812, -490.07073974609375, 245.29995727539062, -446.6034240722656, 552.8251342773438, 201.34591674804688, 388.49847412109375, 68.9239501953125, 498.42572021484375, -201.4644317626953, 497.02423095703125, 175.01217651367188, 2.6731414794921875, 283.13299560546875, 418.25335693359375, 225.98870849609375, 122.31480407714844, 590.8965454101562, 255.7722625732422, 418.747802734375, -20.494735717773438, -381.20916748046875, 225.01669311523438, 1011.2760620117188, 781.0172729492188, 645.0098876953125, 0.466094970703125, 535.3565673828125, -42.649078369140625, 487.3072509765625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000487.npy"}
{"epoch": 0.7151248164464024, "step": 488, "batch_size": 64, "mean": 355.3664855957031, "std": 464.1176452636719, "min": -545.3031616210938, "p10": -130.6131423950195, "median": 371.33111572265625, "p90": 1090.7973022460938, "max": 1703.001708984375, "pos_frac": 0.734375, "sample": [-3.4319229125976562, 34.85867691040039, 1397.946533203125, 267.12530517578125, 1083.9378662109375, 217.52610778808594, 665.826904296875, -79.75556182861328, -181.62014770507812, 344.1976318359375, 586.62060546875, 227.1948699951172, 637.8466186523438, 1093.737060546875, 439.65728759765625, 818.591796875, 24.744903564453125, 48.470314025878906, -437.9630126953125, -117.2584228515625, 611.2483520507812, -57.186180114746094, 502.2010498046875, 444.1151428222656, -6.9018707275390625, -280.8353271484375, 89.33493041992188, 591.6044311523438, 1703.001708984375, 170.2427215576172, 413.34075927734375, 570.8626098632812, -545.3031616210938, 1130.3599853515625, 747.6840209960938, -9.719684600830078, 1238.443359375, 446.4223327636719, 354.6892395019531, 76.58673858642578, 577.9059448242188, 877.5765380859375, 255.42291259765625, 388.93011474609375, 43.47080993652344, 387.9729919433594, 502.25762939453125, 1284.46630859375, -99.12775421142578, -72.4595718383789, -136.3365936279297, 478.4658508300781, 409.3176574707031, 67.74576568603516, 411.08612060546875, 502.7038269042969, -40.52711486816406, 449.9692687988281, -28.816303253173828, -198.40924072265625, -454.51885986328125, 509.6824645996094, 247.7322998046875, 1120.4986572265625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000488.npy"}
{"epoch": 0.7165932452276065, "step": 489, "batch_size": 64, "mean": 344.5076599121094, "std": 517.2615966796875, "min": -574.5018920898438, "p10": -160.89617156982422, "median": 202.44287872314453, "p90": 1162.1992309570314, "max": 1804.04931640625, "pos_frac": 0.734375, "sample": [784.694091796875, 45.50245666503906, 287.7625732421875, 108.39521789550781, -7.542032241821289, 141.9332733154297, 179.34971618652344, -245.63827514648438, 96.73118591308594, 172.86065673828125, -250.9886016845703, 103.76449584960938, 536.557861328125, 693.5718383789062, 472.0820007324219, 264.21923828125, -148.5387725830078, 202.03021240234375, -108.37586212158203, 922.5093383789062, -6.573286056518555, -50.576904296875, 257.2799072265625, 116.58873748779297, 107.66061401367188, 142.5588836669922, 227.62461853027344, -67.6756591796875, 202.8555450439453, 549.3973388671875, -166.19219970703125, 47.521240234375, -44.040618896484375, -267.69549560546875, 1341.159912109375, 579.3942260742188, 45.7153434753418, 105.37986755371094, 266.2413330078125, -574.5018920898438, 665.1445922851562, -355.3037414550781, 1408.671142578125, -455.74676513671875, 1169.97802734375, 1144.0487060546875, 585.3287963867188, 1804.04931640625, 316.7216796875, 582.3145751953125, 1796.4239501953125, 658.645751953125, 443.6982421875, 1323.18310546875, -117.61739349365234, -31.52252769470215, 719.769287109375, 501.3357849121094, 447.6699523925781, 366.49676513671875, 0.320098876953125, 1418.0333251953125, 642.530029296875, -48.68463134765625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000489.npy"}
{"epoch": 0.7180616740088106, "step": 490, "batch_size": 64, "mean": 357.92462158203125, "std": 647.6117553710938, "min": -1214.2685546875, "p10": -279.8762786865234, "median": 182.47476196289062, "p90": 1196.5278442382814, "max": 2135.585205078125, "pos_frac": 0.78125, "sample": [73.9476318359375, -1214.2685546875, 786.4807739257812, -178.3795623779297, 144.39810180664062, -20.537193298339844, 104.94610595703125, 829.7478637695312, 841.139892578125, -586.871826171875, 75.8570556640625, -3.556884765625, 1264.320556640625, 59.576812744140625, 498.3343505859375, -300.7834167480469, -197.378173828125, 1137.4044189453125, 573.3225708007812, -739.1712646484375, 46.27738571166992, 137.76040649414062, -681.25048828125, 147.67579650878906, -1122.5570068359375, 1315.447998046875, 110.67324829101562, 551.4498901367188, 241.80844116210938, 741.5464477539062, 331.594482421875, 2135.585205078125, 71.489990234375, 686.4675903320312, 47.512657165527344, -139.4002685546875, 1682.5838623046875, 929.5266723632812, 1609.89208984375, 27.484516143798828, -40.84246826171875, 159.17691040039062, 1221.866455078125, 774.8722534179688, 44.37498474121094, 903.5370483398438, -231.09295654296875, 754.2369384765625, 43.1285400390625, 449.45843505859375, 335.26263427734375, -545.9146728515625, 1650.693603515625, 21.67431640625, 936.363525390625, 959.7383422851562, 21.62152099609375, 205.77261352539062, 631.165283203125, 927.5366821289062, 55.99264144897461, 919.96484375, 439.97021484375, 248.51788330078125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000490.npy"}
{"epoch": 0.7195301027900147, "step": 491, "batch_size": 64, "mean": 246.2215118408203, "std": 460.2898864746094, "min": -779.464599609375, "p10": -325.44468383789064, "median": 171.6578826904297, "p90": 849.4108825683596, "max": 1551.242919921875, "pos_frac": 0.765625, "sample": [291.10693359375, -341.5408630371094, 701.0042724609375, 513.1927490234375, 1365.582275390625, 130.12205505371094, 364.4355773925781, -92.55393981933594, 403.229736328125, 99.47332763671875, 360.1089782714844, 96.77467346191406, 19.578590393066406, -485.14385986328125, 438.2251892089844, 103.90101623535156, 747.6025390625, 415.9700622558594, 22.808547973632812, 366.2809143066406, 210.47372436523438, 6.829141616821289, 1013.12890625, -363.8639831542969, 224.7515106201172, 284.0118408203125, 705.06591796875, -212.61532592773438, 870.0799560546875, 20.84006690979004, 917.3035888671875, 335.8108825683594, -779.464599609375, 51.547142028808594, -313.394775390625, 281.6912536621094, 0.8035812377929688, 1056.26708984375, 306.908203125, 315.59283447265625, 619.1659545898438, -329.09710693359375, 1551.242919921875, 124.32656860351562, -194.6577606201172, 132.842041015625, -316.92236328125, 220.29051208496094, -51.372222900390625, 674.38671875, 97.63082885742188, -405.480712890625, 710.0783081054688, -130.5460968017578, 44.47545623779297, 125.83341979980469, 298.16705322265625, 801.1830444335938, 112.53382873535156, -42.288116455078125, 1278.668701171875, 103.31047821044922, 426.8016357421875, -544.322265625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000491.npy"}
{"epoch": 0.7209985315712188, "step": 492, "batch_size": 64, "mean": 248.42079162597656, "std": 554.1001586914062, "min": -912.5675659179688, "p10": -464.09756774902337, "median": 180.7614517211914, "p90": 1098.43115234375, "max": 1332.4473876953125, "pos_frac": 0.703125, "sample": [226.53570556640625, 306.26361083984375, 769.5335083007812, 10.573419570922852, 255.131591796875, 24.17982292175293, 28.272796630859375, 747.330322265625, -586.2039794921875, -120.96684265136719, 244.03530883789062, 620.966552734375, 711.981201171875, -203.80615234375, -836.4618530273438, 916.234130859375, -97.56497955322266, 1179.678955078125, -224.16558837890625, 1332.4473876953125, -499.52752685546875, 1299.3218994140625, 224.89761352539062, -419.7950744628906, 336.71649169921875, 313.9280090332031, 249.75595092773438, -483.0843505859375, 99.55693054199219, 319.3598327636719, 46.6456298828125, 4.933507919311523, 101.25334930419922, -912.5675659179688, -74.30120849609375, 1182.0533447265625, 836.2469482421875, 166.83544921875, -679.693115234375, 269.12530517578125, 1074.715576171875, -763.099365234375, 1292.705810546875, 41.72478485107422, -142.1069793701172, 702.528564453125, 194.6874542236328, 104.35523986816406, 949.8356323242188, -20.84396743774414, 445.7402038574219, -118.38157653808594, 140.1923828125, -394.6930236816406, 1108.594970703125, 889.6695556640625, 607.6915893554688, -14.37060546875, 50.0815315246582, 585.8822021484375, -202.54635620117188, 1284.8258056640625, 30.79383087158203, 365.29058837890625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000492.npy"}
{"epoch": 0.7224669603524229, "step": 493, "batch_size": 64, "mean": 267.0850524902344, "std": 502.16162109375, "min": -1827.5435791015625, "p10": -190.1523284912109, "median": 215.08150482177734, "p90": 970.537176513672, "max": 1273.7347412109375, "pos_frac": 0.734375, "sample": [1061.621826171875, 100.72808074951172, 493.2388000488281, 247.90740966796875, -93.8759765625, 1194.7537841796875, 791.3803100585938, -82.04153442382812, 123.44930267333984, -407.30108642578125, 459.56365966796875, 89.64610290527344, 912.6609497070312, 442.01287841796875, 415.24456787109375, 148.94125366210938, -204.23263549804688, 322.9626770019531, 117.53851318359375, 42.66508483886719, 197.5273895263672, 280.4121398925781, 496.77264404296875, 483.25823974609375, -157.29827880859375, 421.91326904296875, 652.76416015625, -135.74789428710938, -232.22979736328125, -137.92840576171875, 93.60040283203125, -1827.5435791015625, 315.7279052734375, 4.4128570556640625, 801.8851318359375, -5.792577743530273, 1273.7347412109375, 1250.47802734375, 142.6937255859375, -244.9862823486328, -153.21804809570312, 365.32696533203125, 1122.80224609375, 743.3271484375, 323.0738220214844, 947.86181640625, -450.3208923339844, 365.15216064453125, 1250.650146484375, 980.2551879882812, 134.17483520507812, 489.1935729980469, -237.1181182861328, 233.4500732421875, 232.6356201171875, -112.17542266845703, -96.74055480957031, 281.3785095214844, 581.013427734375, 118.59992980957031, 120.77277374267578, 23.42498779296875, -71.07916259765625, 50.48512268066406], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000493.npy"}
{"epoch": 0.723935389133627, "step": 494, "batch_size": 64, "mean": 440.0100402832031, "std": 470.88873291015625, "min": -614.50390625, "p10": -69.73050613403319, "median": 443.00628662109375, "p90": 984.0774353027344, "max": 1782.75244140625, "pos_frac": 0.8125, "sample": [-2.4453258514404297, 299.59173583984375, 544.6571655273438, 368.6108703613281, 1782.75244140625, 1323.35595703125, -614.50390625, 709.4755249023438, 725.3865966796875, 22.143735885620117, 474.0042419433594, -52.63893508911133, 860.423583984375, 488.82415771484375, -270.118408203125, 1084.8280029296875, 662.1320190429688, -187.66033935546875, 496.5523986816406, 772.76611328125, 143.37777709960938, -282.53875732421875, 283.3856506347656, 931.4600219726562, 838.2919921875, 50.835182189941406, 864.1170043945312, 996.629150390625, 246.26661682128906, 338.642822265625, -541.4551391601562, 42.30397033691406, 407.3316345214844, -240.21826171875, 1621.9483642578125, 435.7967224121094, 504.19842529296875, -24.724830627441406, 485.62982177734375, 1227.2364501953125, 954.7901000976562, 506.550048828125, -72.49928283691406, -63.27002716064453, 449.2275390625, 651.955810546875, 566.5316162109375, 875.274658203125, 430.2044982910156, 772.6392822265625, -25.886823654174805, 324.736083984375, 515.991455078125, 114.84286499023438, 160.00680541992188, 471.5126953125, 800.868896484375, 625.9657592773438, 217.0943603515625, 436.7850341796875, 1090.6483154296875, 82.49864196777344, 408.7584533691406, 48.763641357421875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000494.npy"}
{"epoch": 0.7254038179148311, "step": 495, "batch_size": 64, "mean": 384.21527099609375, "std": 566.7551879882812, "min": -974.0975952148438, "p10": -213.78501434326165, "median": 305.3529052734375, "p90": 1022.8670532226563, "max": 2350.697021484375, "pos_frac": 0.796875, "sample": [386.0196533203125, 87.72819519042969, -61.32513427734375, -115.11090087890625, 229.392822265625, 351.47479248046875, -431.94317626953125, 141.97662353515625, 234.49517822265625, 30.551162719726562, 187.97401428222656, 741.7037353515625, 840.217529296875, -579.7481079101562, 395.756591796875, 457.0045471191406, -278.8469543457031, 852.7516479492188, 78.67425537109375, 755.76611328125, 143.350341796875, -78.09915161132812, 268.5276184082031, 44.056636810302734, 409.2414245605469, 1132.674560546875, 1023.2772827148438, 990.56005859375, 215.29714965820312, 443.38555908203125, 357.218994140625, 338.69635009765625, 120.99401092529297, 453.93194580078125, 958.7442626953125, 1021.9098510742188, 900.8873291015625, -242.06520080566406, 2350.697021484375, 1147.244140625, 409.484130859375, 41.58369445800781, 141.34896850585938, 462.98797607421875, 268.5692443847656, -597.900634765625, 23.821151733398438, 384.69525146484375, 104.78258514404297, -281.8714599609375, -147.79791259765625, -7.74456787109375, 402.91229248046875, 488.342041015625, 1371.494873046875, 597.4414672851562, 264.9151306152344, 832.1442260742188, -974.0975952148438, 1252.762451171875, 1014.0076904296875, 272.00946044921875, 1984.87451171875, -24.029563903808594], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000495.npy"}
{"epoch": 0.7268722466960352, "step": 496, "batch_size": 64, "mean": 192.86068725585938, "std": 435.7102355957031, "min": -960.7478637695312, "p10": -319.08278503417966, "median": 130.1223907470703, "p90": 847.0624572753907, "max": 1106.7967529296875, "pos_frac": 0.75, "sample": [75.96487426757812, -451.1839599609375, 967.4661865234375, 953.0164794921875, -666.9694213867188, 715.1087036132812, 96.66414642333984, 229.07623291015625, 762.837158203125, 106.88369750976562, 1106.7967529296875, 266.40789794921875, 394.0683898925781, 238.83609008789062, -249.2757568359375, -107.13336181640625, -238.62969970703125, 416.29571533203125, -669.1451416015625, 973.4597778320312, 911.6593017578125, 353.6008605957031, 857.2901611328125, 382.2549133300781, 928.9791870117188, 134.3023681640625, -84.60801696777344, 19.176254272460938, 70.46064758300781, -330.7548522949219, 242.65399169921875, 168.6058807373047, 97.01620483398438, 409.49639892578125, 109.38390350341797, -128.5511932373047, 441.24591064453125, 155.15597534179688, -617.9360961914062, 211.49159240722656, -477.9853515625, -291.84796142578125, 683.00830078125, 37.781349182128906, 152.2954864501953, 548.2564086914062, 115.96321105957031, 428.5756530761719, 597.5149536132812, 388.7662658691406, 125.94241333007812, -192.33311462402344, 333.91192626953125, 45.14971160888672, 107.65054321289062, -960.7478637695312, 58.56462478637695, 823.1978149414062, -7.451667785644531, -54.569053649902344, 526.5178833007812, 16.844257354736328, 78.96721649169922, 7.64385986328125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000496.npy"}
{"epoch": 0.7283406754772394, "step": 497, "batch_size": 64, "mean": 201.83465576171875, "std": 454.5106201171875, "min": -928.9949951171875, "p10": -238.42115325927733, "median": 142.52951049804688, "p90": 767.994714355469, "max": 1237.8441162109375, "pos_frac": 0.671875, "sample": [-284.5303649902344, 797.9846801757812, 624.6524047851562, 698.0181274414062, 10.453290939331055, -208.06158447265625, 498.810791015625, -197.69471740722656, -95.93013000488281, -214.9938507080078, 215.60533142089844, 956.1595458984375, 1153.699462890625, 430.78643798828125, 46.02574920654297, 128.58154296875, 381.3705139160156, 163.7974090576172, 676.3784790039062, 474.2948913574219, -16.404098510742188, -75.31450653076172, 594.3760986328125, 973.5455322265625, -78.40641784667969, 4.298871994018555, 696.819091796875, -185.96839904785156, 64.36155700683594, -609.9327392578125, 452.834228515625, 83.68116760253906, 6.508014678955078, 349.52978515625, 69.22259521484375, 1098.5423583984375, -53.272705078125, 1237.8441162109375, 86.66361999511719, 67.38812255859375, 95.6791000366211, 268.7791748046875, 295.77154541015625, 692.670166015625, -928.9949951171875, 156.47747802734375, 602.9578247070312, -376.4892883300781, 352.1653137207031, 172.44451904296875, 453.3638000488281, 265.04437255859375, 950.6557006835938, -179.5990447998047, -149.58860778808594, 249.90969848632812, -199.3613739013672, -248.46142578125, -114.03852844238281, -913.0009155273438, 637.60009765625, -488.8623046875, 327.2329406738281, -26.661968231201172], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000497.npy"}
{"epoch": 0.7298091042584435, "step": 498, "batch_size": 64, "mean": 325.12445068359375, "std": 549.199951171875, "min": -1292.3760986328125, "p10": -257.746011352539, "median": 338.01458740234375, "p90": 980.6428283691407, "max": 1478.1806640625, "pos_frac": 0.703125, "sample": [-776.2730712890625, 302.638671875, 640.3992919921875, 119.29324340820312, -193.14035034179688, 718.8553466796875, 659.957763671875, 962.6658935546875, 277.9640197753906, 755.8065185546875, 448.0621643066406, 763.5955810546875, 245.38302612304688, 823.5580444335938, -1292.3760986328125, 673.1600952148438, 802.9500122070312, 1294.1966552734375, 88.73725128173828, 609.4166870117188, 159.70391845703125, 729.816650390625, -97.42269897460938, 1028.4013671875, -244.52639770507812, 325.3729553222656, 388.1900939941406, 51.68201446533203, -383.54364013671875, -10.893913269042969, 282.4097595214844, 360.7872314453125, -1147.81640625, 1230.8529052734375, 497.4577941894531, -14.0628662109375, -145.76727294921875, 384.81787109375, 623.51904296875, 455.93206787109375, 1384.63134765625, 8.938423156738281, -23.35479736328125, 121.84822082519531, 624.3543090820312, 619.9320678710938, -412.3182678222656, -17.57489013671875, 1105.10546875, 929.9725341796875, 261.10028076171875, -263.41156005859375, 350.6562194824219, -105.46100616455078, -236.9285125732422, -18.991424560546875, 489.5926513671875, 184.14556884765625, 914.0012817382812, 467.85455322265625, -306.0578918457031, 988.3472290039062, 1478.1806640625, -136.35891723632812], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000498.npy"}
{"epoch": 0.7312775330396476, "step": 499, "batch_size": 64, "mean": 409.18572998046875, "std": 552.010009765625, "min": -1015.0634155273438, "p10": -168.31645965576172, "median": 441.86834716796875, "p90": 1020.6643981933596, "max": 2125.2568359375, "pos_frac": 0.78125, "sample": [961.209716796875, 87.06172180175781, 201.75384521484375, -1015.0634155273438, -42.08800506591797, 566.1241455078125, 496.3016052246094, 648.5924682617188, -54.15699005126953, -113.45449829101562, 604.1595458984375, 435.44146728515625, 115.42882537841797, 1044.0465087890625, 582.2423095703125, 157.73245239257812, 671.7052612304688, -682.1181030273438, 2125.2568359375, 276.91632080078125, 167.25732421875, 966.1061401367188, 694.7462158203125, 45.07958221435547, -661.99267578125, 571.5123291015625, 297.5003662109375, 903.943359375, -247.6161651611328, 1189.53857421875, 237.26992797851562, -19.921875, 589.6522216796875, -114.84207916259766, 305.5642395019531, 68.91968536376953, 573.206298828125, 909.2830810546875, 884.4007568359375, -217.6891326904297, 37.24879455566406, 487.63262939453125, 588.9071044921875, 596.3389282226562, 601.8440551757812, 206.18405151367188, 645.8289184570312, 1742.60107421875, 1362.1436767578125, 141.28182983398438, 397.6416320800781, 732.736083984375, 448.29522705078125, 832.8165283203125, -79.90898132324219, 804.2275390625, 1239.2720947265625, 322.6146240234375, -586.5762939453125, -160.31761169433594, -171.74453735351562, 628.3623657226562, 52.524620056152344, 1108.922119140625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000499.npy"}
{"epoch": 0.7327459618208517, "step": 500, "batch_size": 64, "mean": 335.6067810058594, "std": 526.68408203125, "min": -1034.2469482421875, "p10": -358.8132781982421, "median": 274.5812072753906, "p90": 949.547479248047, "max": 1649.2652587890625, "pos_frac": 0.796875, "sample": [228.04244995117188, -170.3079071044922, -644.5598754882812, 1649.2652587890625, -553.2780151367188, 507.7237243652344, 1021.4234619140625, 436.2909240722656, 763.7357788085938, 47.00066375732422, 846.94873046875, 710.4366455078125, 708.771484375, -436.0370178222656, 71.67562866210938, 482.5276794433594, 46.29481506347656, 117.196533203125, 290.85504150390625, 1214.97412109375, 608.3241577148438, -29.765914916992188, 644.413818359375, 1245.841064453125, -44.80554962158203, 771.956787109375, -406.6663818359375, 792.2446899414062, 909.2356567382812, -722.5237426757812, 805.5228881835938, 884.48681640625, 25.048202514648438, 98.00939178466797, 75.86273956298828, 731.6220703125, -595.7510986328125, 163.87957763671875, 966.823974609375, -1034.2469482421875, -152.5603790283203, 144.1389617919922, 208.1214599609375, 602.5697631835938, 105.55228424072266, -141.25848388671875, 513.6807250976562, 77.92876434326172, 92.7252197265625, 258.307373046875, 1142.84765625, 840.8909301757812, 761.533447265625, 195.62924194335938, 569.9344482421875, 422.3311462402344, 637.729736328125, 326.2783203125, 27.78682518005371, 550.2313842773438, 128.84104919433594, 1153.10302734375, -247.15603637695312, 31.154983520507812], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000500.npy"}
{"epoch": 0.7342143906020558, "step": 501, "batch_size": 64, "mean": 367.0113830566406, "std": 557.9096069335938, "min": -999.5001220703125, "p10": -304.60113220214845, "median": 366.0848693847656, "p90": 1098.7266357421877, "max": 1780.4534912109375, "pos_frac": 0.765625, "sample": [-47.972816467285156, 379.33392333984375, 373.7471923828125, -365.1366271972656, -53.163028717041016, 1255.3670654296875, 224.4400177001953, 986.1767578125, 291.06622314453125, 58.207977294921875, 164.21408081054688, -224.20382690429688, 358.42254638671875, 192.19068908691406, 1780.4534912109375, 667.6639404296875, 1054.467529296875, 384.2840576171875, 388.94842529296875, 933.0306396484375, 1117.69482421875, -1.1287078857421875, 340.56781005859375, 173.10830688476562, 816.8910522460938, -490.6345520019531, -627.8881225585938, 356.51397705078125, 1010.3839111328125, 529.9506225585938, -787.005126953125, 210.85415649414062, 639.979248046875, 446.95703125, 447.44580078125, 1316.5054931640625, -184.0974578857422, 570.963623046875, 317.2588195800781, 890.4647827148438, 1153.6824951171875, 11.873771667480469, -169.4547576904297, -303.9028625488281, 613.785400390625, 146.67889404296875, 68.33041381835938, 570.09228515625, 93.69839477539062, -999.5001220703125, 389.5252685546875, 579.40771484375, -304.900390625, -124.65933227539062, 13.233280181884766, -449.34326171875, 1020.3427124023438, 864.392578125, 1203.126953125, 453.5748596191406, 1656.5291748046875, 554.071044921875, 470.2105712890625, 81.6102294921875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000501.npy"}
{"epoch": 0.73568281938326, "step": 502, "batch_size": 64, "mean": 359.60321044921875, "std": 432.7370300292969, "min": -593.5708618164062, "p10": -56.113219451904286, "median": 307.2708435058594, "p90": 924.7134277343752, "max": 2050.32568359375, "pos_frac": 0.84375, "sample": [597.432373046875, -277.48980712890625, 842.0557250976562, 523.0380249023438, 57.656982421875, 628.0523681640625, -128.61935424804688, -59.4864501953125, 1173.0694580078125, 371.6633605957031, 399.5777587890625, 226.38973999023438, 320.07086181640625, 1031.253173828125, 873.6624755859375, 21.603759765625, -6.677001953125, 292.3294982910156, -593.5708618164062, 118.28900146484375, 660.1940307617188, 984.5411376953125, 111.30461883544922, 410.0502624511719, 34.65702819824219, 161.59263610839844, 103.36964416503906, 367.4023132324219, 430.419189453125, 270.19354248046875, -271.954833984375, 1089.6466064453125, 0.13201141357421875, 471.7693786621094, 421.5292053222656, 878.8197631835938, -97.44203186035156, 2050.32568359375, 23.884872436523438, 21.01072883605957, 676.93505859375, 40.14073181152344, 271.99322509765625, 347.30059814453125, 365.49169921875, 571.5335083007812, 1207.9058837890625, 14.533395767211914, 655.665283203125, 452.795166015625, 769.1292724609375, 37.95292663574219, -11.361017227172852, 111.04067993164062, 944.3821411132812, -114.0374755859375, 309.31768798828125, 69.42776489257812, 709.463134765625, 305.2239990234375, -48.242347717285156, 352.451416015625, 139.96730041503906, 303.8485107421875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000502.npy"}
{"epoch": 0.737151248164464, "step": 503, "batch_size": 64, "mean": 330.55023193359375, "std": 441.4797668457031, "min": -341.0614013671875, "p10": -111.59978866577148, "median": 214.36455535888672, "p90": 852.3300231933595, "max": 1993.9251708984375, "pos_frac": 0.78125, "sample": [0.4543266296386719, -60.20879364013672, 904.05859375, 63.68846130371094, -123.28565979003906, -179.64425659179688, 492.95709228515625, 273.9898986816406, 81.67383575439453, -98.24321746826172, -341.0614013671875, 333.61883544921875, -105.7298355102539, 213.11351013183594, 939.0984497070312, 755.7393188476562, 332.58966064453125, 785.8555908203125, 51.00396728515625, 659.102294921875, -100.31990051269531, 111.43659973144531, 59.14030456542969, 65.18518829345703, 405.5272216796875, -39.66764831542969, -145.83935546875, 579.4059448242188, 214.74093627929688, 132.7391357421875, 407.3246765136719, -30.6871337890625, 107.25892639160156, 590.0140380859375, 23.949363708496094, -299.52679443359375, 1993.9251708984375, 1671.2647705078125, 545.987060546875, 375.7431335449219, 139.65975952148438, 648.3549194335938, 700.9312744140625, 36.39165496826172, -108.6041259765625, 562.4580078125, 97.5630874633789, 820.2509765625, 206.08395385742188, 388.53271484375, 154.87075805664062, 569.9366455078125, 449.03594970703125, 213.98817443847656, 1014.6806030273438, 578.96044921875, -112.8836441040039, 734.0887451171875, -263.8288269042969, 1092.63671875, 226.90673828125, 446.40478515625, 866.0781860351562, 46.343536376953125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000503.npy"}
{"epoch": 0.7386196769456681, "step": 504, "batch_size": 64, "mean": 390.8580017089844, "std": 527.0880126953125, "min": -653.5833740234375, "p10": -330.08172302246095, "median": 347.6206817626953, "p90": 961.3849548339845, "max": 1679.953857421875, "pos_frac": 0.765625, "sample": [-23.397613525390625, 138.24087524414062, 942.7780151367188, 378.11883544921875, 99.85377502441406, 291.8057556152344, 650.45947265625, -230.4228515625, 1631.37841796875, 1223.877685546875, 1679.953857421875, -503.93585205078125, 518.4891357421875, 631.24267578125, -52.49107360839844, -68.69825744628906, 650.7239379882812, 203.57272338867188, 17.99407196044922, -28.367156982421875, 5.541862487792969, -38.071144104003906, 393.59393310546875, -320.0919189453125, 225.21519470214844, 283.56292724609375, -359.14801025390625, 568.7589721679688, 163.67044067382812, 369.47998046875, -334.3630676269531, 911.4457397460938, -480.6991882324219, 1319.4720458984375, 850.2247314453125, 697.4116821289062, 825.995849609375, 554.3223266601562, 173.53500366210938, 335.7716064453125, 112.96185302734375, 1149.9776611328125, 802.1846313476562, 490.5853576660156, 798.40966796875, -135.49026489257812, -451.106201171875, 857.9602661132812, 28.548540115356445, 967.616455078125, 172.13180541992188, 689.730224609375, 946.8447875976562, 672.8115234375, 342.2407531738281, 353.0006103515625, -390.63592529296875, 941.6317749023438, 1459.337158203125, 334.986572265625, -653.5833740234375, 489.10186767578125, 707.169921875, 31.6922607421875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000504.npy"}
{"epoch": 0.7400881057268722, "step": 505, "batch_size": 64, "mean": 214.41128540039062, "std": 434.99273681640625, "min": -775.5117797851562, "p10": -288.8455047607422, "median": 159.9510726928711, "p90": 836.2848266601562, "max": 1307.24560546875, "pos_frac": 0.6875, "sample": [-1.1750564575195312, 512.1576538085938, -298.7203369140625, 541.22265625, -48.46183776855469, 839.6387939453125, 145.95338439941406, -62.05052185058594, 75.1759033203125, 1.2713470458984375, 738.1227416992188, 828.3218994140625, 1307.24560546875, -18.33868408203125, 565.8175659179688, 509.70196533203125, -136.78758239746094, 297.07769775390625, 99.6990966796875, -265.8042297363281, 316.1955871582031, 681.542724609375, 176.42578125, 79.39749145507812, -344.70013427734375, -634.1231079101562, 105.10987854003906, 289.4359436035156, 883.730712890625, 213.32525634765625, -39.45711135864258, 427.8536376953125, 299.124267578125, -218.47348022460938, 211.00796508789062, 655.5506591796875, 173.94876098632812, 696.1834106445312, -74.8032455444336, 475.7401123046875, -658.6441040039062, -775.5117797851562, 64.47196960449219, 310.38922119140625, 14.41888427734375, 75.57476043701172, 1036.230712890625, 514.83984375, -226.53074645996094, 874.07958984375, 47.30314254760742, -187.17706298828125, -418.82965087890625, 895.2139892578125, 196.82786560058594, -72.60917663574219, 830.63330078125, -348.9085693359375, -237.573486328125, 81.75190734863281, 838.7069091796875, 246.29861450195312, 618.06640625, 0.21702957153320312], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000505.npy"}
{"epoch": 0.7415565345080763, "step": 506, "batch_size": 64, "mean": 352.2035217285156, "std": 469.82867431640625, "min": -792.9182739257812, "p10": -148.1012252807617, "median": 347.9792022705078, "p90": 970.2038635253907, "max": 1852.09130859375, "pos_frac": 0.765625, "sample": [-304.53460693359375, 38.424346923828125, 591.3936767578125, -42.29377746582031, 240.4673309326172, 269.7755126953125, -189.45501708984375, 578.2039184570312, 762.9428100585938, 111.749755859375, 595.6881713867188, 258.52191162109375, 270.178955078125, 238.72181701660156, -571.551025390625, 991.6907958984375, 442.72967529296875, -33.11131286621094, 299.1468811035156, 644.9241333007812, 497.200439453125, 1200.946044921875, 394.5499267578125, 125.81041717529297, 1852.09130859375, -158.63687133789062, -112.47417449951172, 481.04840087890625, 675.9446411132812, 393.199462890625, 423.45465087890625, 1212.212158203125, 381.4508972167969, 103.94200134277344, 526.740478515625, -647.6908569335938, 363.3083801269531, 199.26837158203125, 828.487548828125, 833.8177490234375, 523.3883056640625, 599.0459594726562, -123.51805114746094, 430.35650634765625, 35.489234924316406, -792.9182739257812, 270.94244384765625, -70.78971099853516, 1038.12353515625, 855.7862548828125, 1005.4608154296875, -3.784404754638672, 332.6500244140625, 790.4920043945312, 690.56103515625, 980.9035034179688, 61.92408752441406, 945.238037109375, 178.1827850341797, -45.42162322998047, -276.1556396484375, 17.973114013671875, 423.7380676269531, -94.92804718017578], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000506.npy"}
{"epoch": 0.7430249632892805, "step": 507, "batch_size": 64, "mean": 302.70806884765625, "std": 562.3052978515625, "min": -989.382080078125, "p10": -267.3841278076171, "median": 248.9258575439453, "p90": 775.1598754882813, "max": 2246.781494140625, "pos_frac": 0.71875, "sample": [17.408742904663086, 294.17236328125, -31.460769653320312, -989.382080078125, 1060.69189453125, -50.9324951171875, 23.860885620117188, 405.189453125, -58.522789001464844, 139.35736083984375, -47.83277893066406, 381.3744812011719, 51.34870910644531, 427.08905029296875, 686.283935546875, 774.7295532226562, -413.5023193359375, 311.54901123046875, 95.13557434082031, 1999.7005615234375, 385.66259765625, 661.12744140625, 52.07209396362305, -172.59616088867188, 420.7063903808594, 363.86029052734375, 676.3533935546875, 924.9605102539062, -350.5700988769531, 368.181640625, 234.04893493652344, -89.16049194335938, -177.97406005859375, 3.6750621795654297, 34.095252990722656, 519.19140625, 534.5126342773438, 1361.6976318359375, 619.3086547851562, 65.46715545654297, 261.11395263671875, 19.00982666015625, -28.572906494140625, 259.1058654785156, 2246.781494140625, 773.5068359375, 179.32281494140625, 273.08197021484375, -418.7509460449219, 746.7290649414062, 742.5203857421875, -477.14453125, -40.7403564453125, 737.7184448242188, 1710.95068359375, 566.5866088867188, 775.3442993164062, 426.80230712890625, 238.745849609375, -1.6892337799072266, 29.89916229248047, -149.51754760742188, -305.7027282714844, -702.6629638671875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000507.npy"}
{"epoch": 0.7444933920704846, "step": 508, "batch_size": 64, "mean": 262.43280029296875, "std": 551.2412719726562, "min": -1363.7071533203125, "p10": -250.86580505371094, "median": 269.92820739746094, "p90": 837.484033203125, "max": 1975.4793701171875, "pos_frac": 0.703125, "sample": [173.06631469726562, 150.92742919921875, -674.169677734375, 693.4511108398438, 488.96636962890625, -10.61346435546875, 95.05789184570312, 1847.25732421875, 406.8850402832031, 640.1813354492188, 300.9523620605469, 884.326904296875, 271.2112731933594, 1975.4793701171875, 467.91839599609375, 370.38427734375, 201.14352416992188, -503.80364990234375, 1137.280517578125, 827.7598266601562, -447.35162353515625, -84.60997009277344, 268.6451416015625, -890.9617309570312, 967.694091796875, -140.56942749023438, 119.69586181640625, -131.56210327148438, -121.15902709960938, 312.06219482421875, 3.8572845458984375, 333.1561584472656, 24.396377563476562, -28.950157165527344, 1491.896240234375, -1363.7071533203125, 122.8452377319336, 136.99749755859375, 149.62210083007812, 589.196044921875, 306.6947937011719, 342.14959716796875, 104.69992065429688, 461.9134216308594, 537.38232421875, 319.6229248046875, 524.0292358398438, 484.8043212890625, 718.8418579101562, 548.1976318359375, 574.824462890625, -162.82339477539062, -244.20106506347656, 841.6515502929688, 584.357177734375, -138.3453369140625, -106.93206787109375, -39.62982177734375, -253.7221221923828, 590.7106323242188, -394.924072265625, 312.666748046875, 65.2244644165039, -236.34829711914062], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000508.npy"}
{"epoch": 0.7459618208516887, "step": 509, "batch_size": 64, "mean": 281.92559814453125, "std": 430.9751281738281, "min": -575.4947509765625, "p10": -256.07958374023434, "median": 232.0040740966797, "p90": 861.7127197265627, "max": 1727.628662109375, "pos_frac": 0.78125, "sample": [-19.715490341186523, -70.9505615234375, -575.4947509765625, 810.4561767578125, -153.09124755859375, 498.7107238769531, 48.08100128173828, 141.2354736328125, 496.00103759765625, 269.03302001953125, 649.4215698242188, 181.69793701171875, 281.81097412109375, 167.72940063476562, 199.26454162597656, 205.73855590820312, 883.6798095703125, -502.15045166015625, -370.86981201171875, 392.2344970703125, 961.1533813476562, 443.0311584472656, 146.84852600097656, 257.34112548828125, 376.26995849609375, 635.8115234375, 137.13058471679688, 228.19427490234375, 326.13568115234375, 402.4109802246094, 576.5161743164062, -366.0065002441406, 86.89366149902344, -156.88978576660156, 179.67984008789062, 936.2059936523438, 627.5233764648438, 55.58180236816406, 226.56195068359375, -264.72723388671875, -17.782257080078125, 385.22235107421875, 322.75457763671875, 128.31600952148438, 46.432098388671875, -544.5282592773438, 1108.5098876953125, 1237.138427734375, -78.16401672363281, 1727.628662109375, -235.9017333984375, 156.20016479492188, -300.6087341308594, 120.33554077148438, 276.940185546875, 403.481689453125, 104.3962631225586, 235.81387329101562, 242.75021362304688, 470.3746337890625, 747.0177612304688, 1010.82373046875, 410.70867919921875, 736.8898315429688], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000509.npy"}
{"epoch": 0.7474302496328928, "step": 510, "batch_size": 64, "mean": 362.33612060546875, "std": 383.67034912109375, "min": -539.921875, "p10": -104.11907730102538, "median": 383.96046447753906, "p90": 730.0242309570312, "max": 1213.603515625, "pos_frac": 0.8125, "sample": [229.49319458007812, 695.7202758789062, 1109.7816162109375, 434.4525146484375, -100.43679809570312, 602.4619140625, 720.1112670898438, 349.88140869140625, 73.72288513183594, 663.591064453125, 373.1282653808594, 520.5281372070312, -284.6249084472656, 430.6485595703125, 1050.6094970703125, 734.2726440429688, 607.01025390625, -539.921875, -79.9581527709961, 719.4562377929688, 232.52761840820312, 394.79266357421875, 444.9130859375, 156.83059692382812, 292.3305358886719, 655.5536499023438, 528.0202026367188, 871.8140869140625, 989.7518920898438, -253.4647216796875, 715.6683349609375, 1213.603515625, -24.13284683227539, 170.723388671875, 588.6351928710938, 28.638954162597656, 405.6371154785156, 1187.9583740234375, -239.7411346435547, 314.52880859375, 193.5282745361328, 47.817909240722656, 659.649169921875, 290.15869140625, 471.9617919921875, 24.462303161621094, 494.26641845703125, 371.2448425292969, 59.82157897949219, 467.74298095703125, 712.00732421875, -105.69719696044922, 207.3955078125, 683.4276123046875, -57.54841613769531, 503.82415771484375, -239.7444305419922, 707.173095703125, -73.95137786865234, -202.07781982421875, 0.7851657867431641, 642.4960327148438, 294.4023132324219, 51.84847640991211], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000510.npy"}
{"epoch": 0.748898678414097, "step": 511, "batch_size": 64, "mean": 295.53057861328125, "std": 536.4003295898438, "min": -1321.3931884765625, "p10": -174.18253860473627, "median": 212.3377685546875, "p90": 959.8036682128911, "max": 1825.218505859375, "pos_frac": 0.78125, "sample": [-117.19676971435547, 1187.671142578125, 203.80709838867188, 142.72901916503906, 285.04205322265625, 131.42649841308594, 1011.7539672851562, 1271.394775390625, 481.37933349609375, 145.62020874023438, 56.67810821533203, 581.4825439453125, 676.56201171875, 680.651123046875, 96.04972839355469, 1273.755126953125, -1321.3931884765625, 408.81866455078125, -324.0357666015625, 645.924560546875, 1346.113037109375, 759.6442260742188, 46.00648498535156, 452.3228759765625, 220.86843872070312, 72.71390533447266, -726.1177368164062, 318.56787109375, -113.36795043945312, 479.90869140625, 487.1845703125, -39.03300476074219, -22.620710372924805, 324.83367919921875, 294.9953918457031, 184.00889587402344, 838.5863037109375, -198.60501098632812, 18.748046875, 178.75582885742188, 750.8584594726562, 161.76480102539062, 47.71687698364258, -340.5770568847656, -60.358726501464844, 710.3229370117188, 712.2642822265625, 89.94334411621094, -11.407384872436523, 69.80702209472656, -228.91323852539062, 471.4449157714844, 438.2286376953125, 51.822227478027344, 238.05535888671875, 373.5233154296875, 260.48236083984375, -13.376701354980469, 1825.218505859375, 93.54669189453125, 429.2083740234375, -1175.514892578125, 151.87261962890625, 1426.39111328125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000511.npy"}
{"epoch": 0.750367107195301, "step": 512, "batch_size": 64, "mean": 390.9322509765625, "std": 405.1795654296875, "min": -400.81072998046875, "p10": -66.70032424926754, "median": 344.9140625, "p90": 905.6121826171875, "max": 1707.489501953125, "pos_frac": 0.859375, "sample": [951.2406005859375, 897.1373291015625, 816.9422607421875, -366.1279296875, -86.88133239746094, 23.038299560546875, 1707.489501953125, 254.7554931640625, 53.71900939941406, -19.61130714416504, 136.1072540283203, 614.882568359375, 4.045909881591797, 400.2654113769531, -241.14663696289062, 3.0607261657714844, 506.84619140625, 505.0789489746094, 327.98004150390625, -4.24431037902832, -208.78579711914062, 638.06787109375, 137.26461791992188, 529.1365966796875, 1120.23681640625, 700.6918334960938, 239.7434539794922, 383.00518798828125, 23.08483123779297, 705.9805908203125, 253.10906982421875, 292.8453674316406, 476.7975769042969, 458.38360595703125, 227.05140686035156, 162.00428771972656, 1034.373291015625, 303.1245422363281, -107.5511245727539, 817.4728393554688, 180.00112915039062, 909.2442626953125, 398.4762878417969, 965.228759765625, 28.798324584960938, -400.81072998046875, 336.71063232421875, 90.27445983886719, 212.73887634277344, 458.31951904296875, 1047.3087158203125, 14.935371398925781, 380.0691833496094, 577.5143432617188, 318.5189514160156, 637.174072265625, 698.1572265625, 219.76162719726562, 353.11749267578125, 834.36083984375, -259.9419860839844, 869.3218994140625, 742.244873046875, 737.5244140625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000512.npy"}
{"epoch": 0.7518355359765051, "step": 513, "batch_size": 64, "mean": 280.02032470703125, "std": 438.0154724121094, "min": -621.0064697265625, "p10": -199.51132049560542, "median": 214.15422821044922, "p90": 976.2395812988283, "max": 1218.1151123046875, "pos_frac": 0.734375, "sample": [651.65576171875, 270.67498779296875, 328.17413330078125, 140.13314819335938, 349.0105285644531, 245.4508514404297, 180.4219512939453, 185.9022674560547, -220.21238708496094, 997.9019775390625, 571.2889404296875, -133.28070068359375, 581.1395874023438, 62.64910125732422, 100.0511703491211, 728.9249267578125, 38.14971923828125, -47.32098388671875, 198.0951690673828, 249.80010986328125, 804.840576171875, 852.2437133789062, 1159.6986083984375, 310.82958984375, 603.340087890625, 72.90097045898438, 303.8919677734375, 82.91766357421875, 368.3272705078125, -151.20883178710938, 134.78219604492188, 908.2786254882812, 504.3802490234375, 406.90057373046875, 230.52557373046875, -54.288970947265625, -58.52952575683594, -559.1561889648438, 401.89306640625, 1218.1151123046875, -74.63453674316406, 107.68172454833984, 124.77783203125, -621.0064697265625, 584.3607177734375, 6.59735107421875, -345.6969299316406, -46.424964904785156, 1024.743896484375, 929.1007080078125, -468.6199951171875, -1.019500732421875, 1012.162841796875, 1160.638671875, 157.82373046875, 230.21328735351562, -367.44635009765625, 621.532470703125, -97.6285171508789, -129.1721649169922, -414.2811279296875, 996.4419555664062, 167.29632568359375, 344.56671142578125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000513.npy"}
{"epoch": 0.7533039647577092, "step": 514, "batch_size": 64, "mean": 287.24462890625, "std": 481.3392333984375, "min": -705.1365966796875, "p10": -226.6568145751953, "median": 188.81744384765625, "p90": 883.5353149414063, "max": 1885.4267578125, "pos_frac": 0.703125, "sample": [-541.5341186523438, -110.28141021728516, 151.26393127441406, -67.43258666992188, 1063.736328125, 203.05258178710938, 590.3359375, 654.5636596679688, 1885.4267578125, 892.7559204101562, 130.8514862060547, 576.0697631835938, -218.94000244140625, -113.40960693359375, 422.54644775390625, -104.73698425292969, -242.09349060058594, 168.90586853027344, 1077.57080078125, 1036.6424560546875, -215.11483764648438, -67.76679229736328, 303.2845153808594, -122.10567474365234, 35.12035369873047, 412.8099060058594, 301.688232421875, 173.1103515625, 461.28509521484375, -81.49811553955078, 862.0205688476562, 490.56756591796875, -115.3254165649414, 1.99322509765625, 175.07492065429688, 333.31292724609375, 1180.5404052734375, -152.97909545898438, 105.57006072998047, -229.96401977539062, 309.03363037109375, -431.72479248046875, -146.99285888671875, 588.7481079101562, 202.55996704101562, 91.07542419433594, -705.1365966796875, -277.22540283203125, -258.2764892578125, 846.501708984375, 292.9971008300781, 478.7598876953125, 390.672607421875, 720.4254150390625, 790.8275756835938, 118.98277282714844, 294.1307373046875, 597.02685546875, 695.813720703125, 138.7623748779297, 710.9043579101562, 44.05595779418945, 132.97384643554688, 1451.842529296875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000514.npy"}
{"epoch": 0.7547723935389133, "step": 515, "batch_size": 64, "mean": 237.156005859375, "std": 420.7529296875, "min": -1494.1552734375, "p10": -206.41293334960935, "median": 226.95526123046875, "p90": 718.783062744141, "max": 1418.6314697265625, "pos_frac": 0.78125, "sample": [-68.31083679199219, -30.915786743164062, 497.6582336425781, 773.2747802734375, 500.82977294921875, -1494.1552734375, 487.9020080566406, 378.71746826171875, 96.51336669921875, -419.8072509765625, -246.6669464111328, 27.354736328125, 4.69659423828125, 198.8232421875, 283.2499084472656, 328.1056823730469, 146.3828582763672, 587.7171630859375, -259.53955078125, 132.57733154296875, 332.8492736816406, -26.075727462768555, 148.1584930419922, 345.312255859375, 233.9285430908203, 219.3835906982422, 591.2044677734375, -190.62149047851562, 527.8711547851562, 32.35504913330078, 813.0029907226562, 28.51128578186035, -182.48809814453125, 605.27783203125, 376.34100341796875, 372.44451904296875, 584.48974609375, 10.768867492675781, 1170.17822265625, -213.18069458007812, 155.73980712890625, 936.4784545898438, -309.91278076171875, -31.566429138183594, -370.95721435546875, 223.40805053710938, 101.63072967529297, 767.4281616210938, 31.287799835205078, 107.139892578125, 100.1111068725586, 1010.3486328125, 230.50247192382812, 168.90875244140625, 373.6871643066406, 544.7725830078125, 450.9876403808594, -128.80136108398438, 347.89080810546875, 396.7718505859375, 312.5886535644531, 247.59762573242188, 1418.6314697265625, 389.192138671875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000515.npy"}
{"epoch": 0.7562408223201175, "step": 516, "batch_size": 64, "mean": 330.2271728515625, "std": 399.8164367675781, "min": -875.6845092773438, "p10": -182.24660186767574, "median": 356.76112365722656, "p90": 841.9170227050781, "max": 1077.236083984375, "pos_frac": 0.78125, "sample": [191.85009765625, 234.60903930664062, 682.252197265625, -131.6246795654297, 544.95703125, 703.3990478515625, 494.9164733886719, -121.39390563964844, 260.82073974609375, 517.9632568359375, 1037.005615234375, 471.86492919921875, 511.27850341796875, 445.98590087890625, 150.86619567871094, -118.29556274414062, 624.3480224609375, 182.8336639404297, 124.56341552734375, 1077.236083984375, -875.6845092773438, 101.24847412109375, -203.94171142578125, -385.04046630859375, 886.7071533203125, 558.5592041015625, -433.5992736816406, 735.9066772460938, 1055.1085205078125, 224.84739685058594, 289.4283752441406, 841.9453125, 917.8335571289062, 295.630859375, 531.3504028320312, 563.9072265625, 281.96759033203125, 40.3650016784668, 464.31390380859375, 335.21368408203125, 174.24244689941406, -41.078399658203125, 841.8510131835938, 321.8377990722656, -8.20962142944336, 998.2974853515625, 155.42396545410156, 178.91448974609375, -17.589576721191406, 461.51824951171875, 378.3085632324219, 501.9366455078125, 85.34725952148438, 573.7968139648438, 619.7236938476562, -276.8953857421875, -247.77134704589844, -399.629150390625, 519.85791015625, 578.4074096679688, 475.7828369140625, 810.0506591796875, 386.29486083984375, -47.38393020629883], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000516.npy"}
{"epoch": 0.7577092511013216, "step": 517, "batch_size": 64, "mean": 391.58160400390625, "std": 499.5638427734375, "min": -565.4898071289062, "p10": -160.88776702880858, "median": 355.7206726074219, "p90": 911.9330627441408, "max": 2083.670166015625, "pos_frac": 0.78125, "sample": [598.6801147460938, 699.0245971679688, 202.36639404296875, 318.9443664550781, 821.5503540039062, 168.93496704101562, -433.58221435546875, 787.3334350585938, 479.50946044921875, -225.6434783935547, 479.7953796386719, 1834.311767578125, 1147.681884765625, 427.21649169921875, 598.0042724609375, -143.47491455078125, 244.79531860351562, 524.1290283203125, 184.9220428466797, 761.7267456054688, 1327.047119140625, 644.0609130859375, 12.470438003540039, 302.9140319824219, -565.4898071289062, -287.1295166015625, 116.58480072021484, 341.0426330566406, -295.1029968261719, 2083.670166015625, 431.2562255859375, -38.56758117675781, -32.61555480957031, 862.1670532226562, 89.585693359375, 496.90374755859375, -492.57080078125, 443.7926940917969, -62.517608642578125, 352.5971374511719, 784.955810546875, -59.142723083496094, 380.925048828125, 375.7120666503906, 544.6221923828125, 548.2264404296875, 686.0469970703125, 358.8442077636719, 38.09833526611328, 1362.9530029296875, 125.3885726928711, 1062.1014404296875, 344.0481872558594, 933.2613525390625, 142.4156494140625, 688.35595703125, -168.3504180908203, 222.96273803710938, 227.6245880126953, 409.4398193359375, -125.13409423828125, 796.306396484375, 241.00875854492188, -65.7742691040039], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000517.npy"}
{"epoch": 0.7591776798825257, "step": 518, "batch_size": 64, "mean": 331.1033935546875, "std": 421.5318603515625, "min": -799.9049682617188, "p10": -174.70106658935546, "median": 270.9982147216797, "p90": 875.8142944335938, "max": 1199.697998046875, "pos_frac": 0.796875, "sample": [59.759334564208984, 1062.195556640625, 532.7943725585938, 288.72076416015625, 200.3566131591797, 277.08795166015625, 530.6265258789062, 604.6893310546875, 106.15773010253906, 303.5815734863281, 808.14990234375, 605.7908325195312, 135.2833251953125, -167.7715301513672, 528.63916015625, 256.4307861328125, 371.7486877441406, 209.6607666015625, 61.986175537109375, 559.6939086914062, 994.3416748046875, -32.331825256347656, -233.90347290039062, 134.11849975585938, -14.356266021728516, 311.1279602050781, 808.8641357421875, 33.9649658203125, 736.2066040039062, 877.2409057617188, 793.4669189453125, 95.21879577636719, 1199.697998046875, 1193.0413818359375, 796.2052001953125, 714.313232421875, 146.99520874023438, -100.58767700195312, 468.5995178222656, 612.9156494140625, 219.0763397216797, -94.38302612304688, 527.4020385742188, -177.67086791992188, 469.53619384765625, -412.799560546875, -406.79119873046875, -799.9049682617188, 377.1210632324219, 447.2566833496094, 29.991256713867188, 179.1116485595703, -204.0029754638672, 195.1973876953125, -381.37567138671875, 1146.53857421875, 988.9368896484375, 195.11404418945312, -47.31454849243164, 872.4855346679688, 264.9084777832031, 133.3909149169922, 554.18408203125, 243.8887176513672], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000518.npy"}
{"epoch": 0.7606461086637298, "step": 519, "batch_size": 64, "mean": 227.3538360595703, "std": 439.9578857421875, "min": -948.2279052734375, "p10": -295.05587158203116, "median": 250.18697357177734, "p90": 831.8871520996097, "max": 1248.362060546875, "pos_frac": 0.703125, "sample": [-33.226226806640625, 526.0648193359375, 341.7094421386719, -460.6985778808594, 956.7820434570312, -219.88333129882812, 105.26921081542969, 517.6006469726562, 950.0043334960938, 537.4805297851562, 147.75450134277344, 584.3638916015625, -175.33319091796875, 915.2224731445312, 342.751220703125, -869.7635498046875, 1248.362060546875, 594.6959228515625, 32.02684783935547, -38.19850158691406, 285.65814208984375, -84.78285217285156, 50.346336364746094, 321.895263671875, 393.67742919921875, 203.58505249023438, 277.5669250488281, 565.5923461914062, 506.10662841796875, 3.167379379272461, -30.223907470703125, 637.9962768554688, -151.49490356445312, -114.3505859375, 351.80670166015625, 80.77579498291016, 473.759033203125, -95.80179595947266, -383.74603271484375, -10.733718872070312, 390.0567626953125, -327.2726745605469, 936.6384887695312, 409.1878662109375, -948.2279052734375, 592.2644653320312, 1204.1282958984375, 30.145973205566406, 10.71514892578125, 21.905563354492188, 175.63462829589844, 870.7359619140625, -394.1512145996094, 582.784423828125, -197.50360107421875, 253.08917236328125, 741.2399291992188, 17.94906997680664, 373.5617980957031, 469.4179382324219, 247.28477478027344, -52.61985397338867, 290.0935974121094, -430.19635009765625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000519.npy"}
{"epoch": 0.762114537444934, "step": 520, "batch_size": 64, "mean": 306.2880859375, "std": 470.9873352050781, "min": -952.8714599609375, "p10": -157.3209014892578, "median": 212.07371520996094, "p90": 911.9131469726564, "max": 1972.7210693359375, "pos_frac": 0.71875, "sample": [-128.24038696289062, -240.759765625, 995.15869140625, 140.11289978027344, 474.4096984863281, 156.82492065429688, 420.5893249511719, -56.94190979003906, -160.9459228515625, 191.70587158203125, -8.031723022460938, -56.202301025390625, 716.758544921875, -105.81697082519531, -58.77344512939453, 544.5411376953125, 1138.3870849609375, 1972.7210693359375, -228.80416870117188, 380.5254821777344, 1116.420654296875, 1338.673095703125, -423.712890625, 212.20913696289062, -57.454437255859375, 740.5870361328125, 121.79468536376953, 561.4992065429688, 458.119873046875, 211.96043395996094, 232.45880126953125, 212.1377410888672, 1214.4224853515625, 164.8556365966797, 69.23153686523438, 463.82861328125, 519.359130859375, 433.9691467285156, 850.1143798828125, -952.8714599609375, 496.1725158691406, 121.54108428955078, 212.0096893310547, 297.66290283203125, 20.33062744140625, 886.3817749023438, 93.50084686279297, 817.2913818359375, 446.83331298828125, 205.38934326171875, 190.9735565185547, -252.96945190429688, -148.86251831054688, 315.93023681640625, 248.32308959960938, 592.8861083984375, -145.708984375, 922.8551635742188, -192.91893005371094, 543.885498046875, 360.60406494140625, -109.96507263183594, -29.767059326171875, 135.23760986328125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000520.npy"}
{"epoch": 0.7635829662261381, "step": 521, "batch_size": 64, "mean": 340.52056884765625, "std": 455.440673828125, "min": -772.069580078125, "p10": -110.8704360961914, "median": 260.8106689453125, "p90": 971.2644470214846, "max": 1673.037109375, "pos_frac": 0.734375, "sample": [442.15423583984375, 754.342041015625, 266.5542907714844, 25.457176208496094, -57.635169982910156, -772.069580078125, -154.339599609375, -18.420211791992188, -112.48101806640625, 496.491943359375, 41.047760009765625, -161.70721435546875, 16.418132781982422, 654.7709350585938, 510.5802917480469, 1218.7547607421875, 84.49102783203125, -5.252407073974609, -287.03863525390625, 984.8367919921875, 232.81138610839844, -107.11241149902344, -98.81090545654297, 329.56451416015625, 543.709228515625, 629.894775390625, 618.8824462890625, 163.64877319335938, 17.118812561035156, -67.833984375, 378.3292236328125, 487.0262145996094, -11.565773010253906, 1039.436767578125, 205.29037475585938, 606.0306396484375, 525.389892578125, 210.29696655273438, 548.81689453125, -142.16458129882812, 255.06704711914062, 504.47552490234375, 313.1474609375, 23.018360137939453, 204.610107421875, 1091.42529296875, -106.67390441894531, 1095.222412109375, -6.98614501953125, -438.9126892089844, 543.21826171875, 404.00054931640625, 750.1151123046875, 34.33299255371094, 690.8031616210938, 939.5956420898438, 168.0541229248047, 1449.202880859375, -6.318000793457031, 1673.037109375, 450.7460021972656, 92.15715026855469, 804.2100219726562, 830.051513671875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000521.npy"}
{"epoch": 0.7650513950073421, "step": 522, "batch_size": 64, "mean": 308.4856262207031, "std": 478.03192138671875, "min": -866.1365356445312, "p10": -150.42037811279295, "median": 245.91671752929688, "p90": 1069.8461669921876, "max": 1265.844970703125, "pos_frac": 0.796875, "sample": [259.832763671875, 743.0653686523438, 838.9419555664062, -37.609519958496094, 1089.205322265625, 125.7315673828125, 1265.844970703125, 208.54067993164062, 45.83145523071289, -33.77302932739258, -155.22482299804688, 348.4514465332031, 706.7775268554688, 39.65373229980469, 481.56640625, -56.393829345703125, 298.0900573730469, -86.05482482910156, 471.58062744140625, 232.00067138671875, 649.2197875976562, 661.4179077148438, 129.10723876953125, 280.0526123046875, -661.6319580078125, 1136.868896484375, 312.85028076171875, 160.63021850585938, 501.96441650390625, 541.9851684570312, 320.607666015625, 107.85520935058594, 102.12751770019531, 45.39385986328125, 274.94512939453125, -866.1365356445312, 550.8785400390625, 181.07611083984375, 230.34765625, -316.0877685546875, -139.2100067138672, 448.694091796875, 1155.462890625, 116.00016021728516, -426.2166748046875, 479.05596923828125, 1024.6748046875, 347.4566650390625, -739.1486206054688, 1181.1707763671875, 966.4151000976562, 36.073150634765625, 1232.0653076171875, 126.35443878173828, 165.80844116210938, -441.9017333984375, 19.442359924316406, -100.36617279052734, 591.4929809570312, 288.86138916015625, 955.4812622070312, 84.14266967773438, 1154.7750244140625, 86.96461486816406], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000522.npy"}
{"epoch": 0.7665198237885462, "step": 523, "batch_size": 64, "mean": 322.1591491699219, "std": 548.8428955078125, "min": -977.9614868164062, "p10": -303.5126281738281, "median": 273.08985137939453, "p90": 1034.511413574219, "max": 1704.0853271484375, "pos_frac": 0.71875, "sample": [-142.8942108154297, 1187.49169921875, 114.59028625488281, 239.2712860107422, 396.51104736328125, -281.47625732421875, 426.75396728515625, 512.7554931640625, 931.7463989257812, 392.32342529296875, -823.661376953125, 745.9254150390625, 2.270387649536133, -26.091094970703125, -461.21734619140625, 152.1278533935547, 113.8641357421875, 802.0096435546875, -433.2277526855469, 202.70025634765625, 645.0779418945312, 966.2105712890625, -153.41436767578125, 24.53032112121582, 508.4776611328125, 113.68342590332031, 309.0251159667969, 89.59213256835938, -66.67003631591797, 1481.8807373046875, 544.3818969726562, 124.08125305175781, -81.85455322265625, -977.9614868164062, 1063.783203125, 902.1887817382812, -434.1670837402344, -40.19123840332031, 466.9425048828125, -312.956787109375, 698.73779296875, -105.71255493164062, 1704.0853271484375, 307.9327087402344, 851.2068481445312, -37.015716552734375, 306.9084167480469, 214.2764892578125, 1581.83251953125, 445.83343505859375, 554.918212890625, 94.58294677734375, 471.69586181640625, 106.59380340576172, 719.005859375, 1113.3712158203125, 782.7110595703125, -78.36747741699219, 585.0445556640625, 1241.2462158203125, 570.4356689453125, -692.2857666015625, 26.31188201904297, -69.5765380859375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000523.npy"}
{"epoch": 0.7679882525697503, "step": 524, "batch_size": 64, "mean": 345.7283020019531, "std": 517.4198608398438, "min": -1549.5469970703125, "p10": -103.3979393005371, "median": 409.9544219970703, "p90": 991.6176330566408, "max": 1645.3955078125, "pos_frac": 0.765625, "sample": [33.35932922363281, 1026.4205322265625, 405.50201416015625, 480.24884033203125, 434.7005310058594, 193.4796142578125, 494.9202880859375, 38.54401397705078, 572.5255126953125, 356.9705810546875, 448.03961181640625, 172.05990600585938, 409.5263977050781, 565.54931640625, 624.42578125, 384.3230285644531, -9.223403930664062, 1016.9912719726562, 306.6348876953125, 573.06494140625, -1549.5469970703125, 418.10723876953125, 468.46417236328125, 1645.3955078125, 0.8164215087890625, -28.14947509765625, 925.7449951171875, 402.502685546875, 546.255126953125, -16.07040786743164, 21.85976791381836, 410.3824462890625, 511.3890380859375, -193.77500915527344, 778.5882568359375, 335.1840515136719, 1297.607177734375, 1491.569091796875, 622.2393798828125, 441.7298583984375, -51.65386199951172, -906.2498779296875, 38.31300354003906, -106.08666229248047, 691.952880859375, -83.78317260742188, 422.1448974609375, 7.919166564941406, 407.6778564453125, 477.7872619628906, 1009.0833129882812, 1025.6934814453125, 552.6146240234375, -904.92822265625, -36.853782653808594, -97.12425231933594, 950.8643798828125, 604.8663330078125, -53.408409118652344, 452.3249206542969, 617.6092529296875, 404.11871337890625, -247.26995849609375, -107.35763549804688], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000524.npy"}
{"epoch": 0.7694566813509545, "step": 525, "batch_size": 64, "mean": 439.2078857421875, "std": 543.117431640625, "min": -711.7267456054688, "p10": -117.49062042236324, "median": 324.84861755371094, "p90": 1190.0038574218752, "max": 1803.668701171875, "pos_frac": 0.796875, "sample": [429.10546875, 1148.1639404296875, 462.3282470703125, 179.26962280273438, 87.67369842529297, 397.3432922363281, 1803.668701171875, 773.46533203125, 378.3827819824219, -198.51119995117188, 1202.1055908203125, -400.9896240234375, 984.8923950195312, 165.8706512451172, 230.78585815429688, -136.7982635498047, 152.49172973632812, 236.69235229492188, 967.966552734375, 657.4429321289062, 551.8303833007812, 63.92184829711914, -61.65631103515625, 98.40982055664062, -170.76788330078125, 337.1212463378906, 541.864990234375, 622.2525634765625, 311.4179992675781, 1161.7664794921875, 1124.0223388671875, -14.163612365722656, 1662.4619140625, 4.743827819824219, 1682.6591796875, 312.57598876953125, 169.255126953125, 245.61080932617188, 435.56158447265625, -17.985198974609375, 1587.5621337890625, -711.7267456054688, -156.58026123046875, -8.084503173828125, 339.6886291503906, 225.08303833007812, 579.85009765625, 75.97067260742188, 589.6488647460938, 176.73379516601562, 680.486083984375, 1364.48095703125, -72.439453125, -40.48382568359375, 705.9297485351562, 339.3317565917969, 299.3365783691406, 58.006736755371094, -428.34100341796875, 937.9661865234375, 231.89248657226562, 1526.3502197265625, 340.0172424316406, 886.3722534179688], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000525.npy"}
{"epoch": 0.7709251101321586, "step": 526, "batch_size": 64, "mean": 429.6107177734375, "std": 583.5489501953125, "min": -1164.5211181640625, "p10": -202.60290222167967, "median": 500.8017272949219, "p90": 1270.9987060546875, "max": 1491.04296875, "pos_frac": 0.734375, "sample": [1271.6689453125, 962.03125, 22.091266632080078, 197.6990966796875, 340.07733154296875, 278.3407897949219, -191.02320861816406, -1.5053768157958984, 574.213134765625, -772.717529296875, 593.8379516601562, 756.6517944335938, 714.5958862304688, 243.9642333984375, 1306.1531982421875, 1159.4305419921875, -104.76629638671875, 809.142822265625, 295.1156005859375, 574.894775390625, 417.7978210449219, 536.27099609375, 188.24110412597656, 1176.6807861328125, 534.46240234375, 526.1113891601562, 1491.04296875, -2.1386566162109375, 383.7691650390625, -213.82505798339844, -207.5656280517578, 626.0040283203125, -511.6714172363281, 1367.396728515625, -168.26498413085938, 387.3981628417969, 1054.848876953125, 798.187744140625, 196.21893310546875, -1164.5211181640625, 893.9966430664062, 686.178466796875, 550.6522216796875, 1316.0540771484375, -547.2625122070312, 775.67919921875, 754.911376953125, 1100.615966796875, -106.54280853271484, 509.4664306640625, 13.525604248046875, 237.27256774902344, 316.2768859863281, 1276.03173828125, 862.7835693359375, -132.13333129882812, 1269.434814453125, 612.3475952148438, 492.13702392578125, -57.90251159667969, -182.3975830078125, -76.11852264404297, -829.349609375, 1313.0892333984375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000526.npy"}
{"epoch": 0.7723935389133627, "step": 527, "batch_size": 64, "mean": 299.3970947265625, "std": 439.8367614746094, "min": -640.1596069335938, "p10": -249.33232879638672, "median": 253.49909210205078, "p90": 799.5681030273438, "max": 1281.025390625, "pos_frac": 0.765625, "sample": [-499.403076171875, 543.707763671875, 352.3028259277344, 425.9569396972656, 782.62255859375, 205.27481079101562, 1281.025390625, -104.99677276611328, -301.2315979003906, 756.7921142578125, -119.86317443847656, -152.72708129882812, 609.7001342773438, 302.88671875, -640.1596069335938, -587.0587158203125, 410.9849853515625, -130.97760009765625, 739.7230834960938, 397.87774658203125, -253.0992889404297, 1017.0732421875, 157.61688232421875, 66.51373291015625, 132.48545837402344, 794.9395751953125, -49.57704162597656, 126.2746353149414, 1012.2406005859375, 712.596923828125, 547.7779541015625, 1168.3414306640625, 389.56646728515625, 641.9288330078125, 1163.4166259765625, 1111.835693359375, 801.5517578125, -9.766576766967773, 200.90611267089844, 197.00167846679688, 614.8338012695312, 122.61136627197266, 243.4363555908203, 263.56182861328125, -127.23330688476562, 53.053558349609375, 375.61358642578125, 267.7264099121094, 204.27548217773438, -557.1087036132812, 298.6708679199219, 157.23931884765625, 621.1156616210938, 240.34344482421875, 111.23599243164062, 78.56950378417969, 716.4163818359375, 673.5621337890625, 593.9982299804688, 335.66168212890625, 156.45364379882812, -240.54275512695312, 44.44883728027344, -288.5902099609375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000527.npy"}
{"epoch": 0.7738619676945668, "step": 528, "batch_size": 64, "mean": 349.57666015625, "std": 576.2702026367188, "min": -843.54248046875, "p10": -397.8077453613281, "median": 303.3220520019531, "p90": 1190.654638671875, "max": 1809.103271484375, "pos_frac": 0.703125, "sample": [-16.261024475097656, 1212.881591796875, 309.9949645996094, 353.2858581542969, 143.29949951171875, -418.27716064453125, -109.04389190673828, 678.0946044921875, 613.28759765625, 1171.5794677734375, 366.5994873046875, -86.41258239746094, 396.625, -482.1766662597656, 170.2714385986328, 645.3434448242188, 306.76116943359375, -122.52444458007812, 1228.200439453125, 1809.103271484375, -80.11405181884766, 887.4769287109375, 602.0670776367188, 820.1641235351562, 477.51544189453125, 847.6078491210938, -589.3997192382812, 1405.97900390625, -326.1626281738281, 1025.802734375, -623.0363159179688, 299.8829345703125, 312.3854675292969, 253.02371215820312, 312.3193359375, 154.5302276611328, 544.1671142578125, 171.48683166503906, 484.5271911621094, -17.308677673339844, -97.2665786743164, 904.5547485351562, -120.37643432617188, -843.54248046875, 1460.1441650390625, 245.10324096679688, 432.8533630371094, 1567.638671875, 200.7888946533203, 7.26690673828125, -501.14849853515625, -583.1810913085938, 770.9600219726562, 1198.8297119140625, 162.19021606445312, 831.2213745117188, 590.0387573242188, 194.39837646484375, -350.0457763671875, -13.706680297851562, 94.56314086914062, -105.53303527832031, 1100.046630859375, 93.5633773803711], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000528.npy"}
{"epoch": 0.775330396475771, "step": 529, "batch_size": 64, "mean": 246.39369201660156, "std": 589.3685913085938, "min": -1654.232421875, "p10": -451.2996643066406, "median": 224.37876892089844, "p90": 960.8970520019533, "max": 1565.6239013671875, "pos_frac": 0.65625, "sample": [71.47450256347656, -143.29014587402344, 1565.6239013671875, -12.51824951171875, 168.02743530273438, 701.6688232421875, 428.8721923828125, 792.03271484375, 586.94677734375, 1565.02001953125, -265.4248962402344, 617.3268432617188, 29.911766052246094, -426.454833984375, 596.5389404296875, -153.6333770751953, 134.18215942382812, 87.32161712646484, 843.0192260742188, -26.643287658691406, -389.0469665527344, 256.6824951171875, 1155.282958984375, 893.1177368164062, 1038.687744140625, -673.2489624023438, -51.44495391845703, 241.18630981445312, -511.6439208984375, 724.2025146484375, 237.47146606445312, 446.3934631347656, -173.42269897460938, 310.66497802734375, -62.069488525390625, 35.43876647949219, 657.1795654296875, -12.239879608154297, 615.5493774414062, 790.9508666992188, -1654.232421875, 211.28607177734375, 934.000244140625, -509.662353515625, 602.1900024414062, -993.2454223632812, 156.09518432617188, 417.77703857421875, 972.4242553710938, 32.669960021972656, -760.2454223632812, 398.9053955078125, -61.34717559814453, 536.8128662109375, -461.94744873046875, 522.0897216796875, 133.37869262695312, 979.10986328125, 644.957275390625, 1262.17041015625, -253.95425415039062, -176.6694793701172, -238.89028930664062, 385.8301086425781], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000529.npy"}
{"epoch": 0.7767988252569751, "step": 530, "batch_size": 64, "mean": 396.9098205566406, "std": 697.9013671875, "min": -2079.937255859375, "p10": -277.9533782958984, "median": 238.72816467285156, "p90": 1517.871472167969, "max": 2162.455810546875, "pos_frac": 0.71875, "sample": [262.6783752441406, -326.3063659667969, -10.364952087402344, 209.28152465820312, 277.5069274902344, -281.8120422363281, 1786.37158203125, 573.0352783203125, 369.0068054199219, -635.908447265625, 129.28175354003906, -91.31737518310547, -41.421875, -117.75137329101562, 756.0869140625, 832.0814208984375, -3.0333080291748047, -17.40509033203125, -408.7667236328125, 1557.728271484375, 1832.3770751953125, 650.5313720703125, 966.8251342773438, 731.1828002929688, -105.00993347167969, 799.9825439453125, -79.4869384765625, 231.33615112304688, -268.9498291015625, 571.4217529296875, -6.25416374206543, 1610.7354736328125, 1186.7445068359375, 472.1954345703125, 367.1205749511719, -17.464344024658203, 802.059326171875, 121.6451187133789, -289.1800842285156, -457.27362060546875, 130.29270935058594, 1877.1322021484375, 722.9263916015625, 2162.455810546875, 122.4869384765625, 458.8116149902344, 427.3262023925781, 246.12017822265625, 1486.476318359375, 146.01022338867188, 345.8042297363281, 19.98712730407715, 837.2813110351562, -2079.937255859375, 1289.3387451171875, 74.73268127441406, 33.439300537109375, 1531.3265380859375, 556.55712890625, 225.5174560546875, 78.44642639160156, 93.99871826171875, 486.5889892578125, 189.60031127929688], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000530.npy"}
{"epoch": 0.7782672540381792, "step": 531, "batch_size": 64, "mean": 352.80865478515625, "std": 542.1387329101562, "min": -1078.8956298828125, "p10": -255.38365325927734, "median": 277.51885986328125, "p90": 1010.3387756347657, "max": 1815.1395263671875, "pos_frac": 0.796875, "sample": [560.370361328125, 136.18699645996094, 685.3131103515625, 1097.797607421875, 981.0978393554688, -442.90081787109375, 317.5514831542969, 227.87838745117188, 1632.89794921875, -77.01296997070312, -242.7849884033203, 459.2234191894531, 6.269086837768555, 377.0744934082031, 270.1279296875, 283.74560546875, 424.08746337890625, 271.2921142578125, -1078.8956298828125, 372.2950439453125, 993.9588012695312, 761.1748657226562, 424.01434326171875, 81.09571838378906, 773.3505859375, 220.25961303710938, 475.0806579589844, 235.3268585205078, 424.2779235839844, -54.265342712402344, 1211.883544921875, -283.64227294921875, 675.5036010742188, 688.98681640625, 101.14241027832031, 316.40997314453125, -26.42261505126953, -548.19580078125, -15.683059692382812, 145.52532958984375, 1815.1395263671875, 377.5906982421875, 104.18819427490234, 3.9243927001953125, 1814.0045166015625, -423.1442565917969, 540.6343994140625, 1342.6373291015625, 265.2623291015625, 882.7667236328125, 1017.3587646484375, 79.34554290771484, 623.2630615234375, -240.72567749023438, 183.68710327148438, 145.82907104492188, 566.609375, 198.4387969970703, 389.0210876464844, 32.55659484863281, 226.75723266601562, 668.4097290039062, -634.4132080078125, -260.7830810546875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000531.npy"}
{"epoch": 0.7797356828193832, "step": 532, "batch_size": 64, "mean": 355.6239318847656, "std": 637.0980224609375, "min": -1189.588134765625, "p10": -444.8611694335938, "median": 292.1789093017578, "p90": 1120.4902709960938, "max": 1884.9617919921875, "pos_frac": 0.78125, "sample": [1130.169189453125, 465.703857421875, 1591.118896484375, -315.6504211425781, 1336.7796630859375, 225.64083862304688, 745.4224853515625, 788.26171875, -1189.588134765625, 321.3642883300781, -340.1661376953125, 1387.487060546875, 511.0514221191406, 735.123779296875, -445.8265380859375, 792.450439453125, 555.33935546875, -894.5028076171875, 135.71324157714844, 331.42327880859375, 384.7380065917969, 1567.257080078125, 1884.9617919921875, 907.348876953125, 623.8621826171875, 731.2010498046875, -143.6626739501953, 958.7186279296875, 1097.9061279296875, 197.70562744140625, 4.760307312011719, 373.9903869628906, -146.18902587890625, 972.709716796875, 855.9609375, 134.79806518554688, 202.69143676757812, 957.9998779296875, 45.307456970214844, 285.8203430175781, 78.18480682373047, 752.941162109375, 190.4234619140625, 568.106689453125, 234.51344299316406, -62.28192901611328, 59.82660675048828, 734.5425415039062, 144.39334106445312, 27.72704315185547, -563.1094970703125, -712.5575561523438, 284.0218505859375, 1609.5963134765625, 291.1160888671875, -112.0906982421875, 293.2417297363281, -1002.71337890625, 64.97808837890625, 226.5406494140625, -442.608642578125, 698.4180908203125, -668.4871215820312, 300.00775146484375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000532.npy"}
{"epoch": 0.7812041116005873, "step": 533, "batch_size": 64, "mean": 255.36468505859375, "std": 560.594970703125, "min": -1454.89990234375, "p10": -310.57067871093744, "median": 231.76821899414062, "p90": 1038.1937438964844, "max": 2087.696044921875, "pos_frac": 0.671875, "sample": [591.7857666015625, -383.72283935546875, 793.2392578125, -473.7685241699219, 319.531982421875, 197.37313842773438, 1169.455322265625, 574.9642333984375, 328.1201171875, 433.89111328125, 727.0784912109375, 606.6719970703125, 605.3203125, 613.486083984375, -176.86038208007812, 446.0144958496094, 1014.9310913085938, 197.57620239257812, 266.4815673828125, -878.8497314453125, 1265.1953125, 241.145263671875, -142.7041015625, 590.7625732421875, 24.750965118408203, 572.7527465820312, -39.806312561035156, 173.06651306152344, 222.39117431640625, 2087.696044921875, 422.18475341796875, 410.9798278808594, -66.7708740234375, -42.666465759277344, 33.79580307006836, -1454.89990234375, -326.1321716308594, 414.4264221191406, -239.99066162109375, 379.4006042480469, 148.1890106201172, 100.66584777832031, 1178.6866455078125, 1071.209716796875, -131.76412963867188, -17.179763793945312, -274.2605285644531, -4.913299560546875, 291.84429931640625, 255.27877807617188, 285.8902893066406, -151.93429565429688, 1048.1634521484375, 123.94786834716797, -438.56646728515625, -246.53350830078125, -814.771484375, 141.14337158203125, 160.48422241210938, 1170.850341796875, -196.492431640625, -137.90188598632812, 754.5849609375, 528.4204711914062], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000533.npy"}
{"epoch": 0.7826725403817915, "step": 534, "batch_size": 64, "mean": 351.9185791015625, "std": 519.045166015625, "min": -754.267578125, "p10": -214.3913421630859, "median": 257.5768737792969, "p90": 921.966229248047, "max": 1784.672607421875, "pos_frac": 0.78125, "sample": [426.231201171875, 900.1992797851562, -37.52281188964844, 1784.672607421875, 38.88957595825195, 747.9620971679688, -545.85595703125, 258.7155456542969, 341.81695556640625, -578.0623168945312, 49.43492126464844, 1152.865234375, -145.57858276367188, 187.90093994140625, 1165.045654296875, 844.9959106445312, 801.2054443359375, -272.3796691894531, 366.1474609375, 799.9821166992188, 316.94415283203125, -192.17721557617188, 396.4741516113281, 441.90496826171875, 256.4382019042969, 356.5364685058594, 954.72216796875, -6.695034027099609, 442.5333251953125, 152.19857788085938, 89.33956146240234, 816.1849365234375, 259.1375732421875, 156.60971069335938, 637.5645141601562, -521.956787109375, -82.45706176757812, 1681.9798583984375, 751.6392211914062, 931.294921875, 151.95901489257812, 205.21519470214844, -25.526491165161133, 145.99200439453125, 835.7865600585938, 227.2019805908203, 200.48768615722656, 816.052978515625, 1499.9735107421875, 898.7501831054688, 662.5095825195312, 375.4058532714844, 686.489990234375, 81.81852722167969, 434.6723937988281, 203.584228515625, 232.93478393554688, 234.4901123046875, 244.73843383789062, 45.074951171875, -114.81302642822266, -754.267578125, -223.91168212890625, -666.7124633789062], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000534.npy"}
{"epoch": 0.7841409691629956, "step": 535, "batch_size": 64, "mean": 410.51568603515625, "std": 532.381103515625, "min": -819.505615234375, "p10": -146.3598205566406, "median": 395.2673645019531, "p90": 993.2119445800781, "max": 2347.827880859375, "pos_frac": 0.8125, "sample": [-402.9993896484375, 2347.827880859375, -261.68597412109375, 545.498291015625, -124.24671936035156, 227.0255126953125, 84.5567626953125, 910.6881103515625, 117.36981201171875, 601.9405517578125, 196.72975158691406, 114.32669830322266, 315.5055236816406, 875.14599609375, 478.8168640136719, 405.203369140625, 160.22640991210938, 1260.068603515625, 298.1472473144531, 433.965087890625, 981.82421875, 500.77435302734375, 549.4241943359375, 439.0396423339844, 71.71589660644531, 1360.966064453125, 984.8086547851562, 545.3248291015625, -100.7840576171875, 352.6920471191406, 1388.0093994140625, 814.3316040039062, 996.8133544921875, -332.4589538574219, -223.17977905273438, -819.505615234375, 468.6904602050781, 829.0413818359375, 57.20458984375, 407.2484130859375, 385.33135986328125, 1084.2962646484375, 761.289306640625, 204.4329833984375, 515.77783203125, -58.78251647949219, 53.09380340576172, 744.732421875, -151.88551330566406, -572.6199951171875, 602.3790893554688, 73.74275970458984, 503.9814453125, 55.410194396972656, -133.46653747558594, -102.99896240234375, 971.7344970703125, 107.88558959960938, 1519.3125, 280.8934326171875, 29.119714736938477, 552.10302734375, 381.04412841796875, 610.107421875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000535.npy"}
{"epoch": 0.7856093979441997, "step": 536, "batch_size": 64, "mean": 476.09051513671875, "std": 683.5867919921875, "min": -1235.1121826171875, "p10": -92.70287322998044, "median": 285.99749755859375, "p90": 1140.0483520507814, "max": 3879.381591796875, "pos_frac": 0.828125, "sample": [173.4527130126953, 814.335693359375, 322.72430419921875, 958.6083374023438, 1277.221923828125, 32.33345031738281, 201.189697265625, 97.7738037109375, 42.74465560913086, -25.044174194335938, 844.0843505859375, 325.8743591308594, 1047.402099609375, 804.3150634765625, -257.6571350097656, 504.8641357421875, 1215.327392578125, 86.90800476074219, 769.2614135742188, 785.550537109375, 2270.90625, 543.3781127929688, -150.24261474609375, 731.1109619140625, -105.56131744384766, 1153.6949462890625, 27.06542205810547, 634.8695068359375, 667.3071899414062, -31.57470703125, 3879.381591796875, 760.4231567382812, 294.99078369140625, 163.97244262695312, 512.5076904296875, 664.9573364257812, 150.3733367919922, 1108.206298828125, -313.2650451660156, 50.856746673583984, 170.4920196533203, -1235.1121826171875, 1533.5400390625, 813.90185546875, 632.4052734375, -143.91331481933594, 108.06842041015625, 220.298095703125, 264.1300048828125, 120.3961181640625, 277.00421142578125, -62.69983673095703, 256.2242736816406, 270.19049072265625, 240.0350341796875, 720.03369140625, 100.26112365722656, -32.6257209777832, -317.79791259765625, 50.508636474609375, 1005.388427734375, 762.6043701171875, 402.57171630859375, 1279.2598876953125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000536.npy"}
{"epoch": 0.7870778267254038, "step": 537, "batch_size": 64, "mean": 330.7102355957031, "std": 548.654296875, "min": -1335.42626953125, "p10": -300.1300903320312, "median": 305.7471008300781, "p90": 1039.4447509765625, "max": 1348.2100830078125, "pos_frac": 0.828125, "sample": [-1335.42626953125, -777.834716796875, 21.28046417236328, 179.53558349609375, -515.5486450195312, 307.096923828125, 40.26368713378906, 904.7385864257812, 11.697067260742188, -32.956546783447266, 488.2515563964844, -2.5754241943359375, 510.6323547363281, 419.088134765625, -309.8614501953125, 878.68359375, 1055.596923828125, 21.818937301635742, 304.39727783203125, 19.67426300048828, 31.103425979614258, 910.2938232421875, 47.173866271972656, -703.223876953125, 1006.6436767578125, -277.423583984375, 1281.81884765625, 985.3348388671875, 370.05853271484375, 517.7047119140625, 1020.854248046875, 417.5579833984375, 1019.7251586914062, 380.1630859375, 799.0304565429688, 50.965858459472656, 103.70806884765625, 576.4752807617188, 301.91162109375, -472.54339599609375, 197.34420776367188, 474.5375671386719, -35.49363708496094, -866.6849365234375, 73.05101013183594, 1093.263671875, 13.588130950927734, 203.06080627441406, 339.5233154296875, 691.5428466796875, 179.86541748046875, 1348.2100830078125, 119.10857391357422, 1177.37646484375, 572.6885375976562, 68.5921859741211, 1157.6937255859375, 98.64697265625, 1047.412109375, 333.1304931640625, 487.38165283203125, 705.8607177734375, 112.42486572265625, 1017.4456787109375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000537.npy"}
{"epoch": 0.788546255506608, "step": 538, "batch_size": 64, "mean": 371.4954833984375, "std": 496.7608642578125, "min": -616.6527099609375, "p10": -226.8523712158203, "median": 351.6288604736328, "p90": 988.268133544922, "max": 1884.421630859375, "pos_frac": 0.75, "sample": [139.1995391845703, 430.959716796875, 753.726318359375, 1044.6904296875, 513.2607421875, -224.64105224609375, 974.6627807617188, 614.77783203125, 428.3271484375, 558.5321044921875, -94.20806121826172, 1640.4779052734375, 541.8173828125, 506.86492919921875, 211.91282653808594, 1031.0548095703125, -616.6527099609375, -27.70551300048828, 203.1669464111328, 1085.2423095703125, 287.59185791015625, 472.3149719238281, 143.30294799804688, 725.043212890625, 474.21600341796875, 134.36439514160156, -70.93243408203125, 1339.6671142578125, -23.958404541015625, -327.1835632324219, -481.20867919921875, 804.0105590820312, 205.2424774169922, -174.7114715576172, 1884.421630859375, 660.3656616210938, 906.2609252929688, 546.5692749023438, 357.1409912109375, 672.9251708984375, 185.99700927734375, -86.68427276611328, 12.34762954711914, 580.5375366210938, 297.6657409667969, 569.0128173828125, -551.7205200195312, 532.97509765625, -227.80007934570312, 106.706298828125, 626.3023681640625, 346.1167297363281, 342.24176025390625, 705.5625, -163.3746795654297, -35.75676727294922, 178.3463134765625, 183.58253479003906, 994.0989990234375, 965.8538208007812, -343.8232421875, -256.90924072265625, 502.0578308105469, 61.46599578857422], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000538.npy"}
{"epoch": 0.7900146842878121, "step": 539, "batch_size": 64, "mean": 342.2251892089844, "std": 446.94439697265625, "min": -566.1139526367188, "p10": -167.7212173461914, "median": 256.3620071411133, "p90": 908.6399353027348, "max": 1505.6702880859375, "pos_frac": 0.765625, "sample": [281.7489318847656, 341.914794921875, 66.8249740600586, -33.508514404296875, 410.8336181640625, 424.83587646484375, -211.38661193847656, 26.83814239501953, -34.187957763671875, 1163.929931640625, -299.18414306640625, -23.31138038635254, 1505.6702880859375, 185.08168029785156, 946.4743041992188, 593.30126953125, 667.5928344726562, 763.6986694335938, 28.5050048828125, 806.6547241210938, 1505.3748779296875, 116.80511474609375, 229.9059600830078, 704.7838134765625, 160.17291259765625, 116.36129760742188, 29.957260131835938, 473.1644287109375, -47.56434631347656, 668.87548828125, -357.4225769042969, 596.5797119140625, -566.1139526367188, 457.96136474609375, 554.6849365234375, 49.372802734375, 88.84955596923828, 500.1373291015625, 194.89862060546875, -287.8410339355469, -53.94941711425781, 1352.1695556640625, -177.42636108398438, 716.7679443359375, -38.1253547668457, -164.288818359375, 950.5928344726562, -169.19224548339844, 137.73837280273438, -30.645652770996094, 627.6856079101562, 202.19216918945312, 723.6884765625, 388.5965270996094, 432.47064208984375, 230.97508239746094, 820.3597412109375, 513.7925415039062, 409.34033203125, 358.6556396484375, 18.181013107299805, 1247.2110595703125, 506.49005126953125, 97.86161804199219], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000539.npy"}
{"epoch": 0.7914831130690162, "step": 540, "batch_size": 64, "mean": 444.666748046875, "std": 613.171142578125, "min": -923.8980712890625, "p10": -238.95052185058586, "median": 313.45579528808594, "p90": 1319.5245849609378, "max": 2420.5654296875, "pos_frac": 0.765625, "sample": [471.2307434082031, 678.27880859375, 633.9744262695312, -279.2464599609375, 2009.6961669921875, -133.4832763671875, 309.6021423339844, 112.64813232421875, -923.8980712890625, -469.4461669921875, -55.800270080566406, 486.1973571777344, -29.355361938476562, 290.74896240234375, 2420.5654296875, -77.96424102783203, 245.89447021484375, 94.13864135742188, 662.791748046875, 663.36083984375, -18.075103759765625, 155.49847412109375, 363.947021484375, 788.1934814453125, 800.2962646484375, 1224.7418212890625, 510.9085388183594, 1257.0823974609375, 1346.2855224609375, 288.2535400390625, 510.8326110839844, 432.06884765625, 705.7750244140625, 103.1575698852539, 246.59210205078125, 478.9171447753906, -144.92666625976562, -16.846527099609375, 1443.197509765625, 225.92050170898438, 1094.411865234375, 1541.0872802734375, 1512.8203125, 1153.2845458984375, -445.2430725097656, 651.500732421875, 216.28799438476562, 209.8031768798828, 750.0864868164062, 314.72412109375, -330.1925048828125, 272.9988708496094, 647.5283813476562, 805.749755859375, 164.230712890625, 312.1874694824219, 614.97216796875, 196.04806518554688, 1365.161376953125, -108.70726776123047, 29.204383850097656, 641.656005859375, -548.0139770507812, -414.6681213378906], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000540.npy"}
{"epoch": 0.7929515418502202, "step": 541, "batch_size": 64, "mean": 233.8116912841797, "std": 548.9639282226562, "min": -1041.0579833984375, "p10": -448.0837615966796, "median": 200.28634643554688, "p90": 851.0110351562504, "max": 1926.9605712890625, "pos_frac": 0.734375, "sample": [-898.6190185546875, 187.75643920898438, 528.1075439453125, 120.80319213867188, 180.53509521484375, 58.822845458984375, 317.2491149902344, 62.43342971801758, 249.89801025390625, -16.0196533203125, -153.50912475585938, 1211.283447265625, -749.8507080078125, -43.69450378417969, 976.5364990234375, 142.76535034179688, 238.36798095703125, 1171.81005859375, 93.6911849975586, 718.916015625, 1488.5469970703125, 768.9091186523438, 2.730316162109375, -660.7708129882812, 245.96978759765625, -289.6490478515625, 290.42474365234375, 299.0914001464844, -478.4141845703125, 652.851318359375, 614.4564208984375, 62.80937957763672, 745.19580078125, 166.7974395751953, 183.52786254882812, 153.14443969726562, 167.98968505859375, 169.1888885498047, 571.4395141601562, 416.00140380859375, -136.2827606201172, 151.39683532714844, 707.237548828125, 886.1975708007812, 611.6378173828125, 353.49847412109375, -1041.0579833984375, -531.0729370117188, 419.0699768066406, 534.1402587890625, -273.4907531738281, 398.7330627441406, 224.65573120117188, 1926.9605712890625, 1106.981689453125, -300.5567626953125, -877.2498168945312, 512.2205810546875, -377.3127746582031, -169.13540649414062, -185.28216552734375, 212.81625366210938, 599.105224609375, 243.21449279785156], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000541.npy"}
{"epoch": 0.7944199706314243, "step": 542, "batch_size": 64, "mean": 451.26104736328125, "std": 610.035400390625, "min": -1052.290771484375, "p10": -223.01857757568357, "median": 395.55072021484375, "p90": 1156.9998046875, "max": 2230.7978515625, "pos_frac": 0.796875, "sample": [1150.455078125, 434.3880615234375, 232.8455047607422, 231.34091186523438, 212.12994384765625, 937.1685180664062, -279.4571533203125, 2230.7978515625, 1159.8046875, 499.31781005859375, 37.71568298339844, 473.388916015625, 838.4603271484375, -75.76876831054688, -14.563409805297852, 484.06768798828125, 1827.3291015625, 316.62445068359375, -34.54509735107422, 1722.013671875, 328.9532775878906, 103.73634338378906, 360.16302490234375, 480.4525146484375, -365.4029846191406, 207.9416961669922, -97.27191162109375, -437.69384765625, -203.0858917236328, 874.9417114257812, -231.5611572265625, 855.752197265625, 812.1484985351562, 160.23635864257812, 1659.758544921875, -43.93948745727539, 611.3575439453125, 466.7292175292969, -544.2444458007812, 651.4972534179688, 361.7438049316406, 452.0263366699219, 677.8394775390625, 82.11182403564453, 1453.643310546875, 195.25680541992188, 269.5918884277344, -1052.290771484375, 654.2540283203125, 164.7244415283203, 714.3792114257812, 353.87469482421875, 101.83586120605469, 429.3576354980469, 1571.3406982421875, 1092.615234375, 309.11895751953125, -877.1859130859375, 1135.5589599609375, 313.3292541503906, 443.9443054199219, 619.383056640625, 613.6666259765625, 766.6063232421875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000542.npy"}
{"epoch": 0.7958883994126285, "step": 543, "batch_size": 64, "mean": 318.96612548828125, "std": 708.8117065429688, "min": -890.6671142578125, "p10": -333.80487060546875, "median": 162.00159454345703, "p90": 1234.2927246093757, "max": 3208.87646484375, "pos_frac": 0.671875, "sample": [22.144805908203125, -36.234619140625, 712.974365234375, -50.965576171875, 48.11345672607422, 2098.7060546875, 4.712841033935547, -31.912307739257812, -890.6671142578125, 768.050537109375, -50.225502014160156, -768.48193359375, 42.37771224975586, 199.1258544921875, 2003.6292724609375, 96.39857482910156, 37.921142578125, 3208.87646484375, -593.2322998046875, 12.431816101074219, 481.00482177734375, -389.82818603515625, -325.92730712890625, 207.0140838623047, -337.18096923828125, -131.10690307617188, -81.7578125, 565.3942260742188, -528.8989868164062, 169.29197692871094, 613.4000244140625, 204.92750549316406, -273.54083251953125, 1419.5546875, -166.13009643554688, -197.3209991455078, 1868.731201171875, 583.0091552734375, 1072.5740966796875, 66.95870208740234, 1303.6007080078125, 148.5769805908203, -205.8025665283203, 308.94854736328125, 782.1980590820312, -8.649543762207031, 354.6737060546875, 1699.8292236328125, -394.40911865234375, 593.3467407226562, 199.16525268554688, -95.19780731201172, 347.06201171875, -184.81314086914062, 473.4775085449219, 738.22802734375, 289.70330810546875, 4.026573181152344, 167.06956481933594, 488.5677795410156, 156.93362426757812, 546.485595703125, 574.1609497070312, 472.73687744140625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000543.npy"}
{"epoch": 0.7973568281938326, "step": 544, "batch_size": 64, "mean": 369.7061767578125, "std": 533.4586791992188, "min": -968.2061767578125, "p10": -152.31194305419922, "median": 343.34117126464844, "p90": 1062.563842773438, "max": 1741.216064453125, "pos_frac": 0.78125, "sample": [969.5899658203125, 554.7386474609375, 350.89111328125, 508.7677917480469, 1441.2197265625, -840.3428955078125, 359.48431396484375, -77.51985931396484, 715.5991821289062, 5.408414840698242, 1230.888671875, 2.1329479217529297, 774.723876953125, 232.12623596191406, -851.4552612304688, 335.7912292480469, 681.920654296875, 223.83563232421875, 27.46136474609375, 419.7769470214844, 392.332763671875, 397.42755126953125, -225.17514038085938, 243.29898071289062, 126.52676391601562, 124.40449523925781, 719.300537109375, 85.70404052734375, -254.79859924316406, -153.70626831054688, 1516.885009765625, 197.89785766601562, -79.8639144897461, 557.4611206054688, -968.2061767578125, 1102.4097900390625, 534.6346435546875, 376.65606689453125, 700.92529296875, -149.0585174560547, 277.7523193359375, 20.019649505615234, 277.2781066894531, 1492.11474609375, 719.0831909179688, -53.94962692260742, 188.3291473388672, 1741.216064453125, 790.4287109375, 566.8885498046875, -4.689701080322266, 150.17190551757812, 718.1867065429688, 1382.51904296875, 629.477783203125, 226.2919158935547, 439.3584289550781, -55.51182556152344, 474.2950744628906, 169.97996520996094, -18.488265991210938, 727.2974853515625, -238.88864135742188, 731.9395141601562], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000544.npy"}
{"epoch": 0.7988252569750367, "step": 545, "batch_size": 64, "mean": 425.98883056640625, "std": 541.908203125, "min": -645.8091430664062, "p10": -86.71822128295895, "median": 358.8108825683594, "p90": 1298.9169921875002, "max": 1740.084716796875, "pos_frac": 0.84375, "sample": [-158.89816284179688, 27.955841064453125, -604.7918701171875, 65.46676635742188, 278.0171813964844, 697.2229614257812, 221.67819213867188, 101.57499694824219, 481.4985656738281, -312.9366149902344, 473.2700500488281, 718.7255859375, 117.18126678466797, 598.0429077148438, 1079.5325927734375, 39.98798370361328, 1569.65185546875, 1248.8753662109375, 1218.2406005859375, 387.73114013671875, 775.7991943359375, 93.93124389648438, 400.572265625, 159.88307189941406, 20.454490661621094, 393.7239074707031, -12.03216552734375, 415.71063232421875, 837.7049560546875, -51.816429138183594, 1352.5758056640625, 402.9412841796875, 144.80787658691406, -101.67613220214844, 1740.084716796875, 989.745849609375, 8.146621704101562, 1460.4759521484375, -636.4956665039062, 367.89990234375, -24.891014099121094, 484.6877746582031, 98.24649047851562, 587.478759765625, 349.72186279296875, 670.9617919921875, 886.56005859375, 1320.3634033203125, 267.3514709472656, 236.54376220703125, 50.471214294433594, 80.23481750488281, 307.7348937988281, 390.5755920410156, 48.078887939453125, 1491.4176025390625, 1212.292236328125, -215.32241821289062, 383.6517639160156, 173.81011962890625, 577.8851318359375, -645.8091430664062, 174.8100128173828, 1345.96484375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000545.npy"}
{"epoch": 0.8002936857562408, "step": 546, "batch_size": 64, "mean": 300.63092041015625, "std": 446.3199768066406, "min": -1361.48681640625, "p10": -202.28521881103515, "median": 303.7676544189453, "p90": 831.4216186523438, "max": 1150.727294921875, "pos_frac": 0.75, "sample": [-160.38485717773438, 109.11499786376953, 459.18414306640625, 39.2401123046875, 265.8701171875, 432.83929443359375, 290.5367431640625, 488.4241638183594, 172.2532196044922, 208.12393188476562, -170.64456176757812, -227.36871337890625, -63.08079528808594, -79.46966552734375, 709.6571044921875, 122.44770812988281, 28.41944122314453, 345.0080261230469, 840.0990600585938, 268.83868408203125, -88.13445281982422, 801.2881469726562, -262.1936950683594, 1150.727294921875, 596.7977905273438, 768.5248413085938, 694.2891845703125, 488.69952392578125, 912.8756713867188, 336.1841125488281, 1016.2637939453125, -298.82769775390625, -24.447532653808594, 402.0862121582031, 484.68963623046875, -701.9136352539062, 613.6336059570312, 496.4271240234375, 739.7078247070312, 876.6096801757812, -1361.48681640625, 696.2635498046875, 381.029296875, 262.32904052734375, 452.6953430175781, 767.3931884765625, 109.52432250976562, 811.1742553710938, -8.193275451660156, 139.86767578125, 96.27259063720703, 187.7555389404297, 721.079345703125, 316.9985656738281, -185.2336883544922, 579.798095703125, 124.55416870117188, 1059.5684814453125, -209.593017578125, 73.82775115966797, -30.31428337097168, 427.7834167480469, -257.61456298828125, 1002.5039672851562], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000546.npy"}
{"epoch": 0.801762114537445, "step": 547, "batch_size": 64, "mean": 376.65985107421875, "std": 539.6768798828125, "min": -1232.557861328125, "p10": -121.76744079589841, "median": 358.335693359375, "p90": 939.162567138672, "max": 2053.82470703125, "pos_frac": 0.75, "sample": [505.4512939453125, 64.12207794189453, -1232.557861328125, 950.193115234375, -81.57484436035156, 5.120513916015625, 301.21685791015625, 662.97998046875, 228.532958984375, 548.6412963867188, 169.05145263671875, 1834.50244140625, 48.350894927978516, 1635.0592041015625, -136.4010467529297, -85.44886016845703, 533.6427001953125, 598.1320190429688, -86.38847351074219, 913.4246215820312, -33.17955780029297, 133.40570068359375, 445.7979431152344, 250.76016235351562, 576.839111328125, -158.99932861328125, 527.9931640625, 235.4291229248047, 209.03819274902344, 392.7738342285156, 889.4632568359375, -52.47929382324219, 507.0334167480469, 638.3042602539062, 538.6403198242188, 423.0987548828125, -87.62236022949219, 858.983154296875, 323.8975524902344, 83.18951416015625, 723.6558227539062, 2053.82470703125, 580.6080932617188, -601.7045288085938, -279.74603271484375, -1.7695693969726562, -374.99188232421875, 1031.5389404296875, 126.03099060058594, 480.3226013183594, 1378.891357421875, 264.34283447265625, 484.1197204589844, 416.35540771484375, 201.558837890625, 230.9199981689453, 813.75537109375, -80.31280517578125, 399.88433837890625, -68.72021484375, 667.4275512695312, 1287.565185546875, 454.6501159667969, -160.39385986328125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000547.npy"}
{"epoch": 0.8032305433186491, "step": 548, "batch_size": 64, "mean": 358.396728515625, "std": 573.8877563476562, "min": -888.2152099609375, "p10": -164.43082122802736, "median": 276.4086456298828, "p90": 852.657196044922, "max": 2681.704833984375, "pos_frac": 0.765625, "sample": [-2.6453094482421875, 795.0440673828125, 266.39910888671875, 358.4922790527344, 8.609611511230469, 299.1425476074219, -888.2152099609375, 1303.1231689453125, 420.44000244140625, 286.4181823730469, 376.05621337890625, 670.078857421875, 1151.983642578125, 263.08685302734375, 680.4430541992188, -235.0513916015625, -91.08601379394531, 390.3816223144531, 892.3712158203125, 124.71121978759766, 795.1533813476562, 306.4836120605469, 198.48117065429688, -343.21954345703125, 420.8526306152344, 28.691673278808594, 226.11036682128906, 334.57470703125, 326.5863037109375, 172.4412384033203, -162.80526733398438, 2681.704833984375, 865.8211059570312, 188.723876953125, 131.8500213623047, -503.343017578125, -69.59683227539062, 669.5845336914062, -225.39437866210938, 2310.93701171875, 82.34271240234375, -85.91445922851562, -165.1274871826172, 506.6202392578125, -77.38189697265625, 102.11721801757812, 432.387451171875, 136.56103515625, 473.4346008300781, -168.14645385742188, 476.46136474609375, 357.2922668457031, -12.771730422973633, 1877.396240234375, 439.26055908203125, 477.4775390625, 103.72209167480469, -23.630645751953125, 199.7584686279297, 250.81802368164062, 35.643211364746094, 633.111572265625, 821.94140625, 640.5946044921875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000548.npy"}
{"epoch": 0.8046989720998532, "step": 549, "batch_size": 64, "mean": 275.1691589355469, "std": 513.5498657226562, "min": -990.291259765625, "p10": -210.75121917724607, "median": 181.54603576660156, "p90": 931.5884399414066, "max": 1587.0479736328125, "pos_frac": 0.703125, "sample": [261.5070495605469, 829.0223999023438, 113.26307678222656, 35.916770935058594, 114.42753601074219, 82.1483154296875, -101.47444152832031, 500.177001953125, 663.7034912109375, 855.3092041015625, -184.46119689941406, 1394.821044921875, 108.29654693603516, 176.61477661132812, -75.842529296875, -325.32958984375, 964.279541015625, -346.1734924316406, -166.83694458007812, -512.214111328125, 594.8358154296875, 1094.41064453125, -306.0294189453125, 218.405029296875, 277.979248046875, 275.7624206542969, 354.0211181640625, -48.97325897216797, 774.3824462890625, 802.398193359375, -35.16473388671875, 239.84364318847656, 249.8192138671875, -174.74130249023438, 351.6493835449219, -990.291259765625, 72.1319351196289, 703.140625, 72.62679290771484, 70.27699279785156, -61.802703857421875, -42.777305603027344, 336.2847900390625, 793.4177856445312, 1193.3646240234375, 94.54826354980469, -140.73826599121094, 275.30987548828125, -222.01837158203125, 1529.4349365234375, 1394.9154052734375, -750.3717041015625, -60.63124084472656, 693.854736328125, 453.817138671875, -182.61007690429688, 186.477294921875, 169.03671264648438, 566.271240234375, 1587.0479736328125, 341.581298828125, 331.10986328125, 133.13197326660156, 8.534385681152344], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000549.npy"}
{"epoch": 0.8061674008810573, "step": 550, "batch_size": 64, "mean": 311.88720703125, "std": 489.3760070800781, "min": -1075.4822998046875, "p10": -250.5017776489257, "median": 303.83673095703125, "p90": 804.6026000976565, "max": 1838.2447509765625, "pos_frac": 0.8125, "sample": [321.8393249511719, 306.3974304199219, 1022.7275390625, 392.8270568847656, 317.9806213378906, 730.6454467773438, 974.7586669921875, 292.34869384765625, -178.2097625732422, 218.71641540527344, 642.9741821289062, 372.59161376953125, 540.1654052734375, 191.09567260742188, 337.0589599609375, 1730.7484130859375, 40.41454315185547, -328.5146789550781, 79.23808288574219, 121.84649658203125, -1075.4822998046875, -15.426116943359375, 471.45806884765625, 669.1221923828125, -47.46406555175781, 825.0391845703125, 737.9439697265625, 130.640869140625, 254.53744506835938, 180.10206604003906, 38.510894775390625, -722.9266967773438, 306.275146484375, -109.41329193115234, 310.8269958496094, 756.917236328125, 68.16175842285156, 381.98419189453125, 1838.2447509765625, 407.8056945800781, 301.3983154296875, 600.79248046875, 74.91310119628906, 59.626800537109375, 1321.0953369140625, 469.30572509765625, 1070.1636962890625, 88.82667541503906, 173.87338256835938, 575.1172485351562, 491.8877258300781, 546.3073120117188, -580.115234375, 484.8807373046875, -121.77182006835938, 272.14825439453125, 237.96653747558594, 510.4745788574219, 290.9311218261719, -348.32611083984375, 499.656494140625, -281.48406982421875, -353.58245849609375, 42.19010925292969], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000550.npy"}
{"epoch": 0.8076358296622613, "step": 551, "batch_size": 64, "mean": 507.3398132324219, "std": 494.0704345703125, "min": -422.3124084472656, "p10": -94.947932434082, "median": 470.32481384277344, "p90": 1306.5090454101562, "max": 1693.2469482421875, "pos_frac": 0.84375, "sample": [80.34890747070312, -31.013580322265625, 415.0367736816406, 764.0081176757812, 597.10302734375, 623.8399658203125, -381.4726257324219, -25.096630096435547, 1437.40869140625, -201.71226501464844, 68.21770477294922, 321.9759521484375, -64.28639221191406, 567.9993286132812, 546.1292114257812, 1480.357177734375, 489.3163757324219, 317.1175537109375, 551.5978393554688, 1490.3892822265625, 973.081787109375, 332.31768798828125, 581.247314453125, 632.9302978515625, 250.34226989746094, 282.62591552734375, 924.8754272460938, 366.53778076171875, 532.298095703125, 356.7745361328125, 434.6251220703125, -108.08859252929688, 1121.8232421875, 181.87054443359375, 474.5115966796875, 572.5966186523438, 737.3118896484375, 627.0170288085938, 540.8255004882812, 136.9159393310547, -308.0379333496094, 1065.6734619140625, 1312.798828125, 406.2857971191406, 39.06281661987305, 1452.793212890625, -422.3124084472656, -266.8544006347656, 1416.746337890625, 689.4435424804688, -254.13107299804688, 139.36331176757812, 1291.8328857421875, 949.7091064453125, 412.9393005371094, 501.083251953125, 746.3084716796875, 304.7147521972656, 466.1380310058594, 893.1218872070312, 156.57643127441406, 1693.2469482421875, 457.7218017578125, 325.81951904296875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000551.npy"}
{"epoch": 0.8091042584434655, "step": 552, "batch_size": 64, "mean": 578.72119140625, "std": 690.8300170898438, "min": -767.1219482421875, "p10": -84.5979133605957, "median": 521.6614379882812, "p90": 1393.4995483398438, "max": 2880.393798828125, "pos_frac": 0.859375, "sample": [787.5079956054688, 115.60713958740234, 958.4765014648438, 2880.393798828125, 801.221435546875, 1212.446533203125, 591.142822265625, 992.4429931640625, 52.3460693359375, 496.3475341796875, 683.3495483398438, 588.990966796875, 157.84292602539062, 508.4769287109375, 193.0973358154297, 699.5450439453125, 162.93673706054688, 626.5158081054688, 1212.42236328125, -402.1443176269531, 32.812225341796875, 611.8155517578125, 530.4090576171875, 307.93365478515625, -767.1219482421875, 24.49474334716797, 36.57550811767578, 1570.3463134765625, 186.4877166748047, -272.6317138671875, 1414.1611328125, -664.1937866210938, 783.5911865234375, -83.4291763305664, 583.9140625, 180.6805877685547, 460.589111328125, 2336.82958984375, 1345.2891845703125, 1468.560302734375, 532.6888427734375, -141.22610473632812, 473.033447265625, 958.4044189453125, 584.3145751953125, 2482.3525390625, 234.67642211914062, 407.85345458984375, 1327.631591796875, 2175.480712890625, 532.5165405273438, 161.06546020507812, 472.23944091796875, -325.2076110839844, -30.593231201171875, 512.913818359375, -85.09880065917969, 97.83766174316406, 413.64031982421875, 611.8948974609375, 604.2845458984375, 925.1627197265625, 641.2593383789062, 64.95446014404297], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000552.npy"}
{"epoch": 0.8105726872246696, "step": 553, "batch_size": 64, "mean": 297.1253967285156, "std": 545.4743041992188, "min": -704.8883056640625, "p10": -221.7018905639648, "median": 207.60367584228516, "p90": 1013.0090637207032, "max": 2473.127685546875, "pos_frac": 0.71875, "sample": [2473.127685546875, 986.2802734375, -173.88470458984375, 529.3851318359375, 446.060791015625, -113.35099029541016, 52.049285888671875, -67.22492218017578, 147.04359436035156, 243.1483154296875, 180.9547119140625, 234.2526397705078, 118.94794464111328, 104.14077758789062, -55.90473175048828, 261.7152099609375, -420.0150451660156, 16.574951171875, 304.6649169921875, -117.19953918457031, 240.37393188476562, 166.58253479003906, 344.1144104003906, -526.1272583007812, 522.4151000976562, -65.08412170410156, 1083.9664306640625, 96.17889404296875, -8.092826843261719, 299.55975341796875, -230.50904846191406, 508.3166198730469, -201.15185546875, -151.15379333496094, 548.6500244140625, 272.07440185546875, 386.8985595703125, 98.45361328125, 1055.7557373046875, -80.6501693725586, 342.5827331542969, -704.8883056640625, 573.6528930664062, 529.1264038085938, 80.79891204833984, 98.8017578125, -694.6787109375, 3.4766845703125, 548.487548828125, -3.8081741333007812, 1162.63916015625, 41.865745544433594, 1153.298828125, 536.7529296875, 889.9930419921875, 1023.3031616210938, 286.6209716796875, 988.989501953125, -481.44415283203125, 1716.5269775390625, -248.66900634765625, 952.9039306640625, 62.22321319580078, 646.13134765625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000553.npy"}
{"epoch": 0.8120411160058737, "step": 554, "batch_size": 64, "mean": 406.2252197265625, "std": 707.5015869140625, "min": -1491.5380859375, "p10": -481.0877319335937, "median": 374.7191619873047, "p90": 1260.9315673828125, "max": 2174.526123046875, "pos_frac": 0.78125, "sample": [191.1205596923828, -755.1665649414062, 720.835693359375, 655.585693359375, 235.99862670898438, 1001.4588623046875, 503.2489318847656, 381.1029968261719, -457.0753479003906, 371.67742919921875, 2174.526123046875, 382.1583251953125, 407.22344970703125, 276.4163513183594, -168.0294952392578, 455.77886962890625, 584.695068359375, 579.865966796875, 589.5322265625, -43.067039489746094, -667.7827758789062, -741.5573120117188, -491.3787536621094, 10.036750793457031, 2050.7568359375, -244.38629150390625, 284.0518493652344, 377.7608947753906, 440.13134765625, 809.3263549804688, 534.3043212890625, 1274.483642578125, 1605.6158447265625, 170.70359802246094, 857.6202392578125, 336.969482421875, 905.7330322265625, 14.782405853271484, 1525.5860595703125, -151.0147705078125, 0.753326416015625, 265.60614013671875, 1172.52978515625, 575.349365234375, 1026.1124267578125, 125.34867858886719, 296.2734680175781, 3.119110107421875, 888.2005004882812, 856.2941284179688, -882.1862182617188, 676.34228515625, 296.3115539550781, -684.6016235351562, 1131.02197265625, -1491.5380859375, -353.6994934082031, 75.31317901611328, 341.75445556640625, 1229.31005859375, 1804.2467041015625, 1715.1258544921875, 105.41278839111328, -163.61474609375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000554.npy"}
{"epoch": 0.8135095447870778, "step": 555, "batch_size": 64, "mean": 486.11102294921875, "std": 730.6663208007812, "min": -1126.205078125, "p10": -219.22098236083983, "median": 424.35430908203125, "p90": 1469.3458496093751, "max": 2662.134033203125, "pos_frac": 0.796875, "sample": [603.140869140625, 421.7425537109375, 1286.7738037109375, 323.6639099121094, 726.9915161132812, 1179.7601318359375, 367.42510986328125, 93.42550659179688, -82.79229736328125, 157.52047729492188, -197.56089782714844, 888.8892211914062, 734.7978515625, 473.5794677734375, 85.746337890625, 79.26780700683594, 167.31417846679688, -228.50387573242188, 2662.134033203125, 1154.4124755859375, 76.0997314453125, 2043.3526611328125, 63.77991485595703, 512.702880859375, 559.5582885742188, 116.71646118164062, 892.8043212890625, 32.69392395019531, 530.7601318359375, 22.894210815429688, 680.6934814453125, 1180.0413818359375, 556.8502197265625, 495.86968994140625, 17.571807861328125, 224.86524963378906, 751.2254638671875, -627.8060302734375, 119.97445678710938, -113.29462432861328, -920.0902099609375, 1095.0726318359375, 1590.515380859375, 2155.868408203125, -548.2957763671875, 464.8227844238281, 1959.1588134765625, 169.8678436279297, 426.966064453125, 135.78851318359375, 1490.296630859375, -531.1824340820312, 1420.460693359375, -1126.205078125, -136.17218017578125, -76.22103118896484, 1843.316162109375, 542.0701904296875, -112.42727661132812, 167.76727294921875, 1195.05810546875, 429.7942199707031, -238.15769958496094, 677.9513549804688], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000555.npy"}
{"epoch": 0.8149779735682819, "step": 556, "batch_size": 64, "mean": 293.83837890625, "std": 430.82000732421875, "min": -430.8654479980469, "p10": -242.99133148193357, "median": 262.71913146972656, "p90": 910.0522399902344, "max": 1476.4210205078125, "pos_frac": 0.734375, "sample": [1226.4212646484375, 257.21295166015625, 130.60220336914062, -131.3087158203125, 268.2253112792969, 137.2431640625, 571.8184814453125, 469.9551696777344, -219.2814483642578, 335.41400146484375, 87.46236419677734, 692.3577880859375, 271.7151794433594, -271.3949890136719, 213.984375, 556.2672119140625, 1121.3699951171875, -0.31048583984375, 706.2927856445312, -167.339111328125, -392.5477294921875, -46.352317810058594, -430.8654479980469, 127.58722686767578, -404.6446533203125, -24.721553802490234, 311.8763122558594, 120.93229675292969, 839.6823120117188, -253.1527099609375, 385.7630615234375, 343.71820068359375, 270.9585876464844, 65.81311798095703, 795.533447265625, -61.13433074951172, 403.58740234375, 184.9253387451172, 231.20730590820312, -12.990667343139648, -108.080322265625, 722.1614990234375, 151.17652893066406, 126.54077911376953, -259.4181213378906, 476.03570556640625, 924.3950805664062, 351.06365966796875, 933.2495727539062, 60.033424377441406, 216.09170532226562, -127.26261138916016, 912.9013061523438, 283.449462890625, 1476.4210205078125, 28.05811309814453, 903.4044189453125, 1423.1737060546875, 476.2895202636719, 512.1231079101562, 349.54541015625, -312.27752685546875, 296.74072265625, 277.95770263671875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000556.npy"}
{"epoch": 0.8164464023494861, "step": 557, "batch_size": 64, "mean": 427.0724182128906, "std": 681.6052856445312, "min": -1491.33984375, "p10": -268.6280090332031, "median": 376.5421142578125, "p90": 1274.549792480469, "max": 2357.87109375, "pos_frac": 0.75, "sample": [-1491.33984375, 66.53465270996094, -990.0623779296875, 896.4027099609375, 339.8910827636719, 195.53558349609375, 139.97483825683594, 466.7094421386719, 670.954345703125, 105.60910034179688, 976.7495727539062, 1424.4837646484375, 952.3861694335938, -2.302734375, 86.31466674804688, 426.7318115234375, 454.89019775390625, 935.975830078125, -60.523738861083984, 134.5791015625, 77.34626770019531, 1228.721923828125, 1695.550537109375, 665.211181640625, -108.30426025390625, 493.0048522949219, 7.73779296875, -371.32470703125, -237.77618408203125, 826.5941772460938, 170.6016845703125, 635.958251953125, 1121.2196044921875, 549.0059814453125, 220.18809509277344, 228.19976806640625, 466.7178649902344, -611.5490112304688, 843.7570190429688, 488.8909912109375, 768.719482421875, -281.8502197265625, 485.80072021484375, 131.9469451904297, 217.6968536376953, -506.26715087890625, 707.986328125, 1715.8338623046875, -227.74465942382812, 2357.87109375, -27.548572540283203, 2138.302978515625, 1416.58837890625, 279.8486022949219, 1011.4274291992188, -26.450271606445312, -213.21734619140625, 1294.1903076171875, -366.64178466796875, 1201.9888916015625, -66.85167694091797, 448.6728210449219, 352.908935546875, 400.17529296875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000557.npy"}
{"epoch": 0.8179148311306902, "step": 558, "batch_size": 64, "mean": 577.2942504882812, "std": 623.648193359375, "min": -1646.9298095703125, "p10": -99.19193420410154, "median": 487.27940368652344, "p90": 1354.7662109375, "max": 1931.53271484375, "pos_frac": 0.8125, "sample": [473.45562744140625, 1108.578125, 322.5505676269531, 1234.1942138671875, 165.07135009765625, 366.4234924316406, 500.1624755859375, 54.73249816894531, 838.559814453125, -309.8249816894531, -194.59866333007812, 1092.84716796875, 1247.768798828125, 1079.265380859375, 474.3963317871094, 1067.3671875, 511.92218017578125, 370.7841491699219, 1571.6480712890625, 1800.377685546875, -386.76055908203125, 361.94970703125, 358.863525390625, 1206.9818115234375, -56.56385040283203, 581.6591796875, 1374.01708984375, 444.47845458984375, 1373.1640625, 446.84588623046875, 294.5675354003906, 584.5390625, 871.6580810546875, 1311.837890625, 1434.061767578125, 907.668701171875, -1646.9298095703125, -109.5953369140625, 429.9463195800781, 199.34449768066406, 433.29425048828125, -26.053922653198242, 136.61338806152344, 391.6329345703125, 502.34771728515625, -59.34355163574219, -185.5876007080078, 1931.53271484375, 1159.0662841796875, -0.37288856506347656, 823.5048828125, 1063.451171875, 553.5188598632812, 531.69921875, 1061.1002197265625, -639.5496826171875, 844.8267822265625, 399.2392883300781, 389.47021484375, -74.91732788085938, 284.1434020996094, 1508.328857421875, 1067.288818359375, 1094.18359375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000558.npy"}
{"epoch": 0.8193832599118943, "step": 559, "batch_size": 64, "mean": 485.7807312011719, "std": 623.9253540039062, "min": -742.8096923828125, "p10": -94.86410675048828, "median": 341.9555969238281, "p90": 1236.5454833984377, "max": 2194.314697265625, "pos_frac": 0.796875, "sample": [890.6574096679688, 1215.3140869140625, 596.610107421875, 689.6357421875, 75.67835235595703, 48.331573486328125, 124.25489807128906, 170.2211456298828, 1245.6446533203125, 2194.314697265625, 1109.521484375, 286.2894287109375, 1058.454345703125, 22.498014450073242, 116.02713012695312, 796.4808349609375, 1070.65185546875, 1253.106201171875, -742.8096923828125, -447.58624267578125, 1174.4539794921875, 344.7984619140625, 165.44180297851562, 447.7342529296875, 339.11273193359375, 1159.17431640625, -175.71463012695312, -47.644287109375, 1311.1951904296875, 172.05389404296875, -26.22869110107422, 288.464599609375, 107.72998809814453, 40.851661682128906, 147.0742645263672, -46.88468933105469, -36.11672592163086, -97.52434539794922, 218.7854766845703, 222.59185791015625, 1951.184326171875, 557.8272094726562, 937.3819580078125, 113.0762710571289, -649.7188720703125, 738.9515991210938, 555.470703125, 617.002197265625, 1111.79541015625, -88.6568832397461, 466.5628967285156, 247.3226318359375, 370.5927734375, 1092.580810546875, 1655.103759765625, 1897.62548828125, -371.9496154785156, 802.8170776367188, 1012.6966552734375, 467.3060302734375, 16.681589126586914, -36.46046447753906, -580.8084106445312, 722.9359130859375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000559.npy"}
{"epoch": 0.8208516886930984, "step": 560, "batch_size": 64, "mean": 351.25054931640625, "std": 554.9534912109375, "min": -1013.4078979492188, "p10": -243.62276611328124, "median": 321.4129638671875, "p90": 1124.3649536132814, "max": 1937.2147216796875, "pos_frac": 0.71875, "sample": [1937.2147216796875, -372.052978515625, -81.76358795166016, 1070.33544921875, -407.4544982910156, 570.1406860351562, 89.12071990966797, -9.094551086425781, 98.07952880859375, 426.6664123535156, 224.6641845703125, 108.19168090820312, 244.16619873046875, 471.93524169921875, 1423.3897705078125, -234.72528076171875, 752.6082763671875, 1258.5555419921875, 491.345458984375, 926.5866088867188, 745.7459716796875, -89.98822021484375, 193.00335693359375, 1352.16796875, 858.9557495117188, -542.3005981445312, -21.492733001708984, 1087.9464111328125, 492.65533447265625, -158.5958251953125, 837.7061157226562, -223.4450225830078, 25.81275749206543, 524.5858154296875, 471.7001953125, -485.699462890625, 118.10834503173828, 1343.079345703125, 241.77651977539062, 514.6616821289062, 552.5134887695312, -136.35589599609375, 443.4060363769531, 164.02102661132812, 622.6852416992188, 1139.972900390625, -108.20298767089844, -25.98815155029297, -147.5755157470703, 1182.347900390625, 535.572265625, 210.65908813476562, 618.2431640625, 214.19139099121094, 398.65972900390625, 400.1413879394531, -247.43597412109375, 482.2969970703125, 795.1707153320312, 617.25048828125, -737.7056884765625, 92.68390655517578, -1013.4078979492188, 152.59710693359375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000560.npy"}
{"epoch": 0.8223201174743024, "step": 561, "batch_size": 64, "mean": 299.1540832519531, "std": 622.6829223632812, "min": -2199.531494140625, "p10": -245.44667663574214, "median": 239.5389404296875, "p90": 1076.1979187011723, "max": 1829.0335693359375, "pos_frac": 0.75, "sample": [624.9393920898438, 472.14532470703125, 205.31454467773438, 64.3844223022461, -1023.0006103515625, 1249.15576171875, 722.2349243164062, 78.43003845214844, 685.88134765625, 1119.9434814453125, 116.17791748046875, 532.024169921875, 1829.0335693359375, -199.13587951660156, 231.03945922851562, 200.83621215820312, 85.83331298828125, -110.2651596069336, 354.614990234375, 418.80474853515625, 1190.77978515625, 185.63687133789062, 609.2984619140625, 323.4000549316406, -264.3363037109375, 135.09666442871094, 850.267333984375, 378.19879150390625, 10.074331283569336, 1620.1048583984375, 756.6688232421875, 1281.477294921875, 0.735321044921875, 250.99081420898438, 793.4312744140625, -458.95013427734375, -269.0611267089844, 549.5543212890625, 20.398719787597656, 664.4330444335938, 110.0491714477539, -391.63958740234375, -169.1934814453125, 758.8466186523438, 534.6944580078125, -136.56671142578125, 161.01715087890625, 829.8062133789062, 974.1249389648438, 248.03842163085938, -2199.531494140625, 1556.822509765625, 144.5349884033203, -185.5458984375, 389.6304931640625, -129.54827880859375, 455.77667236328125, -31.579694747924805, 539.9671630859375, 277.88848876953125, -712.4049072265625, 211.10781860351562, -201.37088012695312, -175.65475463867188], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000561.npy"}
{"epoch": 0.8237885462555066, "step": 562, "batch_size": 64, "mean": 401.3731689453125, "std": 611.2813110351562, "min": -1205.1649169921875, "p10": -255.351611328125, "median": 387.31919860839844, "p90": 993.2503051757812, "max": 2741.345458984375, "pos_frac": 0.78125, "sample": [449.5926208496094, 2741.345458984375, 1309.153076171875, 501.0110168457031, 117.33010864257812, 504.8375549316406, 820.2769775390625, 248.24961853027344, 919.60400390625, 705.9406127929688, 791.7593994140625, 885.870361328125, 176.14947509765625, 487.3542785644531, 975.22021484375, -266.6827392578125, 236.0904541015625, 888.9261474609375, 189.1608123779297, 437.4035339355469, 266.1610107421875, 566.3633422851562, 549.7635498046875, -342.1501770019531, 50.608116149902344, 772.1666259765625, 992.5581665039062, -46.5291748046875, 1042.2838134765625, 259.7589111328125, -254.234619140625, -255.830322265625, -2.9988574981689453, 676.66259765625, -215.6572265625, 2107.585205078125, -154.25115966796875, 472.8072509765625, 536.1951904296875, 930.7681274414062, 678.1334228515625, 161.06141662597656, 590.228271484375, 311.1346740722656, -1205.1649169921875, 190.6671142578125, -240.56410217285156, 873.19921875, 538.1221313476562, 993.5469360351562, 6.830265045166016, -122.33592224121094, 22.525367736816406, 68.94032287597656, 637.3399658203125, 337.23486328125, -618.1664428710938, 35.716712951660156, 161.1328582763672, -622.5521240234375, 112.77839660644531, -447.27008056640625, 1121.2601318359375, 1033.46044921875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000562.npy"}
{"epoch": 0.8252569750367107, "step": 563, "batch_size": 64, "mean": 302.5437927246094, "std": 761.9033203125, "min": -3131.15966796875, "p10": -483.1230865478515, "median": 357.67799377441406, "p90": 1038.208081054688, "max": 1760.5302734375, "pos_frac": 0.765625, "sample": [223.31854248046875, -3131.15966796875, -936.0216674804688, 325.5395812988281, 737.03125, 55.6202392578125, 1309.3226318359375, 15.319610595703125, 340.3774108886719, 633.572998046875, 939.2952880859375, 183.03955078125, -643.4432373046875, 899.6031494140625, 833.7527465820312, 721.6654052734375, 102.03569030761719, 1760.5302734375, 616.6058349609375, -212.85606384277344, 577.2044677734375, 174.56048583984375, -194.17962646484375, 290.5355224609375, 917.1239013671875, -3.297374725341797, 732.9163208007812, 276.6170654296875, 1113.3475341796875, 942.4512939453125, 128.3515625, 744.2538452148438, -1694.0140380859375, 528.2998046875, -83.49336242675781, 182.3536376953125, -1.0580711364746094, 833.344970703125, 530.8150634765625, 402.77227783203125, 900.5108032226562, 645.6500854492188, 1213.9813232421875, 312.33245849609375, 1079.2467041015625, -180.226806640625, -1258.6617431640625, 1686.938720703125, -502.07440185546875, 1312.3751220703125, 44.010684967041016, 788.8077392578125, 80.97406768798828, -617.7642822265625, 782.8731689453125, -438.9033508300781, 462.86572265625, -355.76739501953125, 80.71626281738281, 526.3032836914062, 374.97857666015625, 399.0061340332031, 298.1989440917969, 554.37548828125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000563.npy"}
{"epoch": 0.8267254038179148, "step": 564, "batch_size": 64, "mean": 400.9611511230469, "std": 538.8273315429688, "min": -664.4231567382812, "p10": -198.73532714843748, "median": 328.3221435546875, "p90": 1203.605187988282, "max": 2158.0546875, "pos_frac": 0.8125, "sample": [402.3485107421875, 460.2082214355469, 765.8179931640625, 154.54734802246094, 645.080810546875, 640.1814575195312, 661.552978515625, -301.9728088378906, 281.7149353027344, -494.9927062988281, 486.6934509277344, 707.3245239257812, -337.63482666015625, 539.9932861328125, 1024.950439453125, 32.875579833984375, -132.47279357910156, 1386.189697265625, 1280.1715087890625, 475.85345458984375, 304.4878234863281, 305.41448974609375, 100.97209167480469, -165.63644409179688, -419.19793701171875, 328.4290466308594, -212.92056274414062, 149.52896118164062, 19.4140625, -664.4231567382812, 341.46844482421875, 1700.7847900390625, 717.5944213867188, -115.45623016357422, 36.240699768066406, 417.99908447265625, 86.03761291503906, 102.46334075927734, 510.976318359375, -34.662288665771484, 596.4434814453125, 2158.0546875, 66.169921875, 605.9697265625, 702.9505615234375, 691.2872314453125, 1369.388671875, 748.6394653320312, 1405.8388671875, 370.92578125, 5.253898620605469, 647.58642578125, 47.690895080566406, 67.37186431884766, 117.10359191894531, 275.82171630859375, 164.34226989746094, 947.1881103515625, 328.2152404785156, -71.98121643066406, 1330.11328125, -246.2196044921875, 928.4835205078125, 216.92977905273438], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000564.npy"}
{"epoch": 0.8281938325991189, "step": 565, "batch_size": 64, "mean": 417.05029296875, "std": 741.8084106445312, "min": -1240.4837646484375, "p10": -438.71748046875, "median": 268.48262786865234, "p90": 1262.0260986328128, "max": 2502.189208984375, "pos_frac": 0.671875, "sample": [-11.985980987548828, 1153.4290771484375, 1416.9482421875, -1116.38134765625, 229.13818359375, 900.7990112304688, 2004.26904296875, -126.97926330566406, -158.8705596923828, -441.9609375, 583.3919677734375, 965.8363647460938, -264.5094299316406, -1240.4837646484375, 243.65643310546875, -503.93115234375, -486.0464172363281, -35.06304931640625, -90.95773315429688, 485.9358215332031, 1001.5013427734375, 168.81149291992188, 170.18997192382812, 1557.2427978515625, -431.1494140625, -33.39366912841797, 157.01220703125, 311.6768798828125, -643.980224609375, 1018.3701782226562, 604.9764404296875, 241.49806213378906, 621.4328002929688, 780.1729736328125, 786.8259887695312, -3.3559112548828125, 943.8981323242188, 532.4514770507812, -261.4919738769531, -50.81468963623047, -69.18487548828125, 925.7744140625, 720.8504028320312, 1852.8502197265625, 957.1480102539062, 2502.189208984375, 563.505859375, 2290.748779296875, 1157.3367919921875, -534.5517578125, -196.886962890625, 11.357803344726562, 879.2127685546875, 507.9445495605469, 936.5079345703125, 288.9182434082031, 1142.31005859375, 282.0052490234375, 76.57379150390625, 41.01209259033203, -243.9666748046875, 254.9600067138672, 59.60173034667969, 1306.8929443359375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000565.npy"}
{"epoch": 0.8296622613803231, "step": 566, "batch_size": 64, "mean": 462.1506042480469, "std": 744.295166015625, "min": -1581.2305908203125, "p10": -445.23589477539053, "median": 532.328857421875, "p90": 1467.4188720703125, "max": 2128.1923828125, "pos_frac": 0.71875, "sample": [-207.97637939453125, -743.8797607421875, 649.538818359375, 1884.3199462890625, 440.0975646972656, -116.38088989257812, 826.5833740234375, 174.20468139648438, -973.357177734375, 527.6597290039062, 605.4078369140625, -199.7843475341797, -23.9930419921875, 665.1713256835938, 800.4625854492188, 414.84112548828125, 638.2069091796875, 118.26341247558594, 16.965396881103516, -1581.2305908203125, 995.5133056640625, 696.8146362304688, 514.1594848632812, 15.93218994140625, 1497.2052001953125, -473.1285705566406, 536.9979858398438, 1116.558349609375, -156.0922088623047, 1099.4033203125, 875.1508178710938, 1066.5225830078125, 763.0645751953125, 98.67362976074219, 1014.355224609375, -284.0871887207031, -144.96200561523438, 1812.5169677734375, 928.9446411132812, -122.47669982910156, -499.5106201171875, 784.0633544921875, 539.7438354492188, 1471.442138671875, 1348.512939453125, 1682.6614990234375, 2128.1923828125, 628.5081787109375, -780.9715576171875, 417.8411865234375, -689.8449096679688, 1676.043212890625, 758.9840087890625, 1458.03125, -55.134178161621094, -380.1529846191406, 1071.3472900390625, 387.67535400390625, 1056.1474609375, -369.7502136230469, 0.9664993286132812, 11.7857666015625, 675.0433349609375, 489.8260803222656], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000566.npy"}
{"epoch": 0.8311306901615272, "step": 567, "batch_size": 64, "mean": 359.1352844238281, "std": 479.75341796875, "min": -1103.66259765625, "p10": -108.0958312988281, "median": 306.6234893798828, "p90": 976.6223754882814, "max": 1750.3419189453125, "pos_frac": 0.828125, "sample": [160.7542724609375, -2.586221694946289, 1180.623046875, 644.993408203125, 521.6952514648438, 698.6298828125, 154.26116943359375, -85.00711822509766, 989.7264404296875, 170.44168090820312, 953.8958740234375, -85.16033172607422, 1182.667236328125, 305.426025390625, 139.29916381835938, 265.34100341796875, 1212.1943359375, 649.2794799804688, 541.7566528320312, 69.55827331542969, 522.7930297851562, 176.26181030273438, -293.5516662597656, 540.5571899414062, 313.155517578125, 621.7682495117188, 664.0026245117188, 1476.84033203125, -146.88487243652344, 673.3875732421875, 76.08647918701172, 314.91632080078125, 289.14544677734375, -1103.66259765625, 190.711181640625, -39.89497756958008, 91.40628051757812, 758.9268798828125, 1750.3419189453125, 812.9856567382812, 306.4877624511719, -800.6920776367188, 306.75921630859375, 152.16317749023438, 202.7521514892578, 921.7824096679688, 314.96588134765625, 256.09906005859375, 372.6365966796875, 411.3935241699219, 46.13438415527344, 986.3623046875, 58.40782928466797, 465.3583984375, -156.48561096191406, 367.8199462890625, 83.92469024658203, 280.565185546875, -117.92533111572266, 209.93914794921875, 392.23272705078125, 524.4092407226562, -478.21661376953125, 520.70263671875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000567.npy"}
{"epoch": 0.8325991189427313, "step": 568, "batch_size": 64, "mean": 470.4223327636719, "std": 534.87841796875, "min": -803.6919555664062, "p10": -151.63434906005853, "median": 427.8117980957031, "p90": 1081.443591308594, "max": 1850.8675537109375, "pos_frac": 0.875, "sample": [655.5265502929688, -710.7163696289062, 1140.514404296875, 822.5836791992188, 454.3045654296875, 453.2232971191406, 688.3563842773438, 751.9739990234375, 402.4002990722656, -803.6919555664062, -263.048583984375, -77.155517578125, 558.287841796875, 1775.6971435546875, 185.012939453125, 467.6067810058594, 222.53387451171875, 138.3096923828125, 218.2411346435547, -293.1424560546875, 560.4814453125, 336.7601623535156, 1001.9515380859375, 1034.6131591796875, 456.0260314941406, 304.5458679199219, 115.69124603271484, 1850.8675537109375, 264.5400085449219, 478.2839050292969, 991.3917236328125, 812.4019165039062, 1041.8082275390625, 686.6024780273438, 1510.0562744140625, 373.6884765625, 202.56272888183594, 645.5267944335938, 241.86231994628906, 944.1525268554688, 960.1099243164062, 84.8516845703125, 812.3406372070312, 856.8864135742188, 13.501815795898438, 303.13958740234375, 83.83462524414062, -183.55384826660156, 346.7334899902344, 1098.43017578125, 85.7702865600586, 1325.9884033203125, 339.13385009765625, -673.7085571289062, 227.87954711914062, -421.306640625, 1121.8505859375, 1039.1324462890625, 211.39413452148438, 628.6270751953125, 205.69390869140625, 731.047607421875, 162.1502685546875, 106.46946716308594], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000568.npy"}
{"epoch": 0.8340675477239354, "step": 569, "batch_size": 64, "mean": 389.440185546875, "std": 688.5137329101562, "min": -1775.7958984375, "p10": -314.9565216064453, "median": 324.4357452392578, "p90": 1235.848681640625, "max": 2303.76318359375, "pos_frac": 0.78125, "sample": [-729.0148315429688, 89.14273071289062, 1645.673828125, 179.85708618164062, 654.490234375, -250.4409637451172, 718.5188598632812, 85.25051879882812, -517.5682373046875, 140.11058044433594, 684.5489501953125, 1246.336669921875, 499.06689453125, 655.6842041015625, 355.4240417480469, -533.0377197265625, 227.1081085205078, 935.7296752929688, -257.408203125, -27.884302139282227, -1266.654541015625, 1671.429931640625, 66.72628784179688, -253.52532958984375, 27.876192092895508, 659.9873657226562, 1087.8779296875, 52.05735778808594, 45.888336181640625, 75.8536376953125, 1394.814697265625, 862.934326171875, 744.2266845703125, -13.6785888671875, 1318.5469970703125, 445.2736511230469, 106.51434326171875, 378.7586975097656, 736.1809692382812, -1775.7958984375, 574.3563232421875, 147.13279724121094, -5.201478958129883, 1173.1302490234375, 815.93798828125, 137.36427307128906, 804.2041625976562, 181.1231689453125, -339.6200866699219, 736.759033203125, -403.6480712890625, 1729.9742431640625, 40.43761444091797, 513.421875, 1016.8970336914062, -213.8016357421875, 123.11264038085938, 627.314208984375, 293.44744873046875, 110.59112548828125, 561.6897583007812, 617.5277099609375, 1211.376708984375, 2303.76318359375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000569.npy"}
{"epoch": 0.8355359765051396, "step": 570, "batch_size": 64, "mean": 363.46832275390625, "std": 581.173095703125, "min": -1041.3634033203125, "p10": -228.9683898925781, "median": 298.0047607421875, "p90": 1160.9636108398443, "max": 2404.17041015625, "pos_frac": 0.78125, "sample": [389.84906005859375, 1031.3997802734375, 473.8638916015625, 1497.5262451171875, 2404.17041015625, 176.1159210205078, 735.3629760742188, 1423.58935546875, 1440.0845947265625, 366.82135009765625, 380.0599670410156, 919.7982177734375, 277.0450134277344, 506.156982421875, 54.971519470214844, 408.78753662109375, 959.0234375, 396.12548828125, 691.6972045898438, -264.1019287109375, 768.4258422851562, -218.01516723632812, -542.7872314453125, -764.69677734375, 145.8594207763672, 841.045654296875, -559.9962768554688, -233.66262817382812, 297.46685791015625, 382.92529296875, 381.9747009277344, 248.61744689941406, -184.65533447265625, -455.47845458984375, 215.43756103515625, 298.54266357421875, 211.542724609375, 431.35235595703125, 87.53917694091797, 1231.7791748046875, 472.1627197265625, -198.57965087890625, 225.87252807617188, 145.71673583984375, 1216.490966796875, 270.97100830078125, 220.4626007080078, 688.1923217773438, 1030.1600341796875, 750.4169921875, 27.895427703857422, 178.70608520507812, 567.0384521484375, 388.22900390625, -77.15387725830078, -1041.3634033203125, 308.3909912109375, 92.64765930175781, 159.9065704345703, -111.71583557128906, -184.8724365234375, 1273.3621826171875, 186.12832641601562, -178.6569366455078], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000570.npy"}
{"epoch": 0.8370044052863436, "step": 571, "batch_size": 64, "mean": 421.01739501953125, "std": 659.576416015625, "min": -1402.500244140625, "p10": -396.5351318359374, "median": 417.294921875, "p90": 1276.5049926757813, "max": 1990.3077392578125, "pos_frac": 0.765625, "sample": [583.04248046875, 795.7064819335938, 178.72047424316406, 847.5, -206.65249633789062, 451.9015197753906, 59.47788619995117, 1003.3550415039062, 447.6898193359375, 1177.219970703125, 1779.453125, -419.4925842285156, -269.4778747558594, 1703.6817626953125, 166.6269989013672, 496.55426025390625, 806.8865966796875, 746.3033447265625, 500.1064147949219, 839.4586791992188, 1990.3077392578125, 1692.00439453125, 688.8385620117188, 1067.3291015625, -342.9677429199219, -52.6656494140625, 720.6669921875, 355.97027587890625, 1301.9461669921875, 1140.121826171875, 253.01551818847656, -79.8292236328125, 308.7198791503906, 1602.82958984375, 804.779296875, -34.9440803527832, -609.483642578125, 145.42697143554688, 1285.7392578125, 562.1136474609375, 824.1159057617188, -497.8370666503906, 1254.9583740234375, 529.1983032226562, 386.9000244140625, 87.53607940673828, 631.1385498046875, 154.2777862548828, 561.853759765625, 10.638229370117188, -595.75, -1402.500244140625, 280.41162109375, 74.49215698242188, -715.9851684570312, 315.75048828125, 281.36346435546875, 515.433349609375, -727.7559204101562, 64.01908874511719, -212.73660278320312, 538.397705078125, -7.472747802734375, 106.683837890625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000571.npy"}
{"epoch": 0.8384728340675477, "step": 572, "batch_size": 64, "mean": 457.11859130859375, "std": 744.5606689453125, "min": -1297.8472900390625, "p10": -302.02769165039064, "median": 388.17840576171875, "p90": 1399.6583618164068, "max": 2968.98583984375, "pos_frac": 0.75, "sample": [-288.5969543457031, -137.35902404785156, -357.63250732421875, 485.95233154296875, 1849.8109130859375, 16.24602508544922, 58.357452392578125, 23.690711975097656, -32.49099349975586, 152.81668090820312, -127.6075210571289, 1246.2646484375, 987.6956787109375, 418.8699951171875, 601.3833618164062, 1020.2982788085938, 961.96533203125, 1273.224365234375, 1453.8443603515625, 624.1834716796875, 261.58062744140625, 702.2864990234375, 1.3985710144042969, 1064.9241943359375, -1019.1361694335938, 111.2131118774414, 366.6829833984375, -301.759765625, -228.56375122070312, 822.6744995117188, -417.9906005859375, 516.0314331054688, 448.1708068847656, 79.22244262695312, 256.0980529785156, 2968.98583984375, 100.95372772216797, 393.8904724121094, -269.1536560058594, -568.7659301757812, 1666.1348876953125, 139.9116973876953, -302.14251708984375, 609.0902099609375, -231.6531982421875, -341.1861267089844, 1188.3175048828125, 786.6237182617188, 591.6368408203125, -12.100051879882812, 910.469970703125, 273.27105712890625, 382.4663391113281, 1951.6798095703125, 234.52468872070312, 395.8879089355469, 666.3350830078125, 2020.19091796875, 4.61285400390625, -1297.8472900390625, 631.5393676757812, 1254.829345703125, 1586.3482666015625, 626.9876098632812], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000572.npy"}
{"epoch": 0.8399412628487518, "step": 573, "batch_size": 64, "mean": 390.7725830078125, "std": 555.318603515625, "min": -642.6961059570312, "p10": -147.2873039245605, "median": 263.1529998779297, "p90": 1006.9551513671879, "max": 2464.382568359375, "pos_frac": 0.796875, "sample": [640.8399658203125, -76.28521728515625, 43.138694763183594, -28.741943359375, -24.723251342773438, 2464.382568359375, 847.6109008789062, -46.04711151123047, 124.29334259033203, 318.02789306640625, 498.4455871582031, 179.30958557128906, -642.6961059570312, 2121.231201171875, 391.791015625, 401.322509765625, 1687.114990234375, -449.89739990234375, 9.336799621582031, -173.09884643554688, -444.7493896484375, 1087.6905517578125, 673.6903076171875, 580.4810180664062, 64.41687774658203, 264.65911865234375, 778.1234130859375, 1076.1748046875, 155.34063720703125, 1211.93017578125, -352.24169921875, 850.4440307617188, 232.90760803222656, 511.4510498046875, 626.77734375, -326.4677429199219, 292.753173828125, 93.52471923828125, 751.3392333984375, 20.43402862548828, 884.440185546875, 170.65768432617188, 192.76641845703125, -87.06037139892578, 372.60943603515625, 86.70396423339844, 249.68536376953125, 125.20658874511719, 913.19189453125, -232.64630126953125, 506.57269287109375, 261.6468811035156, 170.40924072265625, 584.3199462890625, 364.8905029296875, 107.54486846923828, -83.8934555053711, 794.0352783203125, 536.5972900390625, 620.6882934570312, 151.2032470703125, 717.2717895507812, 1047.139404296875, 121.43122863769531], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000573.npy"}
{"epoch": 0.8414096916299559, "step": 574, "batch_size": 64, "mean": 427.0505676269531, "std": 566.963623046875, "min": -892.2723999023438, "p10": -309.8072052001953, "median": 402.9277648925781, "p90": 1275.6574340820314, "max": 1880.0933837890625, "pos_frac": 0.8125, "sample": [355.2391052246094, 728.73828125, -168.51058959960938, 10.441291809082031, 1215.8309326171875, -19.422521591186523, 121.61065673828125, 1301.29736328125, 1035.293701171875, 734.59375, 126.33428192138672, 1584.8482666015625, 1009.6275634765625, 825.6605834960938, 78.59994506835938, 718.60888671875, 91.14153289794922, 414.47125244140625, -580.8832397460938, 1880.0933837890625, 572.26708984375, 697.8229370117188, 437.62310791015625, 623.7376708984375, 239.20050048828125, 708.326416015625, 400.7135314941406, -387.0113830566406, 67.33055114746094, -282.10809326171875, 158.61666870117188, -413.8639221191406, 184.84829711914062, 1427.89892578125, -128.76153564453125, 741.2396240234375, -370.1681823730469, 857.66162109375, 225.30494689941406, 469.0331726074219, 934.7960815429688, -336.12860107421875, 746.4147338867188, 758.1399536132812, 564.462158203125, 235.39541625976562, 444.30120849609375, 83.60850524902344, -892.2723999023438, -321.6782531738281, 405.1419982910156, 91.25457000732422, -103.68025207519531, 1436.259033203125, 51.684906005859375, 627.02734375, 357.7769470214844, 1353.7259521484375, 199.7061309814453, 601.5166625976562, 128.0063018798828, 1618.6357421875, 625.6400756835938, 28.176340103149414], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000574.npy"}
{"epoch": 0.8428781204111601, "step": 575, "batch_size": 64, "mean": 388.8648681640625, "std": 663.2891845703125, "min": -1284.2607421875, "p10": -264.4933624267578, "median": 421.57025146484375, "p90": 1247.1335083007814, "max": 2113.44384765625, "pos_frac": 0.765625, "sample": [1257.4647216796875, 233.4344024658203, 828.1021118164062, 488.98101806640625, 570.7640991210938, -873.2000122070312, 587.1617431640625, 506.6446228027344, 225.74569702148438, 156.48963928222656, 76.74378204345703, -37.372962951660156, 542.322509765625, 921.1671752929688, 544.8482055664062, 257.8067321777344, -127.50762939453125, 755.1495361328125, 297.1234436035156, 300.68585205078125, 357.1366271972656, 76.0556640625, 1278.4058837890625, 654.2326049804688, 566.3456420898438, 337.6831359863281, -1196.380615234375, 282.6517639160156, -655.6621704101562, 491.35772705078125, -903.6231079101562, 1223.02734375, 893.428955078125, 1310.770263671875, 264.2127685546875, -148.15306091308594, -221.71685791015625, -110.63191986083984, 1261.4161376953125, 1014.3516235351562, -268.302001953125, 218.25238037109375, -255.60653686523438, 1019.4683837890625, 1106.8990478515625, -856.4237670898438, 515.4884643554688, 453.0899658203125, 635.3348388671875, 2113.44384765625, 394.0679626464844, 920.691650390625, 449.0725402832031, -36.0247802734375, 1456.2208251953125, 151.21377563476562, 591.2335205078125, -1284.2607421875, 2061.7412109375, 55.24750900268555, -120.29375457763672, 599.31787109375, 134.4866485595703, 545.52880859375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000575.npy"}
{"epoch": 0.8443465491923642, "step": 576, "batch_size": 64, "mean": 292.5570983886719, "std": 561.3081665039062, "min": -954.6218872070312, "p10": -379.76430053710936, "median": 189.5432357788086, "p90": 942.3990478515627, "max": 1623.4512939453125, "pos_frac": 0.71875, "sample": [256.4178466796875, 967.8253173828125, -150.70033264160156, -12.78774642944336, -359.93243408203125, -182.10768127441406, -159.71250915527344, 1623.4512939453125, 895.9277954101562, 107.31110382080078, 297.2474670410156, 1473.29541015625, 126.9752197265625, 127.99598693847656, 760.3950805664062, 481.75030517578125, 524.2242431640625, -827.2288208007812, 270.3153991699219, 589.3046875, 515.5906982421875, 962.1592407226562, 557.3224487304688, 793.776123046875, -32.6127815246582, 1465.593505859375, -129.9652557373047, -388.263671875, -231.62710571289062, 772.9314575195312, -204.4126434326172, 185.73219299316406, 94.80618286132812, 37.307472229003906, 896.2919311523438, 534.9321899414062, 1601.7684326171875, 303.1118469238281, 109.17540740966797, 761.58203125, 61.421783447265625, 870.80126953125, 172.51937866210938, 347.71697998046875, 91.49715423583984, -855.98046875, -57.40404510498047, 596.1170654296875, -629.0078125, 629.633544921875, 367.8995361328125, 1027.4755859375, 873.9777221679688, 841.1671752929688, -954.6218872070312, -400.5388488769531, 151.796875, 218.18643188476562, 193.35427856445312, 158.619384765625, -399.04742431640625, 129.54989624023438, -304.0126953125, 177.36520385742188], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000576.npy"}
{"epoch": 0.8458149779735683, "step": 577, "batch_size": 64, "mean": 394.10260009765625, "std": 606.7445678710938, "min": -1336.0460205078125, "p10": -86.51118240356443, "median": 348.35765075683594, "p90": 1171.2167358398438, "max": 2289.764892578125, "pos_frac": 0.84375, "sample": [-1336.0460205078125, 2289.764892578125, 1148.2108154296875, 1368.04296875, 153.2546844482422, 20.046890258789062, 306.18609619140625, 124.84751892089844, 244.8133544921875, -54.302215576171875, 656.2528076171875, 361.6300964355469, 519.9843139648438, 436.6881103515625, 415.206298828125, 715.7554931640625, 1043.4979248046875, 1181.076416015625, -525.7962646484375, 567.6676635742188, 168.88340759277344, 131.20632934570312, -795.925537109375, 256.0699462890625, 336.40814208984375, 726.1246948242188, 124.82731628417969, 238.17210388183594, 443.92425537109375, 335.80938720703125, 169.86434936523438, 369.4172058105469, 269.0549621582031, -31.94426918029785, 1909.6400146484375, 49.827484130859375, 456.8765869140625, -635.6383056640625, 180.23280334472656, -337.49951171875, 367.1577453613281, 180.00247192382812, 336.4661865234375, 512.5234985351562, -65.11170196533203, 515.60302734375, 85.21299743652344, 598.4534301757812, 687.9263916015625, 996.9171142578125, 66.39915466308594, 368.3928527832031, 1422.3067626953125, -95.68238830566406, 1687.8570556640625, 498.49273681640625, 487.4769287109375, -638.333740234375, 13.159133911132812, 1498.9039306640625, 67.18785095214844, 895.4544677734375, 360.2491149902344, 373.43914794921875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000577.npy"}
{"epoch": 0.8472834067547724, "step": 578, "batch_size": 64, "mean": 431.7515563964844, "std": 537.4140014648438, "min": -852.9334716796875, "p10": -134.28537445068358, "median": 406.42726135253906, "p90": 1163.1210021972665, "max": 1855.7125244140625, "pos_frac": 0.796875, "sample": [1855.7125244140625, 132.56414794921875, 807.7503662109375, 107.9022216796875, 563.5010986328125, 354.049072265625, 1289.5260009765625, 544.554443359375, 197.78402709960938, -157.7766571044922, 126.12394714355469, 942.7681274414062, 461.5413818359375, 270.3416748046875, 107.73538208007812, -593.0806274414062, 61.48045349121094, 5.630928039550781, 1653.703369140625, 770.6696166992188, 740.778076171875, 431.636962890625, 965.2075805664062, -127.45542907714844, -53.55548858642578, 765.1222534179688, 833.5835571289062, -89.93628692626953, -307.1986999511719, 132.67596435546875, -27.385391235351562, -401.6031494140625, 405.06268310546875, 422.0982360839844, 171.55116271972656, 535.7014770507812, -83.43848419189453, 294.0546875, -852.9334716796875, -83.39562225341797, 528.319091796875, 704.141357421875, 520.0011596679688, 407.7918395996094, 1292.908935546875, 879.4912719726562, 307.1805114746094, 89.34408569335938, 818.4366455078125, 201.5849151611328, 778.8697509765625, 734.4359741210938, 76.19071960449219, 1247.9410400390625, 821.170166015625, 1596.6103515625, -137.21249389648438, 304.2369384765625, 1483.881103515625, 945.8939208984375, 494.3529052734375, -250.48077392578125, 88.55140686035156, 525.4071655273438], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000578.npy"}
{"epoch": 0.8487518355359766, "step": 579, "batch_size": 64, "mean": 266.0924072265625, "std": 572.3960571289062, "min": -1517.123291015625, "p10": -404.9144714355468, "median": 307.5412292480469, "p90": 954.7327148437502, "max": 1996.640625, "pos_frac": 0.6875, "sample": [22.281883239746094, -174.98057556152344, -1517.123291015625, 555.772705078125, -65.00680541992188, 126.43740844726562, 913.3977661132812, 345.0255432128906, 686.5286254882812, 393.1094055175781, -45.82154083251953, -149.7329559326172, -701.3006591796875, 341.11871337890625, 221.25588989257812, 611.0802001953125, 296.47918701171875, 517.8212280273438, 1090.9638671875, 206.50633239746094, 972.4476928710938, 92.67449951171875, 277.0473327636719, 471.037841796875, 19.795522689819336, 493.762451171875, 351.6718444824219, -52.129432678222656, 574.985595703125, 203.45753479003906, 508.44873046875, -454.81549072265625, 605.7400512695312, 631.7569580078125, 1996.640625, 1300.70361328125, 382.6904296875, -704.0189819335938, -42.91897964477539, 536.793701171875, 1168.7996826171875, -203.24105834960938, 701.0224609375, -288.478759765625, 1476.2928466796875, -0.5860157012939453, -50.48939895629883, -185.94032287597656, -588.0991821289062, 318.603271484375, 883.9525146484375, 378.2829895019531, -9.683303833007812, 552.2717895507812, 374.75177001953125, -730.40966796875, 1138.322021484375, 75.78704833984375, 258.28375244140625, -773.9640502929688, 136.47703552246094, -117.18762969970703, 320.0843505859375, 355.47637939453125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000579.npy"}
{"epoch": 0.8502202643171806, "step": 580, "batch_size": 64, "mean": 471.3471984863281, "std": 651.7671508789062, "min": -1155.16259765625, "p10": -104.73825759887694, "median": 329.16156005859375, "p90": 1170.1019653320313, "max": 2768.177734375, "pos_frac": 0.84375, "sample": [-119.13694763183594, 1014.1905517578125, 256.0238037109375, 556.5064086914062, -163.51226806640625, 210.0133056640625, 183.3927459716797, 1874.9783935546875, 420.5726623535156, 315.0924072265625, 411.0065612792969, -71.00372314453125, 2768.177734375, 271.7104187011719, 61.60026931762695, 664.5158081054688, 284.91888427734375, 138.2195281982422, 811.2881469726562, 450.3857421875, 253.68592834472656, 998.6485595703125, 966.8121948242188, 649.8553466796875, 531.283203125, 27.13562774658203, 1004.8101806640625, 1126.7528076171875, 566.953369140625, 140.09510803222656, 40.105796813964844, 152.43092346191406, 80.50325775146484, 93.1509780883789, 759.45458984375, 1192.638427734375, 38.02227783203125, -197.21487426757812, 751.0755615234375, 693.789306640625, 1796.93896484375, 1.5650768280029297, -1155.16259765625, 1025.8673095703125, 13.463066101074219, 610.619384765625, -561.885498046875, -107.66184997558594, 599.05810546875, 2554.748046875, 34.20072937011719, -44.380035400390625, 1441.802978515625, 146.34677124023438, 380.03521728515625, 397.5985412597656, -97.91654205322266, 317.462158203125, 64.02388763427734, -293.6964111328125, 388.80517578125, 915.9183959960938, 1188.68017578125, 340.8609619140625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000580.npy"}
{"epoch": 0.8516886930983847, "step": 581, "batch_size": 64, "mean": 340.0408020019531, "std": 599.7761840820312, "min": -881.2940063476562, "p10": -432.0906280517578, "median": 349.74864196777344, "p90": 966.6880126953125, "max": 2854.679931640625, "pos_frac": 0.75, "sample": [272.8341979980469, 113.71311950683594, 143.82809448242188, -93.23875427246094, 376.9039306640625, 420.0230407714844, -498.474365234375, 170.89691162109375, 950.6102294921875, -598.087890625, 669.2820434570312, 723.0391845703125, 506.19024658203125, 391.38641357421875, 693.74267578125, -434.2169494628906, 1354.359619140625, 819.6218872070312, 453.61737060546875, 1223.173583984375, -157.90708923339844, 589.0248413085938, 135.769287109375, 568.3308715820312, 655.3643798828125, 520.095947265625, 333.72607421875, 112.95874786376953, 100.622314453125, 973.5784912109375, -256.18701171875, 2854.679931640625, 16.65780258178711, 1421.141845703125, 254.6134033203125, 409.02972412109375, 841.2999877929688, -110.57865905761719, 76.52222442626953, 432.3551025390625, 331.05267333984375, 51.182952880859375, -84.2843017578125, -297.151123046875, 1107.2879638671875, 198.7377166748047, 604.4093627929688, 887.4456787109375, 1265.5997314453125, 455.55810546875, -556.1763916015625, -881.2940063476562, -14.902759552001953, -375.8116760253906, 679.189453125, 400.23614501953125, -501.44903564453125, -631.474853515625, 77.84295654296875, -427.12921142578125, 346.9315490722656, 821.7647705078125, 352.56573486328125, 522.1767578125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000581.npy"}
{"epoch": 0.8531571218795888, "step": 582, "batch_size": 64, "mean": 381.43505859375, "std": 542.4555053710938, "min": -1071.965576171875, "p10": -163.14920349121093, "median": 314.4577178955078, "p90": 1062.8468994140626, "max": 1574.609130859375, "pos_frac": 0.765625, "sample": [-1071.965576171875, 238.28195190429688, 1158.4583740234375, 336.15289306640625, 535.3880615234375, -157.06353759765625, 250.95755004882812, 513.4390258789062, 466.47119140625, -67.61602783203125, 323.09893798828125, 121.90975189208984, -56.78706359863281, 770.73876953125, 663.8209228515625, 1485.482421875, -49.23518371582031, 3.4885406494140625, -157.27012634277344, 1185.87060546875, 415.13116455078125, 45.24897766113281, 569.9398803710938, -842.592529296875, -214.16943359375, 802.9229736328125, 170.71890258789062, 1066.8753662109375, 1421.640869140625, 1574.609130859375, 892.54443359375, 387.2390441894531, 1053.4471435546875, -17.220924377441406, -357.6788330078125, 169.22854614257812, 73.9086685180664, 188.72950744628906, 624.65869140625, 628.0453491210938, 305.8164978027344, -210.5303497314453, 936.3672485351562, 978.6678466796875, 54.55690002441406, 994.0316162109375, -16.603919982910156, 732.5951538085938, -165.66880798339844, 268.05926513671875, 27.965534210205078, 797.8905029296875, 1051.10595703125, 373.90435791015625, -39.94820785522461, -884.9340209960938, 196.74942016601562, 282.579833984375, 797.4910278320312, 638.0292358398438, 1120.894775390625, 294.9571838378906, 665.950927734375, 65.0678482055664], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000582.npy"}
{"epoch": 0.8546255506607929, "step": 583, "batch_size": 64, "mean": 240.80844116210938, "std": 655.31689453125, "min": -1560.837646484375, "p10": -644.9275939941406, "median": 249.16763305664062, "p90": 924.8504821777344, "max": 2032.8511962890625, "pos_frac": 0.71875, "sample": [90.64285278320312, 263.547607421875, 377.3299865722656, -753.3883666992188, 929.7549438476562, 30.559829711914062, 871.5369873046875, 212.94117736816406, 2032.8511962890625, 169.94390869140625, -137.36761474609375, 15.401716232299805, -817.8560791015625, 303.28472900390625, 161.3843994140625, -311.33843994140625, 891.0978393554688, 833.233154296875, 319.5027160644531, 1052.380615234375, 431.64068603515625, 64.79537963867188, -605.8687133789062, -423.27349853515625, -102.16718292236328, -868.224609375, 368.16998291015625, 315.94219970703125, -519.07958984375, -711.8760375976562, -80.96580505371094, -130.85031127929688, 530.3767700195312, 517.3438720703125, 346.7352600097656, 1895.6473388671875, 438.17218017578125, 11.849380493164062, 899.56494140625, 240.42666625976562, 2.56060791015625, 358.00457763671875, 401.0076599121094, -97.83334350585938, -913.1019897460938, -1560.837646484375, 1581.3291015625, 425.2127685546875, -415.6784362792969, 776.1181030273438, 903.9766845703125, 1321.5908203125, 162.8948974609375, 536.859619140625, 913.40673828125, -661.6671142578125, 127.45735168457031, 506.78656005859375, 62.3143310546875, 275.7395935058594, 1231.610595703125, 70.94393920898438, -8.665214538574219, 257.9085998535156], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000583.npy"}
{"epoch": 0.856093979441997, "step": 584, "batch_size": 64, "mean": 472.8240966796875, "std": 579.3696899414062, "min": -664.25439453125, "p10": -120.79265441894526, "median": 348.1924591064453, "p90": 1220.9206176757814, "max": 2466.907470703125, "pos_frac": 0.828125, "sample": [569.5918579101562, 232.5821075439453, -206.1658172607422, 275.4007263183594, 31.6654052734375, -424.27099609375, 604.7492065429688, 453.91729736328125, 1027.74365234375, 51.8136100769043, 13.16595458984375, -141.25634765625, 105.47784423828125, 210.32449340820312, 337.6690979003906, 1086.5155029296875, -48.593204498291016, 1231.3096923828125, 866.3978271484375, -566.1328125, 263.3021545410156, 549.2446899414062, -48.04768371582031, 247.8057403564453, 360.71990966796875, 290.4017639160156, 1274.728515625, 148.25588989257812, 201.06387329101562, 249.0758056640625, 1001.0882568359375, 581.777587890625, 303.6675109863281, -664.25439453125, 610.7750244140625, 395.54248046875, 574.2132568359375, 457.8487243652344, 197.77377319335938, -73.04403686523438, 439.343505859375, -272.33917236328125, 358.7158203125, 1545.406005859375, 845.7857055664062, 307.4176940917969, 1079.4189453125, 1196.679443359375, -250.0032958984375, 2466.907470703125, 1749.5557861328125, 33.87724304199219, 1613.350830078125, 1185.6185302734375, 444.13006591796875, 1518.861572265625, 244.3829803466797, 664.4840698242188, -8.084617614746094, 294.73455810546875, 590.2599487304688, 379.1611328125, 1059.5078125, 139.7258758544922], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000584.npy"}
{"epoch": 0.8575624082232012, "step": 585, "batch_size": 64, "mean": 435.0365905761719, "std": 696.6397705078125, "min": -1651.6640625, "p10": -347.95515747070306, "median": 459.4757995605469, "p90": 1317.3388916015626, "max": 2465.5732421875, "pos_frac": 0.734375, "sample": [216.78073120117188, -145.2773895263672, 635.262939453125, 305.73931884765625, 487.02972412109375, 2465.5732421875, 949.061279296875, -480.82275390625, 364.0834045410156, 273.212158203125, 1024.66357421875, -3.8386688232421875, 600.207275390625, 545.5426025390625, -25.767189025878906, 1431.4356689453125, 1369.5052490234375, 589.746826171875, 1471.427490234375, 559.677978515625, 454.9775085449219, 1262.8226318359375, 243.80169677734375, 463.9740905761719, 1043.264404296875, -42.37751770019531, 664.351318359375, 1072.995361328125, 679.4810180664062, 153.1353759765625, 133.5979766845703, 740.934814453125, 1340.7030029296875, 9.0128173828125, 1088.5709228515625, 210.81716918945312, 1000.9940185546875, -264.5937194824219, -53.55674743652344, 804.3602905273438, 356.0915832519531, -1651.6640625, 921.3739013671875, -796.2978515625, 1019.3485107421875, 483.30169677734375, 500.13897705078125, -435.5294494628906, 842.2791137695312, -248.47628784179688, -50.8687744140625, -1104.3994140625, -45.04652786254883, -383.6814880371094, 40.31166076660156, 1587.162841796875, 476.51300048828125, -543.4384155273438, 808.9507446289062, -33.13677978515625, 176.45359802246094, 152.01023864746094, 138.64967346191406, 1991.784423828125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000585.npy"}
{"epoch": 0.8590308370044053, "step": 586, "batch_size": 64, "mean": 296.1407470703125, "std": 575.6981811523438, "min": -1147.5714111328125, "p10": -362.65656738281245, "median": 321.6334533691406, "p90": 968.9786743164066, "max": 1670.564208984375, "pos_frac": 0.703125, "sample": [1085.91552734375, -228.70098876953125, -277.420166015625, 524.026123046875, 611.1558837890625, 364.951904296875, -271.53057861328125, 344.8194580078125, 294.8876647949219, 92.9730453491211, -524.278564453125, 287.1838073730469, 821.5725708007812, 1420.1337890625, -47.113548278808594, -135.68446350097656, 523.5476684570312, -769.4827880859375, 259.0356750488281, 243.8772430419922, -127.2287826538086, 123.3327865600586, 1073.043212890625, 753.5509643554688, 838.1185913085938, -60.29557800292969, 662.3953857421875, -237.91790771484375, 13.231986999511719, 413.1121520996094, 738.1234741210938, -128.86688232421875, 298.44744873046875, -1147.5714111328125, 600.0914306640625, 690.094970703125, 1518.1553955078125, -369.90106201171875, 518.9641723632812, -136.19586181640625, 877.7471923828125, -436.8719482421875, 534.4965209960938, 441.3619384765625, -1069.570556640625, -643.1375732421875, 359.2327575683594, 382.80126953125, 160.03404235839844, 493.5736083984375, -345.75274658203125, 358.35784912109375, 1008.077880859375, 1559.3797607421875, 847.6115112304688, 225.91673278808594, 438.0128173828125, 1670.564208984375, 117.69219207763672, -69.81568145751953, 147.85205078125, 711.5829467773438, 419.2437744140625, 112.0640869140625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000586.npy"}
{"epoch": 0.8604992657856094, "step": 587, "batch_size": 64, "mean": 404.605224609375, "std": 558.965576171875, "min": -1332.746826171875, "p10": -174.17227630615233, "median": 363.24623107910156, "p90": 1184.9624389648438, "max": 1871.1966552734375, "pos_frac": 0.78125, "sample": [560.4794311523438, 1140.5084228515625, 410.37457275390625, 76.91128540039062, 280.6486511230469, 436.6159973144531, 662.4345703125, -75.46318054199219, -0.46677398681640625, 1452.2685546875, -351.4755859375, -122.36642456054688, 1871.1966552734375, 369.4992980957031, 241.49270629882812, 520.9349975585938, -29.02142333984375, 503.97271728515625, 390.3877868652344, 402.1949768066406, -158.6305694580078, 575.50927734375, 1309.92626953125, 19.203819274902344, 356.9931640625, 1135.211181640625, -255.7151336669922, 953.8455200195312, 656.8216552734375, 9.504135131835938, 204.52435302734375, 57.777854919433594, -554.4934692382812, 63.04949951171875, 639.6461791992188, 144.17115783691406, 275.50067138671875, -1332.746826171875, 1068.4232177734375, 108.58613586425781, -375.3778076171875, 748.8046875, 174.3180694580078, 794.0379638671875, -102.94706726074219, 1204.01416015625, 697.7164306640625, -188.93821716308594, 254.269775390625, 312.1337585449219, 456.8265686035156, 1617.694091796875, 375.87841796875, 662.5245361328125, 531.4833984375, 572.56884765625, 943.6781005859375, -180.8330078125, 297.24981689453125, 17.262187957763672, 323.2646484375, -33.21612548828125, 1457.416015625, 1316.6707763671875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000587.npy"}
{"epoch": 0.8619676945668135, "step": 588, "batch_size": 64, "mean": 346.3304443359375, "std": 510.37353515625, "min": -916.4860229492188, "p10": -301.40297241210936, "median": 333.31455993652344, "p90": 1010.6517028808595, "max": 1404.3543701171875, "pos_frac": 0.765625, "sample": [195.54164123535156, 568.1924438476562, -266.3677673339844, 127.31326293945312, 50.89622497558594, 651.58837890625, 634.9326171875, 32.86944580078125, 973.7803344726562, 722.3724365234375, 87.23779296875, -505.8289794921875, -57.61541748046875, 459.5374450683594, -316.4180603027344, -374.9313049316406, 847.879638671875, 593.71337890625, 181.6685791015625, 270.9720764160156, 608.0623779296875, -164.24087524414062, -141.0062713623047, 1147.314697265625, 196.27174377441406, 453.60491943359375, 1314.5699462890625, -117.021240234375, -469.78570556640625, 62.59740447998047, 324.5827331542969, 55.89543151855469, 283.167236328125, 1021.603515625, 908.4135131835938, 502.9371337890625, 165.15016174316406, -252.30929565429688, 1244.2008056640625, 985.0974731445312, 1234.8934326171875, 769.4967651367188, 45.384796142578125, -464.40118408203125, 727.7445678710938, 431.7970275878906, 226.6535186767578, 964.6939697265625, -916.4860229492188, 378.7578125, -17.622997283935547, 1404.3543701171875, 1123.1220703125, 487.471435546875, 417.25927734375, 601.4285888671875, 672.9856567382812, 358.8174743652344, -151.73118591308594, 159.7716064453125, 49.258384704589844, 342.04638671875, 869.005615234375, -555.9913330078125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000588.npy"}
{"epoch": 0.8634361233480177, "step": 589, "batch_size": 64, "mean": 448.1602783203125, "std": 563.23779296875, "min": -788.15576171875, "p10": -233.53995208740224, "median": 394.42601013183594, "p90": 1299.4166259765627, "max": 1941.76025390625, "pos_frac": 0.84375, "sample": [1497.7418212890625, 346.214599609375, 395.403076171875, 7.673320770263672, 772.636962890625, 158.73277282714844, -134.16786193847656, 626.8970947265625, 341.28314208984375, -335.42132568359375, -27.969497680664062, 393.4489440917969, 84.1283187866211, 229.64271545410156, 571.731201171875, 1322.5992431640625, 1941.76025390625, 27.74005889892578, 485.17327880859375, 220.31234741210938, 526.2593383789062, 73.997802734375, 165.8225860595703, 1440.7181396484375, 18.2784423828125, 524.015380859375, 349.7916564941406, 627.9129638671875, 1875.6126708984375, -114.2745361328125, 578.49169921875, 623.9226684570312, 858.846435546875, 515.6638793945312, 1245.3238525390625, -291.7863464355469, -276.12799072265625, 137.4609375, 727.0272216796875, 115.44499206542969, 468.0690002441406, -525.7182006835938, 152.30606079101562, 757.62548828125, 579.3568115234375, 626.381103515625, 1019.74853515625, 304.21087646484375, 537.893798828125, 1411.250732421875, 563.811767578125, 251.52450561523438, 370.002685546875, -788.15576171875, 263.78375244140625, -613.9219970703125, 675.6583251953125, 1423.2044677734375, 32.45356750488281, 294.5532531738281, 1127.6446533203125, 614.3056640625, -518.3065795898438, 1006.6134643554688], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000589.npy"}
{"epoch": 0.8649045521292217, "step": 590, "batch_size": 64, "mean": 444.70721435546875, "std": 548.3279418945312, "min": -442.848388671875, "p10": -204.31859741210937, "median": 347.1378479003906, "p90": 1155.741833496094, "max": 1957.569091796875, "pos_frac": 0.75, "sample": [544.1658325195312, 1501.809814453125, 815.7150268554688, 286.46710205078125, 934.4965209960938, -329.2567138671875, 169.59939575195312, 1858.0660400390625, 850.5082397460938, -148.30642700195312, 542.4622192382812, -212.6767578125, 388.9224853515625, 1354.72314453125, -193.85845947265625, -127.04231262207031, -182.50250244140625, 144.7127685546875, 711.62646484375, 213.93768310546875, 97.4075927734375, -341.61114501953125, -7.741912841796875, 305.0625, -139.47247314453125, 211.29437255859375, 769.0935668945312, -208.801513671875, 1319.68212890625, -121.50018310546875, 962.6375732421875, 459.9764404296875, 33.866111755371094, 1376.57861328125, 266.9408874511719, 396.2055969238281, 355.4886474609375, 635.14794921875, 1109.3341064453125, 559.5311889648438, 508.5501708984375, -234.0169677734375, -102.599365234375, -442.848388671875, 338.78704833984375, 65.31724548339844, 275.2501220703125, 889.6747436523438, 254.1676483154297, -315.14105224609375, 462.6216735839844, 1175.630859375, 280.39239501953125, -55.32518768310547, 653.5196533203125, 672.6248168945312, 868.4976806640625, 1957.569091796875, 195.56344604492188, 1040.085205078125, 1069.8076171875, 885.013671875, 138.3460235595703, 717.0843505859375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000590.npy"}
{"epoch": 0.8663729809104258, "step": 591, "batch_size": 64, "mean": 404.666748046875, "std": 561.13037109375, "min": -1182.3060302734375, "p10": -339.85819702148433, "median": 386.64830017089844, "p90": 1085.579052734375, "max": 1615.801025390625, "pos_frac": 0.84375, "sample": [19.57868194580078, 1183.6474609375, 1251.1954345703125, 640.4220581054688, 1041.2530517578125, -107.23118591308594, 6.773298263549805, -1182.3060302734375, 621.8482055664062, 1585.12109375, 943.0369873046875, 144.69439697265625, 604.5032348632812, 313.3900146484375, 57.87908935546875, 1615.801025390625, 735.9481201171875, 453.1645202636719, 126.7911376953125, 256.94757080078125, -22.172927856445312, 1078.8984375, 201.21778869628906, 74.42662811279297, 689.6836547851562, 231.66348266601562, -696.1336669921875, 587.211181640625, 483.85772705078125, 473.4132080078125, -314.61773681640625, 123.25677490234375, 375.966064453125, -624.9935913085938, 979.8795776367188, 221.24058532714844, 166.329345703125, 123.8348388671875, 675.1403198242188, -350.675537109375, -384.687255859375, 838.2882690429688, 130.31101989746094, 397.3305358886719, -630.4778442382812, 997.958740234375, 803.20703125, 98.35552978515625, 641.387451171875, 1087.3822021484375, 76.9824447631836, 1081.3717041015625, 830.4227905273438, 123.6177978515625, 1303.948486328125, 504.7393798828125, 524.5408935546875, 1354.439453125, -511.8295593261719, 706.4503784179688, 162.3979949951172, 452.623779296875, 271.24102783203125, 248.78469848632812], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000591.npy"}
{"epoch": 0.8678414096916299, "step": 592, "batch_size": 64, "mean": 364.5346374511719, "std": 701.0535278320312, "min": -1292.0467529296875, "p10": -411.5307952880859, "median": 363.8382110595703, "p90": 1225.8685302734375, "max": 1943.1712646484375, "pos_frac": 0.765625, "sample": [260.6043701171875, 1222.167724609375, -130.73684692382812, 426.6143798828125, -901.8477172851562, 172.04420471191406, 407.5011291503906, 1046.0675048828125, 1683.875244140625, 1291.5040283203125, -1021.2903442382812, 1943.1712646484375, 248.20571899414062, 888.6582641601562, -1015.3310546875, -406.5020446777344, -79.03276824951172, 234.70932006835938, 184.4305419921875, 1227.45458984375, 108.78577423095703, 1012.435302734375, 6.559120178222656, 1162.4405517578125, 358.1861877441406, 116.31068420410156, 187.68408203125, 992.3533325195312, 1523.26025390625, 599.3422241210938, 29.29714584350586, -219.7209014892578, 224.10726928710938, 715.2833251953125, 1083.9527587890625, 197.62216186523438, -369.92462158203125, 369.490234375, -413.68597412109375, 625.4269409179688, 475.3435974121094, -122.8313980102539, 738.16650390625, 1627.380859375, 395.9981689453125, -337.2889709472656, 438.6290588378906, -1292.0467529296875, 858.2437744140625, 352.2661437988281, 687.6036376953125, 143.1529083251953, 809.0447387695312, 407.6213073730469, -844.2892456054688, 872.4885864257812, 687.3125610351562, 153.22906494140625, 1264.285400390625, -331.3177185058594, -1253.1007080078125, 851.5352783203125, 718.9536743164062, 38.36298370361328], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000592.npy"}
{"epoch": 0.869309838472834, "step": 593, "batch_size": 64, "mean": 294.2417297363281, "std": 610.8946533203125, "min": -980.476806640625, "p10": -389.55068359374997, "median": 193.04216766357422, "p90": 1256.3693603515626, "max": 1775.17578125, "pos_frac": 0.640625, "sample": [-19.513931274414062, 824.4112548828125, -281.3191833496094, 92.7115478515625, -543.9664306640625, -502.44659423828125, 1520.0523681640625, -401.65234375, 1216.91796875, 551.6806640625, -134.3167724609375, 1273.277099609375, 1399.1448974609375, 1006.8096923828125, 6.093559265136719, -769.129150390625, 1321.0816650390625, 1402.126220703125, 726.9301147460938, 173.1206512451172, -361.3134765625, 143.15155029296875, 388.1007080078125, 75.8369369506836, 67.07878112792969, 1775.17578125, 532.9534301757812, 102.25748443603516, 338.8079833984375, -791.054443359375, 0.6956233978271484, 463.3591003417969, -303.9640197753906, 212.96368408203125, 668.4728393554688, 771.3321533203125, -116.92898559570312, 1296.60498046875, -326.01422119140625, 386.6839599609375, -808.2815551757812, 611.7677612304688, -330.4488220214844, -980.476806640625, -39.35284423828125, -13.3785400390625, -21.278295516967773, -6.177375793457031, 457.3934631347656, 825.2459106445312, 552.1432495117188, 334.0732727050781, 805.475830078125, 492.052734375, 419.35174560546875, 941.641357421875, -11.574737548828125, 43.63898468017578, -151.41879272460938, 609.8175659179688, -113.374755859375, 511.52215576171875, -172.6735076904297, 689.568603515625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000593.npy"}
{"epoch": 0.8707782672540382, "step": 594, "batch_size": 64, "mean": 362.5345153808594, "std": 588.5816650390625, "min": -803.3651123046875, "p10": -373.60186767578114, "median": 309.84735107421875, "p90": 1172.471533203125, "max": 1659.0762939453125, "pos_frac": 0.75, "sample": [950.3941040039062, -99.54483032226562, 104.50807189941406, 44.391143798828125, -53.07836151123047, -685.2322998046875, 621.7579956054688, 485.7221984863281, 135.06646728515625, 742.064208984375, 507.78253173828125, 361.858154296875, 1659.0762939453125, 657.8987426757812, 138.1966094970703, 874.1573486328125, 726.2088012695312, 256.6512451171875, 1014.3732299804688, -521.7591552734375, -549.9842529296875, 1418.144287109375, -755.4210815429688, 1624.26611328125, -175.3214874267578, 561.48828125, -165.21444702148438, 319.7033386230469, 438.1321105957031, -225.84573364257812, 29.624038696289062, 430.4786376953125, 1405.9603271484375, 536.4317626953125, 280.8233642578125, 606.0821533203125, 547.8671875, 28.021984100341797, 299.9913635253906, -639.697998046875, -54.91318130493164, 934.6318969726562, 181.86935424804688, 0.7637710571289062, 1181.5423583984375, 982.5670166015625, 802.0094604492188, 586.5689086914062, -411.67132568359375, 432.39886474609375, -803.3651123046875, 37.59928894042969, 252.33856201171875, -153.002197265625, 338.73968505859375, -38.827945709228516, 1151.3062744140625, 934.64599609375, 1500.879150390625, 241.38987731933594, 1318.274169921875, -284.77313232421875, 79.921875, 55.292991638183594], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000594.npy"}
{"epoch": 0.8722466960352423, "step": 595, "batch_size": 64, "mean": 419.83013916015625, "std": 583.6467895507812, "min": -488.3409118652344, "p10": -162.39042205810546, "median": 309.8653564453125, "p90": 1030.1606567382812, "max": 2732.591064453125, "pos_frac": 0.8125, "sample": [860.5137939453125, 262.7235107421875, 636.5121459960938, 48.620460510253906, 30.326499938964844, 1026.0477294921875, 347.27020263671875, 973.0302124023438, -345.35009765625, 439.42510986328125, 1515.102294921875, 97.31971740722656, 1168.3211669921875, 893.3114013671875, -124.50404357910156, 354.46728515625, 407.1367492675781, 1884.7353515625, 263.189208984375, -169.65408325195312, 46.30984115600586, -186.84591674804688, 641.440185546875, 336.9130554199219, -241.27813720703125, -352.3815612792969, 219.28610229492188, 101.49939727783203, 321.93621826171875, 332.9448547363281, 158.2030029296875, 225.73175048828125, 298.0811767578125, 276.77703857421875, 1031.92333984375, 22.625566482543945, 278.4928894042969, 868.1246337890625, 1598.994140625, 321.6495361328125, 13.168075561523438, -488.3409118652344, 529.01123046875, -384.5769958496094, 483.3296813964844, 373.66796875, 251.0694122314453, -145.44187927246094, 688.4906616210938, 680.271240234375, 20.20165252685547, 826.9766235351562, 122.54279327392578, -97.16212463378906, 412.9451904296875, 99.96943664550781, 903.0557250976562, 96.60401916503906, -140.48583984375, -17.319286346435547, 1748.2620849609375, 506.953857421875, 784.37255859375, 2732.591064453125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000595.npy"}
{"epoch": 0.8737151248164464, "step": 596, "batch_size": 64, "mean": 287.9169921875, "std": 604.2826538085938, "min": -946.5887451171875, "p10": -462.5533630371093, "median": 208.0723648071289, "p90": 1211.5069580078132, "max": 1925.171142578125, "pos_frac": 0.734375, "sample": [-384.6121826171875, 21.73834228515625, 623.289794921875, 52.76258850097656, 778.5349731445312, 611.12744140625, 211.94821166992188, 568.3989868164062, 298.5862731933594, 541.9271850585938, 1925.171142578125, -88.88485717773438, -537.2850341796875, 459.4176940917969, 1029.3001708984375, 1429.0189208984375, 17.727005004882812, 133.7864532470703, 490.5992431640625, 687.673583984375, -184.3426513671875, 540.4077758789062, 24.524089813232422, 214.1157989501953, -495.95672607421875, 150.5144805908203, 554.676513671875, 396.9180603027344, 267.40618896484375, 128.53956604003906, -320.359375, 140.57164001464844, -587.0582275390625, 1478.368896484375, 86.1316146850586, 900.1620483398438, 1528.812744140625, 544.8872680664062, -19.448427200317383, -76.20027923583984, 282.99261474609375, 492.3817138671875, 138.0950164794922, 337.7535705566406, -221.41314697265625, 251.7231903076172, 1289.5955810546875, 204.19651794433594, -263.3399963378906, -265.1055908203125, 1821.186767578125, 70.19293975830078, -946.5887451171875, 197.48934936523438, 319.4880676269531, -647.1332397460938, 63.63575744628906, -665.5039672851562, 680.2808837890625, 118.44332885742188, 1515.90380859375, -309.7743835449219, 423.97332763671875, -604.6839599609375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000596.npy"}
{"epoch": 0.8751835535976505, "step": 597, "batch_size": 64, "mean": 505.18560791015625, "std": 688.1543579101562, "min": -1100.850830078125, "p10": -105.62928161621092, "median": 386.8045654296875, "p90": 1422.7394042968751, "max": 2400.16796875, "pos_frac": 0.78125, "sample": [948.8507080078125, 558.2532958984375, 1053.652587890625, 340.17266845703125, 470.55218505859375, 1425.2379150390625, -113.93612670898438, 1037.8603515625, 1542.05419921875, -380.449951171875, -73.74275970458984, -86.24664306640625, 285.4973449707031, 53.59768295288086, 741.527587890625, 109.5691909790039, 1122.134033203125, 131.41844177246094, 389.5582580566406, 1840.078369140625, 485.06219482421875, 424.19927978515625, -35.659942626953125, 992.908447265625, 265.81256103515625, 281.3429870605469, 34.4310417175293, 194.08729553222656, 117.46263122558594, -855.1453247070312, -1100.850830078125, -42.21466064453125, 1383.4627685546875, 456.27447509765625, 618.352783203125, 2280.459716796875, 1559.7525634765625, -51.22764587402344, -444.3955383300781, 122.25084686279297, 723.3837890625, 1294.95654296875, 86.9340591430664, 274.891845703125, 384.0508728027344, -70.3957290649414, 544.9192504882812, 986.4876708984375, 43.07200622558594, 1416.9095458984375, 93.28289031982422, -324.04815673828125, -278.6724853515625, 2400.16796875, 1765.2894287109375, 142.19004821777344, 559.5194702148438, 478.659912109375, 737.612548828125, 727.2904052734375, 177.1298065185547, 1368.4991455078125, -77.5882797241211, 795.3300170898438], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000597.npy"}
{"epoch": 0.8766519823788547, "step": 598, "batch_size": 64, "mean": 390.3699035644531, "std": 580.1362915039062, "min": -1103.8184814453125, "p10": -199.6864059448242, "median": 432.16258239746094, "p90": 1129.7786987304687, "max": 2001.0634765625, "pos_frac": 0.78125, "sample": [454.5892333984375, 647.310302734375, 367.9067687988281, 1026.43994140625, -1103.8184814453125, 419.5138244628906, 1126.5740966796875, 707.1670532226562, 203.3980712890625, 1200.0950927734375, 34.294097900390625, -669.554443359375, -936.9078369140625, 205.5786895751953, 1370.284423828125, -205.6461639404297, 734.1240234375, 728.349609375, 1176.9371337890625, 1770.49658203125, 271.1168212890625, 1166.6539306640625, 744.53955078125, -355.86907958984375, 327.1766357421875, 451.4145812988281, 569.0841674804688, 347.2054443359375, -899.1488647460938, 160.7406768798828, 691.745361328125, 565.2367553710938, 510.1082458496094, -110.91580963134766, -515.2996215820312, 544.4468383789062, 470.7560119628906, 1131.152099609375, 187.99681091308594, 279.40338134765625, 215.22824096679688, 197.92648315429688, -185.78030395507812, 539.2535400390625, -37.17982482910156, 637.3740234375, 470.3936767578125, 444.81134033203125, 25.29548454284668, 157.82308959960938, 344.1661682128906, 2001.0634765625, 808.1912231445312, 818.3658447265625, 533.6326293945312, 263.3953552246094, 12.8125, 1014.3763427734375, 532.3204956054688, -184.69546508789062, -12.052078247070312, -129.37078857421875, 757.7724609375, -36.12485885620117], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000598.npy"}
{"epoch": 0.8781204111600588, "step": 599, "batch_size": 64, "mean": 329.6752624511719, "std": 495.97906494140625, "min": -1854.2421875, "p10": -122.4646087646484, "median": 315.4972229003906, "p90": 943.4279357910159, "max": 1487.2894287109375, "pos_frac": 0.859375, "sample": [398.2221984863281, 841.6643676757812, 361.00006103515625, 347.07958984375, 728.766845703125, 227.54962158203125, 1487.2894287109375, 295.4619140625, 156.38067626953125, 463.40576171875, 481.0390625, -716.5411376953125, 6.213693618774414, 54.142330169677734, 8.267477035522461, 388.878662109375, 883.2640380859375, 969.2124633789062, 527.136962890625, 1330.0645751953125, 183.24713134765625, 629.9132080078125, 626.9393310546875, 457.8453063964844, 335.53253173828125, 23.662405014038086, 115.84540557861328, 290.6795654296875, 177.6098175048828, -1854.2421875, 624.5150146484375, 184.52899169921875, 490.4129638671875, 177.21641540527344, 1020.9066772460938, 681.1830444335938, 558.7783813476562, -84.25665283203125, 1168.69970703125, 235.21328735351562, 442.924560546875, 212.49905395507812, 876.922607421875, -251.59458923339844, -2.7603912353515625, 418.49334716796875, 558.814208984375, 173.38211059570312, 335.8780212402344, 102.32293701171875, 1028.1768798828125, 247.90663146972656, 31.76490020751953, 12.992088317871094, -327.9250183105469, 34.928646087646484, 293.6859436035156, 470.3221435546875, -307.54974365234375, 1144.5263671875, -138.83944702148438, -328.89898681640625, 244.28880310058594, 544.2271728515625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000599.npy"}
{"epoch": 0.8795888399412628, "step": 600, "batch_size": 64, "mean": 310.2664794921875, "std": 605.6271362304688, "min": -1151.1654052734375, "p10": -475.87102661132815, "median": 235.55113983154297, "p90": 1040.8859008789063, "max": 2319.724609375, "pos_frac": 0.75, "sample": [148.9512176513672, 641.0830688476562, 498.825439453125, -1151.1654052734375, 799.045166015625, 580.871337890625, -101.61993408203125, 530.1640625, -614.8273315429688, 510.3731994628906, 64.43767547607422, 55.74148178100586, 935.4979858398438, -231.54421997070312, 302.28839111328125, 669.508056640625, -618.0126342773438, 1495.093994140625, 61.909507751464844, 251.263916015625, 800.9840087890625, 924.0166015625, -145.0904083251953, 245.4588623046875, -71.93211364746094, -275.2177734375, 772.6443481445312, 1051.0888671875, 1085.6585693359375, 847.700927734375, 190.18008422851562, 42.562042236328125, 888.3052978515625, 47.598304748535156, 1053.00390625, 537.95458984375, 36.92637634277344, 246.41554260253906, -476.23004150390625, 1441.28271484375, 2319.724609375, 249.1439666748047, -170.1177215576172, 55.01207733154297, 1363.0255126953125, 159.16510009765625, 830.870361328125, 38.5650749206543, -728.5313720703125, 522.434814453125, 73.16914367675781, 1017.0789794921875, 373.96630859375, -37.69947052001953, 225.64341735839844, 678.4365234375, -611.5982666015625, 196.19595336914062, 49.70006561279297, -475.0333251953125, -582.8619384765625, 366.89190673828125, -209.88919067382812, 82.56722259521484], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000600.npy"}
{"epoch": 0.8810572687224669, "step": 601, "batch_size": 64, "mean": 399.72930908203125, "std": 546.2156982421875, "min": -1091.2960205078125, "p10": -223.85581817626948, "median": 321.71917724609375, "p90": 1085.09775390625, "max": 1730.1829833984375, "pos_frac": 0.78125, "sample": [613.5379638671875, 1267.75, 1730.1829833984375, 717.0824584960938, -57.81215286254883, 1010.2935180664062, 317.240478515625, 101.06896209716797, -168.48446655273438, 227.41534423828125, -244.91522216796875, -174.7172088623047, 1352.1689453125, 457.68658447265625, 315.09814453125, 637.2391967773438, 326.00103759765625, 601.6878051757812, -265.5102233886719, 1232.0250244140625, 806.6165161132812, 601.3211669921875, 802.1303100585938, -115.64891052246094, 557.3252563476562, 252.27110290527344, -64.89944458007812, 539.5485229492188, 912.6385498046875, 242.33505249023438, 312.1620178222656, 317.43731689453125, -971.42578125, 676.6361083984375, -440.6417541503906, 541.2698364257812, 128.35418701171875, -74.29734802246094, 170.39389038085938, 273.63946533203125, 595.4810791015625, 684.5155029296875, 155.1277313232422, -1091.2960205078125, 1104.28369140625, -526.923095703125, 40.759368896484375, 1368.5106201171875, 636.3767700195312, -311.04443359375, 1002.0933227539062, 949.1947021484375, 176.76353454589844, 354.8878479003906, 191.2067413330078, 925.0093383789062, 184.0924835205078, 1420.400146484375, 1040.33056640625, 351.26422119140625, -106.60880279541016, 235.590087890625, 461.8941650390625, 278.55902099609375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000601.npy"}
{"epoch": 0.882525697503671, "step": 602, "batch_size": 64, "mean": 492.49066162109375, "std": 558.3408203125, "min": -559.865966796875, "p10": -88.06064605712888, "median": 367.70924377441406, "p90": 1105.373474121094, "max": 2352.544677734375, "pos_frac": 0.828125, "sample": [442.026123046875, 148.27447509765625, -148.1681671142578, 334.2962646484375, 62.664894104003906, 161.13421630859375, 985.8067016601562, 704.1370849609375, 190.46620178222656, 256.3587646484375, 439.22198486328125, 44.40840148925781, 606.0579223632812, 845.9345703125, 1132.3212890625, 29.612876892089844, 535.8340454101562, 363.2193603515625, 372.1991271972656, 1232.3974609375, -16.556549072265625, 222.50885009765625, -33.187774658203125, -488.3034973144531, 1010.89794921875, 771.22265625, 269.82745361328125, 480.1635437011719, -6.93768310546875, -98.20375061035156, 71.64705657958984, 1017.2802734375, 832.9473876953125, 143.1330108642578, 782.5933837890625, 1165.87890625, 169.17031860351562, 522.0104370117188, -559.865966796875, -313.6121520996094, 856.2296142578125, 1042.4952392578125, 742.7306518554688, 1004.90576171875, 813.3943481445312, 837.7266845703125, 2265.626708984375, -124.79145812988281, 1236.6697998046875, -419.98486328125, 940.0087890625, 829.2142944335938, 207.72451782226562, 154.6060791015625, 1292.4921875, -64.39340209960938, 280.3985595703125, 838.3829956054688, 104.02239990234375, 126.11961364746094, 871.5188598632812, 2352.544677734375, 361.5928039550781, 289.35174560546875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000602.npy"}
{"epoch": 0.8839941262848752, "step": 603, "batch_size": 64, "mean": 457.523193359375, "std": 689.40625, "min": -804.6351318359375, "p10": -325.92018432617186, "median": 358.19627380371094, "p90": 1276.637097167969, "max": 3218.64501953125, "pos_frac": 0.734375, "sample": [-334.2020263671875, -325.9283142089844, 293.77606201171875, 42.63904571533203, 1231.896484375, -18.3485107421875, 1147.828369140625, 373.06280517578125, 774.7469482421875, 1127.88330078125, -101.619140625, 125.9381332397461, -667.4895629882812, 802.31689453125, 50.40902328491211, 443.5899353027344, 41.728759765625, 1430.0731201171875, 1842.8779296875, 531.9954833984375, 998.1792602539062, 807.5794067382812, 727.172607421875, 73.54347229003906, 304.6053466796875, 286.2290954589844, 443.96221923828125, 185.36288452148438, -52.84933090209961, 269.751708984375, 1475.91796875, -804.6351318359375, 1590.1806640625, -85.10696411132812, -5.367744445800781, 371.12933349609375, 1036.141845703125, -297.51727294921875, 695.31640625, 530.564208984375, 889.7424926757812, -406.50933837890625, -110.01591491699219, -12.32004165649414, -117.80049896240234, 554.2683715820312, 680.2714233398438, 96.94654846191406, -325.9012145996094, 386.8111877441406, 489.60919189453125, 1295.8116455078125, 930.9074096679688, -343.89849853515625, 220.961669921875, -607.6061401367188, 3218.64501953125, 345.2632141113281, 1659.1690673828125, 51.5916748046875, 878.849853515625, 1180.7579345703125, 884.9945678710938, 77.5997314453125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000603.npy"}
{"epoch": 0.8854625550660793, "step": 604, "batch_size": 64, "mean": 498.571044921875, "std": 673.8912353515625, "min": -969.4199829101562, "p10": -245.893635559082, "median": 459.7635192871094, "p90": 1472.7322753906253, "max": 2430.614501953125, "pos_frac": 0.765625, "sample": [-433.46905517578125, 210.66204833984375, -227.16290283203125, -129.2532196044922, 663.2450561523438, 1004.5896606445312, 985.7550048828125, 1052.6168212890625, 810.099853515625, 2177.90087890625, -969.4199829101562, 963.4237670898438, 65.94331359863281, -253.67117309570312, 920.2471313476562, 1037.9755859375, 842.5926513671875, 118.40753173828125, 359.8223571777344, 1072.8502197265625, 203.99267578125, -531.8056030273438, 21.059951782226562, 1411.189697265625, 511.7739562988281, -555.3660278320312, 1561.45556640625, -227.7460479736328, 194.27474975585938, 131.5294189453125, 50.534278869628906, 443.8175354003906, 756.4959716796875, 386.7931823730469, -199.10897827148438, 1647.890625, 725.9150390625, 19.43714141845703, -180.18174743652344, 900.493408203125, 600.1244506835938, 452.9134826660156, 530.77783203125, 361.5081787109375, 481.8337097167969, 2430.614501953125, -448.4713134765625, 1616.77783203125, 466.6135559082031, -42.69786071777344, 565.14599609375, 1099.7547607421875, 788.8629150390625, -41.18513870239258, 1807.921875, 515.9838256835938, -364.92236328125, 25.082489013671875, 403.5135192871094, 729.0379028320312, -88.366455078125, 525.026123046875, 1499.107666015625, 447.9889831542969], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000604.npy"}
{"epoch": 0.8869309838472834, "step": 605, "batch_size": 64, "mean": 362.182373046875, "std": 610.0692138671875, "min": -1155.2242431640625, "p10": -248.03500518798828, "median": 279.25828552246094, "p90": 1060.2768554687505, "max": 2597.36572265625, "pos_frac": 0.734375, "sample": [10.470779418945312, -302.37213134765625, -21.66534423828125, 240.71871948242188, -18.140968322753906, 147.91297912597656, 800.1017456054688, 660.1954956054688, 806.0201416015625, 234.03472900390625, 557.1078491210938, -66.73703002929688, 207.60397338867188, -221.4942626953125, 446.9926452636719, 414.36602783203125, 130.63340759277344, 885.5492553710938, 1624.69970703125, -241.0365753173828, 2597.36572265625, 123.83639526367188, 500.14892578125, -43.235504150390625, 274.31427001953125, 853.3855590820312, -189.35345458984375, -251.03433227539062, 321.27264404296875, 322.38214111328125, 38.369163513183594, -258.904541015625, 946.592529296875, -464.7500305175781, -125.87310791015625, 434.7439880371094, 1138.662109375, 87.12570190429688, 276.68572998046875, 700.2835693359375, 80.89515686035156, 2251.4892578125, -720.3975830078125, -1155.2242431640625, 432.04022216796875, 441.9903564453125, -296.9248046875, 1101.5220947265625, 48.09607696533203, -16.621030807495117, 560.3370361328125, 346.11663818359375, 1284.0379638671875, 279.8188781738281, 278.69769287109375, 389.0863037109375, 450.185302734375, 1133.37841796875, 464.90301513671875, 197.6028594970703, 964.0379638671875, 369.3809509277344, -157.53631591796875, 875.7796630859375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000605.npy"}
{"epoch": 0.8883994126284875, "step": 606, "batch_size": 64, "mean": 399.5196228027344, "std": 563.1110229492188, "min": -550.44580078125, "p10": -325.369090270996, "median": 321.8054962158203, "p90": 1099.0756591796878, "max": 2090.75, "pos_frac": 0.78125, "sample": [-425.75823974609375, 25.291595458984375, 133.67947387695312, -366.8548278808594, 340.7416076660156, 1028.2960205078125, 186.95993041992188, 162.32012939453125, 1129.4097900390625, 778.0033569335938, 140.6767578125, 2073.418701171875, 844.934814453125, -21.93896484375, -491.7922058105469, 230.56512451171875, 382.5449523925781, 219.1021728515625, 339.6344299316406, 1307.15625, 739.6700439453125, -412.9237060546875, 1546.084228515625, 19.417015075683594, 897.615234375, 651.3599853515625, 524.2781982421875, 203.81788635253906, -123.82772064208984, -238.9199981689453, 594.0867309570312, 692.711669921875, -362.418701171875, 316.2107238769531, -550.44580078125, 327.4002685546875, 231.95388793945312, 721.8026733398438, -117.83051300048828, -85.15899658203125, -533.65380859375, -142.5762939453125, 2090.75, 1147.9891357421875, 288.1108703613281, 615.5056762695312, 80.97138214111328, 533.485107421875, 780.0604858398438, 341.62762451171875, 20.633438110351562, 764.8832397460938, -13.009353637695312, 229.61024475097656, 949.3519897460938, 1453.7896728515625, 276.095703125, 705.7510375976562, 18.66326141357422, 279.0626220703125, 558.7457275390625, 641.9107666015625, 536.6282958984375, 353.5959167480469], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000606.npy"}
{"epoch": 0.8898678414096917, "step": 607, "batch_size": 64, "mean": 332.97552490234375, "std": 586.2377319335938, "min": -1157.5836181640625, "p10": -277.4526702880859, "median": 270.28173828125, "p90": 914.1922424316409, "max": 2613.08740234375, "pos_frac": 0.796875, "sample": [421.76617431640625, -140.62953186035156, 509.98577880859375, 250.71849060058594, 171.86126708984375, 165.8962860107422, 1198.6600341796875, 356.53802490234375, 3.679473876953125, 1303.884521484375, -286.052734375, 689.3241577148438, 409.88555908203125, -280.9510192871094, 136.94180297851562, 947.2449951171875, -731.4774780273438, 202.22750854492188, 424.13641357421875, 57.60407257080078, -186.77801513671875, 140.2246856689453, 378.5782165527344, 437.86151123046875, -465.7742919921875, 591.8627319335938, -215.51556396484375, 1520.572509765625, 661.0960693359375, 87.47648620605469, 640.885009765625, 99.88971710205078, 676.2485961914062, 252.52474975585938, 522.1615600585938, 259.0911865234375, 837.0691528320312, 78.3258056640625, 1436.8065185546875, 336.8905334472656, -957.6207885742188, 537.9412231445312, -269.28985595703125, 449.94073486328125, 696.5518188476562, 88.53164672851562, -82.1924057006836, 56.03435516357422, -1157.5836181640625, 215.5218505859375, 493.1814880371094, 273.5054626464844, -96.49899291992188, 629.075927734375, 244.07192993164062, 83.96208190917969, 385.297119140625, 2613.08740234375, 1512.7576904296875, 768.6746826171875, 267.0580139160156, -417.6068115234375, 579.5184326171875, 495.7728271484375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000607.npy"}
{"epoch": 0.8913362701908958, "step": 608, "batch_size": 64, "mean": 390.30804443359375, "std": 489.9488220214844, "min": -435.3331604003906, "p10": -257.25680694580075, "median": 393.0030517578125, "p90": 933.8051757812501, "max": 1754.1953125, "pos_frac": 0.796875, "sample": [785.7714233398438, 849.7989501953125, 414.4864196777344, 505.97235107421875, -424.3917541503906, 1529.419921875, -405.7298278808594, 97.51490783691406, -101.25941467285156, 599.7396240234375, 710.31005859375, 34.81010437011719, 38.187461853027344, 717.5039672851562, -170.2423553466797, 155.2386474609375, -398.5772399902344, 437.6656494140625, -154.86891174316406, 343.3501892089844, 514.7357177734375, 694.2055053710938, 86.13963317871094, 733.0076293945312, 29.950660705566406, 405.04779052734375, 655.9276123046875, 469.6752014160156, 950.3101196289062, -205.2744903564453, 63.288143157958984, -41.87739562988281, 242.8236846923828, 311.7379150390625, 735.6715698242188, 304.1143798828125, 398.99273681640625, 419.6112060546875, 1197.072265625, -351.85833740234375, 1754.1953125, 198.7288818359375, 625.7061767578125, 995.434814453125, 1166.443359375, 314.86578369140625, 53.377685546875, 815.5091552734375, -61.740760803222656, -279.5349426269531, 616.5452880859375, 18.23358917236328, 540.8809814453125, 785.0505981445312, -303.5528869628906, 895.2936401367188, -435.3331604003906, 114.54537963867188, 1516.0068359375, 826.4509887695312, 794.4618530273438, 387.01336669921875, 305.0032653808594, 158.1290283203125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000608.npy"}
{"epoch": 0.8928046989720999, "step": 609, "batch_size": 64, "mean": 498.7186584472656, "std": 762.4298095703125, "min": -1177.136962890625, "p10": -279.8130126953124, "median": 463.5579071044922, "p90": 1195.5964721679688, "max": 3735.918701171875, "pos_frac": 0.796875, "sample": [45.336029052734375, 111.31867218017578, 103.67037200927734, -617.123046875, 196.01649475097656, 429.5916748046875, 1286.9114990234375, 528.0215454101562, 532.6859130859375, -349.80181884765625, 878.1000366210938, 1658.3958740234375, 98.28794860839844, 460.495849609375, 757.9537963867188, -1177.136962890625, -82.64419555664062, 543.1890869140625, 927.934814453125, 466.6199645996094, 612.4180908203125, -159.0966796875, 814.1087036132812, 681.9593505859375, -335.52423095703125, 588.3720092773438, 1197.8065185546875, 214.90444946289062, -391.4090881347656, 678.5716552734375, 1593.1192626953125, 622.5570068359375, 2269.361572265625, 511.1737060546875, 2771.014892578125, 953.63671875, -43.31891632080078, 707.4488525390625, -87.89488983154297, 292.2156066894531, 320.8065490722656, 203.11439514160156, 845.9791259765625, 3735.918701171875, 775.14306640625, 55.6168212890625, 470.1866149902344, -103.34727478027344, -767.2109985351562, 152.66067504882812, 304.8843994140625, 278.6922912597656, 1052.0189208984375, 38.79463195800781, -1.9037857055664062, 1190.439697265625, 335.8380432128906, 617.227783203125, 570.3901977539062, 1140.26708984375, 233.1197967529297, 468.38702392578125, -331.548583984375, 43.27104568481445], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000609.npy"}
{"epoch": 0.8942731277533039, "step": 610, "batch_size": 64, "mean": 316.170654296875, "std": 579.8319702148438, "min": -1708.8446044921875, "p10": -412.46955871582026, "median": 287.33538818359375, "p90": 944.4877075195316, "max": 1654.5531005859375, "pos_frac": 0.78125, "sample": [695.0399780273438, 724.8321533203125, 117.35713195800781, 215.33836364746094, -373.9410400390625, 709.7553100585938, 74.8990478515625, 599.7846069335938, 513.6044311523438, -660.8384399414062, 66.12085723876953, 631.585693359375, 51.14856719970703, 1335.8233642578125, 799.5843505859375, 380.3253173828125, 254.39132690429688, 185.21661376953125, -552.7254638671875, -1708.8446044921875, 478.22113037109375, 1654.5531005859375, 164.8741455078125, 1096.932373046875, -222.4700927734375, 360.50567626953125, 164.82769775390625, -438.89306640625, 687.992431640625, 727.2174072265625, 1468.237060546875, -130.7915496826172, 47.78564453125, 649.8466186523438, 379.06048583984375, 1491.82470703125, -428.9817810058594, 176.41812133789062, 316.0522766113281, 732.1978149414062, 683.7120971679688, 413.7435302734375, -44.8381462097168, 711.8298950195312, 23.928070068359375, 267.7675476074219, 982.4364624023438, 135.46109008789062, -134.5347900390625, 335.485595703125, 36.85987854003906, 686.707763671875, 205.8346405029297, -121.69725036621094, 672.1624755859375, 61.562538146972656, 329.72698974609375, 281.2551574707031, -216.01683044433594, 293.4156188964844, -461.0302429199219, -720.836181640625, 855.9406127929688, 1522.177978515625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000610.npy"}
{"epoch": 0.895741556534508, "step": 611, "batch_size": 64, "mean": 527.0625, "std": 734.9736938476562, "min": -802.3525390625, "p10": -247.18706665039062, "median": 388.4667205810547, "p90": 1411.5803955078127, "max": 2494.279052734375, "pos_frac": 0.75, "sample": [1348.4178466796875, 386.1597595214844, 2408.302978515625, 1875.966064453125, 1127.4718017578125, 495.7579345703125, -166.4710693359375, 1379.0406494140625, 103.3716049194336, 219.73562622070312, 323.3341369628906, 730.0592651367188, -0.5287361145019531, -119.81498718261719, -723.0569458007812, -245.43153381347656, -375.3674621582031, -802.3525390625, 325.9050598144531, 1425.5260009765625, -703.8289794921875, 1139.14990234375, 2040.6505126953125, -513.6473999023438, 1329.2904052734375, 641.5806274414062, 32.249000549316406, 1023.2656860351562, 524.1141967773438, 118.87925720214844, -19.23431396484375, 1240.9227294921875, 384.81707763671875, -52.823158264160156, 390.773681640625, 1139.9808349609375, 50.62901306152344, 203.46597290039062, -235.35394287109375, 310.4985656738281, 889.6387939453125, 345.4512023925781, 15.874649047851562, 1791.548095703125, 417.2232666015625, 532.2332763671875, -330.4250183105469, 520.4884643554688, 1038.0706787109375, -117.62735748291016, 605.5828857421875, -247.93943786621094, 258.698486328125, 476.6265869140625, 697.7890625, 531.7973022460938, 338.9913024902344, 1742.951171875, 435.4315185546875, 1268.7491455078125, 1268.726806640625, -13.404655456542969, 2494.279052734375, 9.83895492553711], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000611.npy"}
{"epoch": 0.8972099853157122, "step": 612, "batch_size": 64, "mean": 411.22747802734375, "std": 573.362060546875, "min": -761.7311401367188, "p10": -280.4371597290039, "median": 361.0396423339844, "p90": 1085.6115112304688, "max": 2526.1630859375, "pos_frac": 0.8125, "sample": [948.4190063476562, 273.0752868652344, 260.5617980957031, -386.0462951660156, -66.3304443359375, 27.087671279907227, 1086.180419921875, 372.2347717285156, 437.203369140625, 558.7222290039062, -598.9290771484375, 751.4119873046875, 124.75985717773438, 1084.2840576171875, 871.0294189453125, 993.5631713867188, 494.447021484375, 274.7320251464844, 540.2448120117188, 1192.8531494140625, 826.21142578125, 100.3699722290039, -761.7311401367188, 1222.27392578125, 694.9747314453125, -357.48175048828125, 755.7015991210938, -295.9334716796875, 650.570068359375, 690.1531982421875, 512.4132080078125, -712.6195068359375, 35.71919250488281, 410.6200256347656, 9.815982818603516, -396.139892578125, 594.1946411132812, 349.8445129394531, 48.0130615234375, -123.493408203125, 130.52061462402344, 259.266845703125, 125.05386352539062, 209.00091552734375, 2526.1630859375, 60.002262115478516, 804.173828125, 984.4161987304688, 1264.6868896484375, 267.7763671875, 47.91889953613281, 725.2981567382812, 1386.7095947265625, 147.89614868164062, -21.326095581054688, -244.2790985107422, 546.9060668945312, 6.7796173095703125, 1067.908203125, -218.76878356933594, 1192.1943359375, 609.2621459960938, 889.4332275390625, 58.58447265625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000612.npy"}
{"epoch": 0.8986784140969163, "step": 613, "batch_size": 64, "mean": 493.9320983886719, "std": 609.1317138671875, "min": -1001.8640747070312, "p10": -23.854019165039055, "median": 455.2052917480469, "p90": 1253.6784790039062, "max": 2391.44580078125, "pos_frac": 0.828125, "sample": [638.7548828125, 1290.220947265625, 1251.307861328125, -125.75773620605469, 1127.568603515625, 1020.1651611328125, 33.46126174926758, 1151.6961669921875, 957.366455078125, -9.236471176147461, 1418.0601806640625, 159.35513305664062, 682.6236572265625, 247.27279663085938, 1585.91357421875, -443.72174072265625, -16.488723754882812, 392.66583251953125, 493.04937744140625, -27.010574340820312, 377.7283935546875, 153.2049560546875, 699.8709106445312, 681.9677734375, 1254.6944580078125, 286.59759521484375, 1079.07470703125, 954.9900512695312, 25.631591796875, 539.0335083007812, 736.4989013671875, 31.935150146484375, 5.164678573608398, 519.8457641601562, 417.3612060546875, 1728.33935546875, 681.5233764648438, 646.4644165039062, 2391.44580078125, 2.8063526153564453, -1001.8640747070312, 1036.1483154296875, 672.8517456054688, 796.9268798828125, 813.4833984375, -889.6793212890625, 167.67506408691406, 1522.292724609375, -10.270076751708984, 226.895751953125, 741.1982421875, -235.10235595703125, -475.2185363769531, 156.7645263671875, 1041.053466796875, 549.75634765625, 173.24017333984375, 285.9020080566406, 52.74260711669922, 572.5612182617188, -6.3145904541015625, 92.6819839477539, 199.01022338867188, 87.4715576171875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000613.npy"}
{"epoch": 0.9001468428781204, "step": 614, "batch_size": 64, "mean": 477.8308410644531, "std": 667.7589111328125, "min": -990.074951171875, "p10": -269.23702392578116, "median": 462.0875701904297, "p90": 1320.1372436523438, "max": 2438.869384765625, "pos_frac": 0.71875, "sample": [178.6742401123047, 431.33636474609375, -571.9215087890625, 912.6114501953125, -86.03758239746094, 1299.3065185546875, -27.700862884521484, 420.3276672363281, 555.5486450195312, -1.2325706481933594, 1238.955078125, 299.02764892578125, 492.8387756347656, 863.2731323242188, 213.51605224609375, -171.21450805664062, 1329.064697265625, 754.2293090820312, 149.8827362060547, -35.12837600708008, 1120.741943359375, -16.7857666015625, 712.208984375, 940.888427734375, -534.0455932617188, -135.75021362304688, 493.08843994140625, -74.99917602539062, 556.2468872070312, -305.142578125, 660.7708129882812, 1674.3173828125, -150.4986572265625, 842.15478515625, -506.3440856933594, -185.4573974609375, 702.9551391601562, -673.53515625, -50.85798645019531, 763.5061645507812, 1504.2225341796875, 328.7506408691406, 1461.92626953125, 290.4800109863281, 1137.580322265625, 1098.15966796875, 1222.55224609375, 225.3907470703125, 568.1134643554688, 668.8783569335938, 25.128562927246094, 325.6448059082031, 1344.747314453125, 102.97084045410156, 646.0751953125, 2438.869384765625, 1884.2935791015625, 131.91567993164062, -626.21484375, -990.074951171875, 1224.9442138671875, 113.57781982421875, 533.5609130859375, 840.8619384765625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000614.npy"}
{"epoch": 0.9016152716593245, "step": 615, "batch_size": 64, "mean": 409.751953125, "std": 660.7448120117188, "min": -1885.2677001953125, "p10": -207.52758178710937, "median": 245.13665008544922, "p90": 1095.529919433594, "max": 2605.7197265625, "pos_frac": 0.8125, "sample": [199.80648803710938, 737.9285278320312, 147.24610900878906, 1592.06982421875, 216.6429901123047, 1193.9769287109375, 9.000625610351562, 42.77265167236328, 70.02685546875, 96.64996337890625, 938.81494140625, 262.2750244140625, 242.4305877685547, -331.51123046875, 729.2249755859375, -420.3267517089844, 951.6890258789062, -1885.2677001953125, 229.2504425048828, 385.79180908203125, 247.84271240234375, 149.33343505859375, 1976.3729248046875, 166.201171875, 70.01272583007812, 1056.8809814453125, -198.65402221679688, 395.82818603515625, 547.3196411132812, 54.269775390625, 361.1460876464844, 1026.288818359375, 600.8316650390625, 1040.58447265625, 1226.5318603515625, -40.463279724121094, 28.298629760742188, 120.5301513671875, -365.38787841796875, -164.5958251953125, -631.5209350585938, 1112.09375, 1048.03662109375, 77.4024658203125, 1648.265869140625, 560.1458129882812, 603.1461791992188, 932.5695190429688, 150.96624755859375, 442.821533203125, 995.3926391601562, -208.5132293701172, 200.90016174316406, -37.205718994140625, 736.119873046875, -400.1691589355469, 237.78286743164062, 410.81646728515625, 495.0902404785156, 840.776611328125, -205.2277374267578, 2605.7197265625, 770.8236083984375, 130.22946166992188], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000615.npy"}
{"epoch": 0.9030837004405287, "step": 616, "batch_size": 64, "mean": 331.6129455566406, "std": 566.3877563476562, "min": -797.4393920898438, "p10": -338.9397430419922, "median": 286.4792175292969, "p90": 1086.9659790039063, "max": 1755.4197998046875, "pos_frac": 0.765625, "sample": [934.0594482421875, 133.38720703125, 118.37206268310547, 635.47705078125, 478.0758972167969, 40.85157012939453, 206.9169921875, -284.1519470214844, 385.38995361328125, 1134.42529296875, 1340.6937255859375, 565.160888671875, 91.16378021240234, 31.254318237304688, -724.6158447265625, 1099.040283203125, 440.8122863769531, 238.30538940429688, 167.3531494140625, -102.88484954833984, 1453.137451171875, 510.3296203613281, 446.0140686035156, 332.0248107910156, -220.96810913085938, -608.3004150390625, 553.3567504882812, 291.3726806640625, -45.381553649902344, 1206.94970703125, 1738.448486328125, 159.05665588378906, -462.183837890625, 17.132713317871094, 109.05027770996094, -797.4393920898438, -166.867431640625, 266.5127258300781, 857.3814086914062, 662.7277221679688, 445.94366455078125, 351.75628662109375, 457.79571533203125, 599.2321166992188, -682.4966430664062, 984.1177978515625, 159.36439514160156, -317.2313232421875, 149.00787353515625, 731.3580322265625, 410.3389892578125, -258.6924133300781, 1755.4197998046875, 427.38446044921875, 727.9420166015625, 143.64340209960938, 281.58575439453125, 135.3106231689453, -324.7304992675781, 594.1524047851562, 1058.7926025390625, -345.0294189453125, 1030.278076171875, -523.4564208984375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000616.npy"}
{"epoch": 0.9045521292217328, "step": 617, "batch_size": 64, "mean": 239.29647827148438, "std": 559.5552978515625, "min": -1094.4798583984375, "p10": -474.32558898925777, "median": 275.9334259033203, "p90": 792.7090820312501, "max": 2029.682373046875, "pos_frac": 0.75, "sample": [328.029541015625, 152.72906494140625, -220.106201171875, 358.49468994140625, 392.4911804199219, -196.88177490234375, -9.036474227905273, -587.3008422851562, 805.8153686523438, 208.5659942626953, -415.3379821777344, 348.7942810058594, 311.8884582519531, 382.38153076171875, 211.6553192138672, 227.498291015625, 390.55340576171875, 284.28076171875, 58.21815490722656, 1354.1932373046875, 762.1277465820312, 454.4517517089844, 78.92662811279297, 41.07653045654297, 489.7024841308594, 339.09002685546875, 89.7551498413086, -196.8769073486328, 318.570068359375, -1075.8642578125, 17.011873245239258, 405.6364440917969, -446.76593017578125, -255.83078002929688, 667.4461059570312, 280.9746398925781, 1095.4739990234375, 754.324462890625, 575.8028564453125, 463.19317626953125, 651.0283203125, -491.0577392578125, 856.339599609375, 534.0956420898438, 1773.1949462890625, 490.02532958984375, 703.2999267578125, -607.27392578125, 234.76272583007812, 561.8975219726562, -1094.4798583984375, 125.24916076660156, 53.00127410888672, 270.8922119140625, 234.51785278320312, -109.99514770507812, 901.99169921875, -25.97332763671875, 5.261240005493164, -486.1368713378906, 2029.682373046875, 305.34710693359375, -899.61865234375, 53.770965576171875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000617.npy"}
{"epoch": 0.9060205580029369, "step": 618, "batch_size": 64, "mean": 515.2799072265625, "std": 814.4938354492188, "min": -1683.2886962890625, "p10": -404.39765930175776, "median": 424.8233337402344, "p90": 1320.3045166015627, "max": 2884.2783203125, "pos_frac": 0.765625, "sample": [-1683.2886962890625, 810.537109375, 127.77996826171875, 150.76431274414062, 150.73287963867188, 249.8312225341797, 195.06808471679688, 64.12171936035156, 2452.0029296875, 964.5845336914062, 298.3194580078125, -859.1698608398438, 774.1407470703125, 263.70159912109375, 1330.9315185546875, 283.8866271972656, -347.5028076171875, 1202.2021484375, 937.9411010742188, -232.19589233398438, -168.45892333984375, -473.9671936035156, -426.6065673828125, -13.874588012695312, -871.2102661132812, -699.3692016601562, 159.98654174804688, 1241.912841796875, 977.8138427734375, 148.3623046875, 564.5531616210938, 437.05267333984375, 700.1002807617188, 1056.18505859375, 1045.583740234375, -817.6961669921875, -352.5768737792969, 1295.5081787109375, 336.4881286621094, 472.08734130859375, 874.8895874023438, 1366.4005126953125, 1077.8280029296875, 2276.597412109375, 886.046875, -109.81049346923828, 405.0643310546875, 573.2802124023438, 390.83587646484375, 2884.2783203125, 821.199951171875, -15.376291275024414, 1335.7152099609375, 1184.134521484375, 900.1465454101562, 485.41912841796875, -200.6822967529297, 1101.32763671875, 2463.546875, 47.605072021484375, 412.593994140625, 971.4002685546875, 736.3641357421875, 362.84478759765625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000618.npy"}
{"epoch": 0.9074889867841409, "step": 619, "batch_size": 64, "mean": 387.3755798339844, "std": 802.7298583984375, "min": -1163.7530517578125, "p10": -531.2924865722656, "median": 281.0411071777344, "p90": 1427.5682617187501, "max": 3197.489990234375, "pos_frac": 0.671875, "sample": [-1015.06396484375, -678.390869140625, 109.0057373046875, 1032.613525390625, 1447.7137451171875, -169.21530151367188, 285.9031066894531, 193.66653442382812, 304.81683349609375, -517.0035400390625, 639.7770385742188, 706.0902709960938, 432.83441162109375, 1767.3997802734375, -663.5355834960938, 222.567626953125, 1141.417236328125, 321.36712646484375, 1109.149658203125, 1056.605712890625, -790.4032592773438, -697.6331176757812, 1380.5621337890625, 1281.22705078125, -20.429336547851562, -456.45782470703125, -93.7793197631836, 613.5335083007812, 681.672119140625, -41.42645263671875, 12.728790283203125, 566.1223754882812, -431.7088623046875, 39.694580078125, 1475.9366455078125, -352.7147521972656, -62.26910400390625, -250.10484313964844, 802.4884643554688, 630.2420043945312, -32.692928314208984, 1723.14794921875, 443.54443359375, 697.1817016601562, 999.7285766601562, 74.59078979492188, -195.7100830078125, 2467.156494140625, 1641.2440185546875, 287.258544921875, 94.55281829833984, 276.1791076660156, 428.7436828613281, 3197.489990234375, -1163.7530517578125, -537.4163208007812, 699.5411376953125, -91.45648193359375, 1106.177734375, -8.55197525024414, 300.38726806640625, 142.63827514648438, 18.786169052124023, 208.26998901367188], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000619.npy"}
{"epoch": 0.908957415565345, "step": 620, "batch_size": 64, "mean": 361.19091796875, "std": 621.2471923828125, "min": -1212.56103515625, "p10": -314.3078735351562, "median": 313.82421875, "p90": 1330.631689453125, "max": 1936.9552001953125, "pos_frac": 0.734375, "sample": [497.4954528808594, -323.7064208984375, 427.4757080078125, 588.7581176757812, 492.98876953125, 772.503173828125, 175.0594940185547, 296.02642822265625, -77.08316802978516, 1335.2845458984375, -361.8983459472656, 244.69757080078125, -292.3779296875, 1350.458740234375, 482.340087890625, 141.03575134277344, 864.5867919921875, 471.2843933105469, -105.30142211914062, 1387.7314453125, 1905.6688232421875, 186.63717651367188, 443.1108703613281, 1197.7950439453125, -567.1993408203125, 215.224365234375, 534.971923828125, 203.69723510742188, 157.32972717285156, 75.38900756835938, -85.63701629638672, 1936.9552001953125, 382.23321533203125, 1708.385986328125, -85.0450439453125, 318.4658203125, -124.15982818603516, 476.1612548828125, -96.9776611328125, 414.4281005859375, 446.27374267578125, 444.65673828125, -128.81689453125, -99.22380065917969, 1319.7750244140625, 769.7691040039062, 656.9473876953125, -1212.56103515625, 129.28826904296875, 248.93118286132812, -201.9559326171875, -1009.1197509765625, 1640.319091796875, 293.1140441894531, 276.1997985839844, 323.1476745605469, 432.5881042480469, 114.1254653930664, -502.0955810546875, 364.32110595703125, 309.1826171875, 1002.86572265625, -445.3007507324219, 378.99267578125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000620.npy"}
{"epoch": 0.9104258443465492, "step": 621, "batch_size": 64, "mean": 558.8348388671875, "std": 725.827880859375, "min": -1684.301025390625, "p10": -227.409992980957, "median": 506.901611328125, "p90": 1376.7398681640625, "max": 2947.17626953125, "pos_frac": 0.84375, "sample": [615.8018188476562, -427.5425109863281, -795.783447265625, -214.1184844970703, 1643.0919189453125, 13.043312072753906, 132.15194702148438, 574.2384643554688, 663.7825927734375, 941.8407592773438, 844.8103637695312, 289.6170654296875, 276.0546875, 826.3770141601562, 312.9542236328125, -454.3628234863281, -240.56228637695312, 936.0029907226562, -1684.301025390625, 229.2928466796875, 1556.7244873046875, 176.92578125, 1023.66357421875, 439.56475830078125, 1184.0982666015625, 2947.17626953125, 104.0068130493164, 1132.5098876953125, 1122.4248046875, -157.21127319335938, 1381.6588134765625, 813.0753784179688, 930.2743530273438, 310.292236328125, 835.2188110351562, 1690.027587890625, 183.14691162109375, 733.7760009765625, 357.4516906738281, 709.596923828125, -108.31529235839844, 1350.1016845703125, -233.10635375976562, 606.5006103515625, 305.3047180175781, 207.5872802734375, 1365.2623291015625, 328.164306640625, 1253.856689453125, 1172.1356201171875, -1014.536376953125, 348.86578369140625, 110.82942962646484, 273.26849365234375, 328.98687744140625, 662.1988525390625, 886.9772338867188, 1563.7152099609375, 171.96517944335938, 413.07403564453125, 1905.60009765625, 1072.122314453125, 635.21826171875, 202.8639678955078], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000621.npy"}
{"epoch": 0.9118942731277533, "step": 622, "batch_size": 64, "mean": 390.4576721191406, "std": 637.1874389648438, "min": -1139.218505859375, "p10": -236.2858474731445, "median": 377.07318115234375, "p90": 892.2039489746095, "max": 2696.63818359375, "pos_frac": 0.8125, "sample": [728.1902465820312, 332.50091552734375, -277.7417907714844, 403.1875, 576.400146484375, 379.47393798828125, -756.2762451171875, 423.6024169921875, 2549.727783203125, 557.5761108398438, 80.1760025024414, -417.3614501953125, 58.90657424926758, -246.3493194580078, -106.20822143554688, 555.9943237304688, 311.4136962890625, 374.67242431640625, 1.343353271484375, 266.9554443359375, 1123.934326171875, -66.53240966796875, 605.1845703125, 2696.63818359375, 773.7816772460938, 42.137115478515625, 593.5746459960938, 715.93017578125, 230.4943084716797, -1099.884765625, 222.27239990234375, -47.25177001953125, 868.1604614257812, 118.86036682128906, 319.00433349609375, 902.50830078125, 508.650634765625, -512.0997924804688, 1079.8192138671875, 560.802734375, 756.3609619140625, 855.3924560546875, 97.2443618774414, 693.9117431640625, 101.5623550415039, -36.1123046875, 461.1336975097656, -212.80441284179688, 52.41221618652344, 349.4740905761719, 1477.539306640625, 463.787353515625, 757.0858154296875, 689.3513793945312, 557.598388671875, 634.7230834960938, -1139.218505859375, 21.89812469482422, 138.61695861816406, 1512.15771484375, 605.118896484375, 415.5276184082031, 14.927845001220703, 289.43365478515625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000622.npy"}
{"epoch": 0.9133627019089574, "step": 623, "batch_size": 64, "mean": 299.5466613769531, "std": 609.82470703125, "min": -1152.286865234375, "p10": -241.35121459960936, "median": 216.107177734375, "p90": 1015.9863708496099, "max": 2748.3203125, "pos_frac": 0.6875, "sample": [-35.83404541015625, 237.6749267578125, -779.6812133789062, 1265.2589111328125, -119.60406494140625, -89.53089904785156, 634.4412231445312, 99.99636840820312, 742.0321655273438, 37.59161376953125, 1308.128662109375, 344.7391357421875, 651.1405029296875, 138.0118865966797, 657.6814575195312, -336.73284912109375, 777.73193359375, 359.00811767578125, -250.29022216796875, 71.8919677734375, -203.25022888183594, 446.9500732421875, 1065.9981689453125, -220.4935302734375, -1152.286865234375, 165.46923828125, 391.3721923828125, -641.5487670898438, 418.32159423828125, 596.2725830078125, -142.35000610351562, 453.8117980957031, -165.30868530273438, -88.1202163696289, -208.95614624023438, 77.84648132324219, 899.2921752929688, 697.3991088867188, 779.7304077148438, 196.16964721679688, 22.616592407226562, -684.533203125, 367.0080261230469, 1410.6993408203125, -57.440521240234375, -65.36227416992188, 2748.3203125, 504.23956298828125, -62.72269821166992, -418.3511657714844, 1089.8603515625, 597.6253662109375, 323.34429931640625, 458.66156005859375, 236.04470825195312, 1766.5040283203125, 23.442138671875, 103.98966979980469, 286.1490783691406, 39.3062858581543, 533.390625, 153.162353515625, 716.56982421875, -1.51373291015625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000623.npy"}
{"epoch": 0.9148311306901615, "step": 624, "batch_size": 64, "mean": 469.693115234375, "std": 584.0753784179688, "min": -1145.2103271484375, "p10": -233.06175842285154, "median": 530.0346984863281, "p90": 1202.64775390625, "max": 1479.192138671875, "pos_frac": 0.78125, "sample": [663.553466796875, -358.5225830078125, 705.3372802734375, 273.3526611328125, 525.7457885742188, 1099.203369140625, 1178.3475341796875, 1479.192138671875, 1400.257568359375, 210.86187744140625, 209.42483520507812, -19.47735595703125, 92.5770034790039, 1125.68212890625, 813.86669921875, -131.53338623046875, 534.3236083984375, 754.4354858398438, 1330.8985595703125, -1127.4422607421875, 52.72834777832031, 230.74131774902344, 17.69654083251953, 887.6783447265625, 421.8315124511719, 224.3680419921875, 905.168212890625, 861.3483276367188, 1050.8023681640625, -249.07516479492188, 165.34754943847656, 1074.0069580078125, -193.4366455078125, 330.3736572265625, -431.64593505859375, 918.9515380859375, 398.8304443359375, -79.42620849609375, 1098.6199951171875, 396.9486083984375, -1145.2103271484375, 75.66645050048828, -195.6971435546875, 768.5325317382812, 899.4935302734375, 1236.3701171875, 183.3577423095703, 820.9222412109375, 1036.8951416015625, 538.4970703125, 199.1023712158203, -327.305419921875, -17.27760124206543, 1213.0621337890625, 577.720458984375, 592.0477905273438, -337.9399108886719, 677.812255859375, -81.90504455566406, 677.882568359375, 1220.554443359375, 199.08612060546875, 1335.973388671875, 1070.77783203125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000624.npy"}
{"epoch": 0.9162995594713657, "step": 625, "batch_size": 64, "mean": 526.8627319335938, "std": 532.9713134765625, "min": -367.6235656738281, "p10": -19.52131958007811, "median": 429.1978454589844, "p90": 1164.4635864257814, "max": 2151.537841796875, "pos_frac": 0.875, "sample": [214.10983276367188, 181.31509399414062, -367.6235656738281, 959.4179077148438, 294.18463134765625, 34.67901611328125, 816.3515625, 1104.24658203125, 1098.19091796875, 980.2439575195312, 96.85964965820312, 718.224853515625, 279.62603759765625, 99.39604187011719, 1853.519287109375, 436.97454833984375, 261.8260498046875, 149.56385803222656, 2151.537841796875, 522.9698486328125, 1831.95556640625, 260.8705749511719, 58.809486389160156, 697.5151977539062, 357.8731994628906, 802.9771728515625, 1839.226318359375, -26.272171020507812, 341.52288818359375, 309.7112121582031, 508.85076904296875, 711.404296875, 497.1507873535156, -266.17523193359375, 498.9892578125, -103.54901123046875, 81.64054107666016, 636.6934814453125, 331.85113525390625, 765.9076538085938, 1104.19873046875, 463.793701171875, 1243.3695068359375, 370.6461181640625, 183.1067657470703, 421.421142578125, -65.21443176269531, 449.127685546875, 701.0884399414062, 724.9283447265625, 93.34534454345703, -3.7693328857421875, 936.9024658203125, 387.9072265625, 98.85462951660156, 210.04025268554688, 170.6897735595703, 471.7460021972656, 1190.2708740234375, -362.1981506347656, 660.9307250976562, -89.6846923828125, 1378.401123046875, 956.7455444335938], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000625.npy"}
{"epoch": 0.9177679882525698, "step": 626, "batch_size": 64, "mean": 290.5083923339844, "std": 666.8143920898438, "min": -2688.511474609375, "p10": -364.9424987792968, "median": 272.4510955810547, "p90": 914.7944091796876, "max": 2181.53369140625, "pos_frac": 0.703125, "sample": [570.7974853515625, 21.9014892578125, 498.55810546875, 388.52001953125, 101.64956665039062, 192.76499938964844, 698.840576171875, 286.9465026855469, 820.8557739257812, -2688.511474609375, 1373.2843017578125, 318.4709167480469, 1504.9964599609375, 2181.53369140625, -579.8904418945312, 164.98153686523438, 257.9556884765625, -416.1135559082031, -7.431640625, -135.94039916992188, -41.96197509765625, 203.14610290527344, 671.0281982421875, 116.64627838134766, -37.57164001464844, 46.19578552246094, 149.39187622070312, -630.9580688476562, 451.2763977050781, 685.9485473632812, -97.23875427246094, 630.7422485351562, -530.7861328125, -583.3296508789062, 614.6925048828125, 1180.0606689453125, 896.6151733398438, 410.5487365722656, 401.59051513671875, 759.9064331054688, 596.5613403320312, -245.54336547851562, 922.5855102539062, 440.39990234375, 1673.2647705078125, 738.5253295898438, -83.63127899169922, 61.916709899902344, 117.8861083984375, 375.87310791015625, -80.88330078125, 844.8372802734375, 320.2792053222656, 1367.4012451171875, -15.627792358398438, 201.4803924560547, 554.7571411132812, 446.129150390625, -716.0010986328125, -30.830123901367188, 138.8375701904297, -176.77041625976562, 293.89874267578125, -2.920940399169922], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000626.npy"}
{"epoch": 0.9192364170337739, "step": 627, "batch_size": 64, "mean": 588.1962280273438, "std": 878.016845703125, "min": -691.8932495117188, "p10": -203.00115966796872, "median": 376.98182678222656, "p90": 1515.878112792969, "max": 4498.9013671875, "pos_frac": 0.765625, "sample": [671.2762451171875, 264.1719970703125, 1186.549072265625, 382.7053527832031, -220.24942016601562, -131.3076934814453, 131.14129638671875, 675.27392578125, 540.7362670898438, 484.0128479003906, 1255.9320068359375, -487.2904357910156, 136.25732421875, 482.8941345214844, -94.79720306396484, 1059.843994140625, -162.75521850585938, 759.9929809570312, 894.3541259765625, 1032.5859375, -658.2284545898438, 53.51224136352539, 648.7969970703125, -20.097990036010742, 1804.36328125, 182.1802978515625, 379.9620666503906, 2579.50146484375, 1072.040771484375, 430.491943359375, 1947.630126953125, 70.93479919433594, 516.6555786132812, 1065.0478515625, 652.7194213867188, 1771.78076171875, 1444.630126953125, 201.10934448242188, 508.4844970703125, -134.91273498535156, 100.634765625, -55.38148498535156, 3307.70751953125, 1546.4129638671875, 878.7115478515625, -237.39840698242188, 4498.9013671875, -249.5076904296875, 364.3589782714844, -67.40646362304688, 1011.0557250976562, 323.9199523925781, 911.3106079101562, -691.8932495117188, 307.21484375, 221.31414794921875, 374.0015869140625, 192.79371643066406, -431.5438232421875, 1197.987548828125, -59.081520080566406, 233.98597717285156, 288.7474670410156, 299.78216552734375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000627.npy"}
{"epoch": 0.920704845814978, "step": 628, "batch_size": 64, "mean": 438.3399658203125, "std": 658.7118530273438, "min": -829.36669921875, "p10": -367.50811462402334, "median": 335.7954406738281, "p90": 1345.2792236328125, "max": 2271.4814453125, "pos_frac": 0.703125, "sample": [288.3643493652344, -16.68109130859375, 383.2265319824219, 1512.66357421875, 1507.809814453125, 158.34970092773438, 708.4002685546875, 200.56756591796875, -269.358642578125, -36.12762451171875, 164.46798706054688, 141.8450927734375, -829.36669921875, 1088.6575927734375, 1072.08154296875, 1209.792724609375, -99.96839904785156, -259.34332275390625, 1628.292724609375, 514.6292724609375, 2271.4814453125, 569.526611328125, 231.3643798828125, -214.572998046875, -42.058677673339844, -409.5721740722656, 460.02728271484375, 868.8385009765625, -458.30694580078125, -517.406005859375, 973.3355102539062, 226.11773681640625, 759.9747924804688, 1352.2061767578125, 1327.758056640625, 831.48876953125, 403.0558166503906, 692.7916870117188, 176.49729919433594, 148.091064453125, -126.8845443725586, 621.3603515625, 546.2125854492188, 95.04889678955078, 1456.2392578125, 1200.5206298828125, 1386.139404296875, 218.02587890625, -416.04705810546875, -810.32470703125, -88.54489135742188, -228.2444610595703, 1329.1163330078125, 851.5735473632812, -601.7533569335938, -20.530710220336914, 1058.39404296875, 442.49603271484375, 504.8879699707031, 138.6309814453125, 1159.498291015625, 657.0847778320312, -65.04609680175781, 26.960926055908203], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000628.npy"}
{"epoch": 0.922173274596182, "step": 629, "batch_size": 64, "mean": 415.13494873046875, "std": 551.8397827148438, "min": -1011.5088500976562, "p10": -238.66657714843748, "median": 447.0528106689453, "p90": 1228.9375732421877, "max": 1750.15380859375, "pos_frac": 0.8125, "sample": [215.51300048828125, 1267.80908203125, -343.7279052734375, -487.59588623046875, 484.5152587890625, 404.67578125, 338.81951904296875, 772.1957397460938, 1428.5562744140625, 472.57513427734375, 506.51824951171875, 975.783935546875, -140.31246948242188, 473.98944091796875, 1255.36328125, -111.34516143798828, 122.42366790771484, 32.703250885009766, 21.92331886291504, 578.4038696289062, 29.06829833984375, 1078.35693359375, 340.8400573730469, -216.67543029785156, 136.9813995361328, 533.4137573242188, -761.100830078125, -86.58277130126953, 1196.8472900390625, 746.7777099609375, 78.0594253540039, 788.8966674804688, 1417.239990234375, 1750.15380859375, 454.2940368652344, 155.61863708496094, -1011.5088500976562, 1258.02392578125, 1011.4163818359375, -248.0913543701172, -325.5155029296875, -654.3220825195312, 261.14593505859375, 439.81158447265625, 751.5635986328125, 1242.6905517578125, 286.9139709472656, 540.8026733398438, 630.2218017578125, 11.167257308959961, 988.137939453125, 277.09283447265625, 74.7869873046875, 712.63330078125, 226.56948852539062, 517.2218627929688, 65.51178741455078, 616.5645141601562, 957.7083740234375, 589.7212524414062, 397.58660888671875, 525.5504760742188, 518.9620361328125, -4.704986572265625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000629.npy"}
{"epoch": 0.9236417033773862, "step": 630, "batch_size": 64, "mean": 238.38504028320312, "std": 684.5905151367188, "min": -1337.765380859375, "p10": -413.39764099121095, "median": 154.04149627685547, "p90": 982.9143310546875, "max": 3268.58740234375, "pos_frac": 0.6875, "sample": [1055.1214599609375, -954.9808349609375, -499.840576171875, 413.79925537109375, -252.08859252929688, 701.1512451171875, 6.876518249511719, -264.94354248046875, 2425.89697265625, -232.77487182617188, 656.7181396484375, -144.68690490722656, 213.21084594726562, 978.98486328125, 690.6873779296875, -1337.765380859375, 148.66848754882812, -572.2791748046875, 297.13409423828125, 216.65545654296875, 468.4186706542969, -175.66078186035156, 138.06675720214844, 1441.0433349609375, -183.6750030517578, 3268.58740234375, 69.29368591308594, 984.598388671875, 114.21306610107422, -173.5729522705078, 36.469329833984375, -357.17987060546875, 278.1431884765625, -541.1090087890625, 361.6480712890625, -470.0132141113281, -147.17384338378906, 263.0555114746094, 150.52273559570312, 396.41168212890625, 268.37835693359375, -146.57904052734375, 371.5365905761719, -407.3197021484375, 160.86639404296875, 358.00677490234375, 95.31720733642578, 230.0443115234375, 678.7225341796875, -55.16968536376953, 112.69319152832031, 1146.56982421875, 689.2308959960938, 157.5602569580078, -310.4590759277344, 61.36695861816406, 365.51934814453125, 92.72538757324219, 606.7408447265625, -416.0024719238281, 1170.353515625, 191.49072265625, 340.15435791015625, 27.262937545776367], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000630.npy"}
{"epoch": 0.9251101321585903, "step": 631, "batch_size": 64, "mean": 372.4952087402344, "std": 657.3193359375, "min": -1788.5369873046875, "p10": -387.81620788574213, "median": 348.4291229248047, "p90": 1124.7512939453127, "max": 2076.857666015625, "pos_frac": 0.71875, "sample": [1173.14208984375, 1001.4219360351562, -356.9875183105469, 280.22705078125, -401.02850341796875, 503.1439208984375, -247.12948608398438, -134.52407836914062, -228.08944702148438, -496.71405029296875, 1677.17919921875, 1269.1319580078125, -483.1910705566406, 492.2913513183594, -585.0531005859375, 180.5398712158203, 536.7420654296875, 821.749267578125, 54.473487854003906, -258.4408264160156, 163.38687133789062, -26.55433464050293, 290.08099365234375, -694.3045043945312, 1034.084228515625, 6.206211090087891, 41.431007385253906, -71.30119323730469, 843.1310424804688, 748.5931396484375, -1788.5369873046875, 462.5038757324219, 2076.857666015625, 791.7523803710938, 370.34820556640625, 654.3690795898438, 284.2302551269531, -159.55809020996094, -197.51998901367188, 1133.049560546875, 311.4350891113281, 575.8426513671875, 1439.9417724609375, 940.9017944335938, 626.1392822265625, -833.3969116210938, 1105.388671875, 371.12255859375, 1445.03857421875, 529.7916870117188, 104.57042694091797, 725.2485961914062, -195.53265380859375, 450.3554382324219, 227.15423583984375, 170.78070068359375, 1093.6865234375, 669.441650390625, 1001.3671875, -85.37313079833984, 265.50531005859375, 1016.6685791015625, 326.5100402832031, 795.971435546875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000631.npy"}
{"epoch": 0.9265785609397944, "step": 632, "batch_size": 64, "mean": 382.27484130859375, "std": 642.3773803710938, "min": -1260.994140625, "p10": -342.402212524414, "median": 390.0014343261719, "p90": 1124.4259521484375, "max": 2411.0390625, "pos_frac": 0.734375, "sample": [692.1393432617188, -750.5953369140625, 191.0650634765625, 342.4601745605469, 657.1784057617188, -1260.994140625, 1182.0482177734375, 1130.6959228515625, 1277.8404541015625, 977.26318359375, 766.94921875, 672.3491821289062, 167.9765625, 883.430908203125, 230.04812622070312, 212.0106201171875, 545.69091796875, 396.6993103027344, 63.53251266479492, -8.397666931152344, 1583.8818359375, 383.3035583496094, 598.9642333984375, 460.69561767578125, -514.2095947265625, 1083.57763671875, 598.6171875, -288.2867736816406, 452.9103698730469, -222.83343505859375, -1191.790283203125, 374.4462585449219, 929.2848510742188, 1091.3118896484375, 595.4722900390625, 781.6921997070312, 944.5693359375, 713.4291381835938, 466.9770812988281, -240.61129760742188, 1238.977783203125, -191.9857177734375, -224.9395751953125, -107.35357666015625, 596.8916625976562, 328.54766845703125, 608.6587524414062, 211.25787353515625, 1066.201416015625, 2411.0390625, 1109.7960205078125, -117.70040893554688, -426.95745849609375, 93.42798614501953, 124.76206970214844, -365.59454345703125, 406.0765686035156, 353.2760314941406, -635.5884399414062, 17.67755126953125, 84.78599548339844, -198.74745178222656, -94.48259735107422, 1206.7711181640625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000632.npy"}
{"epoch": 0.9280469897209985, "step": 633, "batch_size": 64, "mean": 277.3817443847656, "std": 578.2500610351562, "min": -1096.9801025390625, "p10": -379.38096923828124, "median": 233.60167694091797, "p90": 1135.6412231445313, "max": 1563.8419189453125, "pos_frac": 0.703125, "sample": [-299.5200500488281, 1201.0897216796875, 764.1614379882812, -758.5336303710938, 273.0623779296875, 380.74163818359375, -402.13043212890625, -58.928680419921875, 1146.302978515625, 57.12352752685547, 236.39944458007812, 171.44866943359375, 397.610107421875, -958.99853515625, 560.2134399414062, 82.55921173095703, 217.08236694335938, 1185.26416015625, 373.8747863769531, -312.8975524902344, -130.7386474609375, 336.1227722167969, 304.6241455078125, -37.332664489746094, 730.333740234375, 210.47732543945312, -43.90617370605469, -71.82256317138672, -814.3739013671875, 969.2703247070312, 343.5072021484375, 217.21408081054688, 1176.888427734375, 434.9099426269531, 791.5216674804688, -134.40199279785156, -386.05963134765625, -1096.9801025390625, -363.79742431640625, 70.49085235595703, 1470.507568359375, 484.4792785644531, 121.40985107421875, -791.985107421875, 565.1571044921875, 583.3543701171875, 563.7162475585938, 1563.8419189453125, 975.49072265625, 1110.7637939453125, 122.70626831054688, 295.5877990722656, -237.0438690185547, 980.4097290039062, 348.2745361328125, -98.26707458496094, 230.8039093017578, 406.2109375, 1407.19873046875, 86.0128173828125, 557.1184692382812, -46.425262451171875, 66.51233673095703, 224.72483825683594], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000633.npy"}
{"epoch": 0.9295154185022027, "step": 634, "batch_size": 64, "mean": 260.56390380859375, "std": 592.4265747070312, "min": -930.4292602539062, "p10": -342.2693481445312, "median": 184.64227294921875, "p90": 1017.3833435058597, "max": 2213.41357421875, "pos_frac": 0.65625, "sample": [234.58021545410156, 909.3756103515625, -307.1315612792969, 1850.5108642578125, -606.0220336914062, -60.741455078125, 27.78681755065918, 309.0683288574219, 292.8017272949219, 1346.9359130859375, 37.97393798828125, 436.32958984375, -71.93390655517578, 328.36767578125, 679.8418579101562, 203.56881713867188, 324.06085205078125, -297.80255126953125, -518.2742919921875, 917.125732421875, 1090.9598388671875, -228.7944793701172, -44.81556701660156, 101.6982421875, -44.031280517578125, 34.762229919433594, 165.71572875976562, 672.8818969726562, 526.1834716796875, 44.50728225708008, 2213.41357421875, 341.3664245605469, 529.133056640625, -836.1072387695312, 1051.4232177734375, 1121.0291748046875, 596.5743408203125, -307.14459228515625, -357.32281494140625, 129.204345703125, 825.4195556640625, -209.57969665527344, -104.67371368408203, -930.4292602539062, 246.7627410888672, -520.3297119140625, 73.91148376464844, -548.58935546875, -104.36644744873047, 937.9569702148438, -297.08526611328125, 772.6180419921875, 384.783447265625, 1334.2655029296875, 384.0478210449219, 123.52165222167969, 259.5402526855469, 692.83349609375, -82.98151397705078, 237.83981323242188, 34.28399658203125, -27.197799682617188, -255.08140563964844, 611.56103515625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000634.npy"}
{"epoch": 0.9309838472834068, "step": 635, "batch_size": 64, "mean": 352.7930603027344, "std": 629.8380126953125, "min": -953.2166748046875, "p10": -285.5994735717773, "median": 321.0361328125, "p90": 977.0861083984377, "max": 2416.47412109375, "pos_frac": 0.6875, "sample": [949.9344482421875, 112.14137268066406, 2416.47412109375, -100.2470703125, 438.4612731933594, -79.72176361083984, 167.32489013671875, -44.3870735168457, -10.00286865234375, 468.06890869140625, 60.19739532470703, 1554.6171875, -17.497665405273438, 746.7066650390625, -544.3375854492188, 357.53741455078125, 528.10400390625, -32.517364501953125, 886.2281494140625, 28.353164672851562, 349.56402587890625, 200.79319763183594, 706.43359375, 919.1700439453125, 316.8038330078125, 62.376686096191406, 9.348617553710938, 374.6093444824219, -249.29505920410156, 1731.1300048828125, -40.29261016845703, 377.94525146484375, 325.2684326171875, 1273.6461181640625, 731.7727661132812, -155.3282470703125, 134.7398681640625, -16.847156524658203, 410.0358581542969, 988.7225341796875, -170.2198028564453, 888.93701171875, 919.8178100585938, 417.9314270019531, -661.893798828125, -70.23921966552734, 600.5361938476562, -49.407264709472656, -301.15850830078125, 547.577392578125, -496.42193603515625, -314.10882568359375, -953.2166748046875, 722.3723754882812, 1210.2532958984375, 315.66082763671875, 343.2125549316406, 603.3450927734375, -897.326171875, 2236.92138671875, 101.38558197021484, 594.2940673828125, 432.66424560546875, 221.8028106689453], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000635.npy"}
{"epoch": 0.9324522760646109, "step": 636, "batch_size": 64, "mean": 414.892333984375, "std": 648.4181518554688, "min": -1002.7171020507812, "p10": -235.92136383056638, "median": 336.9044494628906, "p90": 1011.2282897949219, "max": 3575.054931640625, "pos_frac": 0.78125, "sample": [44.18757629394531, -457.54473876953125, 87.28598022460938, -170.17068481445312, 388.3312072753906, -425.4711608886719, 340.44793701171875, 321.67535400390625, 353.836181640625, 26.572105407714844, 810.9464111328125, -339.3857116699219, 328.796142578125, 1150.3172607421875, 15.780815124511719, 1022.5027465820312, -81.084716796875, 2153.560791015625, 1117.6630859375, 445.65191650390625, 342.3426513671875, 993.2549438476562, -49.698638916015625, -311.35009765625, 582.1253662109375, 356.7884826660156, 1018.93115234375, 288.58343505859375, 228.31748962402344, 3575.054931640625, 33.45872497558594, -218.2707061767578, 934.4601440429688, 1258.5670166015625, -243.48593139648438, 934.5814208984375, 989.8072509765625, 387.2625427246094, 520.4335327148438, -204.19873046875, 563.862060546875, 186.85960388183594, 760.4194946289062, 840.8768310546875, 631.8441162109375, 333.3609619140625, 781.3511962890625, 58.928497314453125, 318.63580322265625, -179.852294921875, 178.07762145996094, 12.936676025390625, -1002.7171020507812, 443.32763671875, 278.4703674316406, -260.13287353515625, 991.34033203125, 760.4415283203125, 245.74644470214844, 421.5760498046875, -8.451583862304688, 277.96600341796875, 501.4252014160156, 865.95263671875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000636.npy"}
{"epoch": 0.933920704845815, "step": 637, "batch_size": 64, "mean": 449.6193542480469, "std": 611.1025390625, "min": -978.3908081054688, "p10": -92.07102890014649, "median": 290.3992919921875, "p90": 1289.9816650390626, "max": 2279.382568359375, "pos_frac": 0.828125, "sample": [130.39785766601562, 166.07644653320312, 640.48876953125, 3.8508834838867188, 35.87668228149414, 975.8934326171875, 304.4056396484375, -30.648984909057617, 697.22509765625, 1276.52294921875, 725.9370727539062, -978.3908081054688, 93.74362182617188, 254.30581665039062, -199.2446746826172, 624.8124389648438, 819.7265625, 1285.2645263671875, 990.3136596679688, 633.6932983398438, 1347.628173828125, 276.3929443359375, 206.03961181640625, 2279.382568359375, 149.44265747070312, 507.625732421875, -274.3496398925781, 12.77364730834961, 261.4535827636719, -93.02786254882812, 248.78555297851562, -118.78213500976562, -143.2894287109375, 572.8870849609375, 448.8595275878906, -79.15159606933594, 351.26641845703125, 663.7069702148438, 754.8095703125, 195.88885498046875, 519.1843872070312, 357.87396240234375, 260.6640319824219, 1070.877685546875, 565.1490478515625, 576.5241088867188, 1292.0032958984375, 31.05707550048828, 1682.5821533203125, -875.9793701171875, 1187.1201171875, 15.490217208862305, 311.1522216796875, 1428.965087890625, 463.5863037109375, 109.09843444824219, 47.15150451660156, 1500.1107177734375, -63.97303771972656, 2136.114990234375, 14.645496368408203, 101.12730407714844, 116.35964965820312, -89.83841705322266], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000637.npy"}
{"epoch": 0.9353891336270191, "step": 638, "batch_size": 64, "mean": 365.7836608886719, "std": 686.160400390625, "min": -3256.06005859375, "p10": -142.25271682739256, "median": 280.29835510253906, "p90": 1175.070642089844, "max": 1728.736328125, "pos_frac": 0.796875, "sample": [1178.910400390625, -638.2326049804688, -3256.06005859375, 924.7012939453125, 256.13165283203125, 120.54052734375, 41.810028076171875, 78.4801025390625, 1166.1112060546875, 409.49432373046875, 714.137939453125, 1408.6419677734375, 180.87278747558594, 712.8438110351562, 285.1597900390625, 528.194091796875, 519.4395141601562, -470.4096374511719, 501.89630126953125, 47.52339553833008, 426.6069641113281, 44.10234832763672, -285.98028564453125, 184.52633666992188, 362.5460510253906, 883.2557373046875, 1348.95751953125, -50.76442337036133, 609.9532470703125, 266.3601989746094, 847.8201293945312, 270.28955078125, 700.3627319335938, -149.63381958007812, -186.3655242919922, 777.63134765625, 813.62939453125, -50.9469108581543, 275.4369201660156, -285.1940612792969, 19.601335525512695, 432.4549865722656, 305.9193420410156, 1728.736328125, 72.90589141845703, -22.15752601623535, 950.107177734375, 108.37095642089844, 626.0424194335938, 1093.7147216796875, -54.48779296875, -104.8243408203125, 978.3447265625, 448.2786865234375, 133.484619140625, 101.02774047851562, 130.61227416992188, 76.78665924072266, 823.317138671875, 1206.2010498046875, -125.03014373779297, 60.75444793701172, 1631.483154296875, 1275.7301025390625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000638.npy"}
{"epoch": 0.9368575624082232, "step": 639, "batch_size": 64, "mean": 158.20431518554688, "std": 680.5521850585938, "min": -1329.8719482421875, "p10": -725.886749267578, "median": 147.1580352783203, "p90": 1045.822351074219, "max": 1724.9517822265625, "pos_frac": 0.640625, "sample": [1313.528564453125, -838.7444458007812, -384.17230224609375, 1285.3759765625, 885.2864990234375, -412.53509521484375, 253.89804077148438, 369.5046081542969, -583.9093017578125, 823.1251831054688, 1496.239013671875, -34.86638259887695, 415.8990173339844, 577.0513916015625, -909.9487915039062, 224.61911010742188, -424.561279296875, -414.29119873046875, 93.65585327148438, 400.9187316894531, 611.3983764648438, -1281.9852294921875, 833.0906372070312, 594.1694946289062, -546.8154296875, -1329.8719482421875, -1106.9295654296875, 304.4040222167969, 104.3688735961914, 44.99226379394531, 571.8933715820312, 543.5918579101562, 37.95220184326172, -619.6622924804688, 1068.5413818359375, 1181.7164306640625, -378.67608642578125, 885.4807739257812, -385.7532958984375, -79.2796401977539, 44.845848083496094, 15.777801513671875, 643.6478881835938, 374.47979736328125, 1724.9517822265625, -834.0711669921875, -27.751625061035156, 4.340171813964844, -298.90362548828125, -367.98956298828125, 158.64649963378906, 856.8240356445312, 275.82379150390625, 992.811279296875, -641.9343872070312, -367.2136535644531, -761.8663330078125, 118.29957580566406, 135.66957092285156, 637.1841430664062, 404.55999755859375, 1122.87255859375, 557.9285278320312, 167.44479370117188], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000639.npy"}
{"epoch": 0.9383259911894273, "step": 640, "batch_size": 64, "mean": 561.17333984375, "std": 764.9519653320312, "min": -928.1238403320312, "p10": -186.7608673095703, "median": 437.7881164550781, "p90": 1391.1234252929687, "max": 2931.78271484375, "pos_frac": 0.78125, "sample": [754.3726196289062, 571.193359375, -456.77813720703125, -94.95799255371094, 104.5871353149414, 355.6851501464844, 256.6580810546875, 1367.0701904296875, 1300.9786376953125, 192.122314453125, 1393.04833984375, 1765.7633056640625, 967.111572265625, 145.21438598632812, -223.8740692138672, 489.8113098144531, 878.4544067382812, -87.989501953125, 29.997604370117188, 1386.6319580078125, -833.7437744140625, 979.9503173828125, 391.00299072265625, 788.2706298828125, -164.93045043945312, -928.1238403320312, 99.45455932617188, 414.78753662109375, 2931.78271484375, 809.5339965820312, 586.788330078125, 1017.5432739257812, -598.6994018554688, 615.552978515625, 2490.018310546875, 299.20208740234375, -196.11676025390625, 252.6153106689453, 60.04473876953125, 460.7886962890625, -36.19178771972656, 263.4287109375, 0.2881927490234375, 1061.0609130859375, 1054.8631591796875, 207.35726928710938, 950.6071166992188, 654.8116455078125, -503.1118469238281, 1346.8502197265625, 2207.50634765625, 343.5957946777344, -68.82504272460938, 1045.954345703125, -63.43775939941406, 623.0729370117188, 480.426025390625, 915.820068359375, 14.472854614257812, 803.9810180664062, 1449.706787109375, 2469.844482421875, -91.11631774902344, 213.30734252929688], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000640.npy"}
{"epoch": 0.9397944199706314, "step": 641, "batch_size": 64, "mean": 356.50567626953125, "std": 701.9379272460938, "min": -1166.4652099609375, "p10": -494.84984741210934, "median": 333.7306213378906, "p90": 1000.3296875000001, "max": 2981.018310546875, "pos_frac": 0.75, "sample": [160.79794311523438, 279.21173095703125, 534.2247924804688, 1010.0236206054688, -509.7405090332031, -117.62277221679688, 260.714111328125, 1463.3907470703125, 521.75146484375, 118.01863098144531, -979.14990234375, 509.9044494628906, 16.805068969726562, -449.6033935546875, 2981.018310546875, -506.51470947265625, -456.7192077636719, -467.6318359375, 26.300888061523438, 906.453857421875, -155.01370239257812, 80.0499038696289, 96.58123779296875, 683.5187377929688, 262.4077453613281, 626.1175537109375, -1070.9593505859375, 779.6983032226562, 590.353515625, 146.3087615966797, 805.86083984375, 537.731201171875, 689.39892578125, 86.471435546875, 163.9727783203125, 24.51321029663086, 1337.7125244140625, 373.044677734375, 642.446533203125, 611.5261840820312, 105.21857452392578, -34.77545166015625, -74.5855484008789, -602.6480712890625, 222.24847412109375, 769.48583984375, -1166.4652099609375, 977.7105102539062, 960.5169067382812, 969.4862060546875, 2097.250732421875, 739.0327758789062, -894.3753051757812, 676.5882568359375, -1.2471694946289062, 357.0839538574219, 609.5140380859375, -269.88702392578125, 518.4096069335938, 310.3772888183594, 842.500732421875, 1274.213134765625, 692.85205078125, 1124.4833984375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000641.npy"}
{"epoch": 0.9412628487518355, "step": 642, "batch_size": 64, "mean": 441.9483642578125, "std": 888.4047241210938, "min": -1534.0328369140625, "p10": -383.34586791992183, "median": 354.212646484375, "p90": 1306.9913085937505, "max": 4824.23779296875, "pos_frac": 0.734375, "sample": [97.01930236816406, 528.1009521484375, 757.2469482421875, 662.7727661132812, 209.34815979003906, -1534.0328369140625, 806.2802734375, 268.2866516113281, -67.5508804321289, -499.7584533691406, -105.17505645751953, 93.06283569335938, 1358.521728515625, -394.3726806640625, 768.86962890625, 985.1082153320312, 456.75201416015625, 814.1693725585938, -921.0610961914062, 91.36795043945312, 276.4516296386719, -82.75274658203125, 426.9288330078125, 14.786420822143555, 384.564697265625, 717.5232543945312, -70.1854019165039, 250.0267333984375, 91.59788513183594, 444.3188171386719, 863.686279296875, 614.144287109375, 765.9202270507812, 2399.538818359375, -305.5292663574219, 83.44558715820312, 323.860595703125, 663.392578125, -536.8076171875, 180.31289672851562, 845.9376831054688, 183.16709899902344, 482.5125427246094, 1401.54931640625, -357.61663818359375, 1186.753662109375, 752.3440551757812, 1628.8822021484375, -184.2388458251953, -44.5167236328125, 4824.23779296875, 889.3688354492188, -619.41845703125, 409.5784912109375, -113.76609802246094, -170.634521484375, 392.6728515625, 2467.468994140625, 61.182098388671875, -934.5322265625, 509.620361328125, 1654.7095947265625, 229.8385009765625, 909.4138793945312], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000642.npy"}
{"epoch": 0.9427312775330396, "step": 643, "batch_size": 64, "mean": 343.14263916015625, "std": 537.0115356445312, "min": -723.36474609375, "p10": -342.707192993164, "median": 323.5500030517578, "p90": 939.8549133300783, "max": 1847.2142333984375, "pos_frac": 0.765625, "sample": [347.1217346191406, 250.90415954589844, 150.14642333984375, 839.2304077148438, 843.7949829101562, 608.9361572265625, 568.2978515625, -5.430168151855469, 337.61181640625, 859.3510131835938, 14.755126953125, -582.1231689453125, 294.7516174316406, 310.4554748535156, -188.1741485595703, 850.038330078125, 914.3361206054688, 949.2970581054688, 1080.7904052734375, 488.6521911621094, 274.48388671875, 39.500091552734375, -272.7654113769531, 143.29031372070312, 95.60279846191406, 194.40228271484375, -723.36474609375, 696.3463134765625, 467.6176452636719, 908.888427734375, -329.80267333984375, 274.2325134277344, 379.2979736328125, 204.3963623046875, 917.8232421875, 1243.080322265625, 374.4848327636719, -407.04791259765625, 627.59521484375, 366.0407409667969, -599.7833862304688, 1847.2142333984375, 1052.89306640625, 336.64453125, 657.6764526367188, 129.8343505859375, 2.8968448638916016, 424.25152587890625, 1819.4649658203125, 51.37099838256836, 832.1563720703125, 717.5551147460938, 736.6427612304688, -278.8077087402344, 986.79638671875, 42.41791534423828, -443.94378662109375, -348.2377014160156, 393.9234924316406, -50.231781005859375, -209.0474853515625, -162.01449584960938, -489.8772888183594, 104.48729705810547], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000643.npy"}
{"epoch": 0.9441997063142438, "step": 644, "batch_size": 64, "mean": 351.42352294921875, "std": 724.2918701171875, "min": -1853.4688720703125, "p10": -498.55739135742186, "median": 244.42681884765625, "p90": 1174.8581176757814, "max": 2460.921142578125, "pos_frac": 0.734375, "sample": [2460.921142578125, 8.369043350219727, 856.4048461914062, -1853.4688720703125, 483.6162109375, 341.2413635253906, -663.5821533203125, 132.38343811035156, 810.49951171875, 360.1605224609375, 42.48985290527344, -106.83903503417969, 66.49323272705078, 430.46405029296875, 145.4982452392578, 580.8087768554688, 199.54498291015625, -40.3157958984375, 319.8346862792969, -496.5423278808594, 155.84024047851562, 1965.9630126953125, 70.12144470214844, -695.1489868164062, 772.4378051757812, 928.8302001953125, -2.1013259887695312, 464.20855712890625, -293.83746337890625, 1671.5281982421875, -70.87694549560547, -383.5440368652344, 688.571533203125, -58.92735290527344, 40.63853454589844, -10.117437362670898, 1187.6605224609375, 706.7593994140625, 658.7100219726562, 289.30865478515625, 144.16122436523438, -577.372802734375, 536.37255859375, 65.7943344116211, 1124.0325927734375, 533.712890625, -527.4901123046875, -215.81927490234375, 125.64112854003906, 1004.062744140625, 1627.5963134765625, 811.9615478515625, 115.66780090332031, -993.3098754882812, 982.4824829101562, 1144.98583984375, 296.64306640625, 1348.2230224609375, 28.681365966796875, 507.9846496582031, 629.0026245117188, 1935.3614501953125, 178.14678955078125, -499.4209899902344], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000644.npy"}
{"epoch": 0.9456681350954479, "step": 645, "batch_size": 64, "mean": 175.01731872558594, "std": 659.5081787109375, "min": -1298.829833984375, "p10": -631.1125610351562, "median": 100.1280517578125, "p90": 1027.2443725585938, "max": 2012.3572998046875, "pos_frac": 0.578125, "sample": [357.2342834472656, 1019.3172607421875, -617.9749755859375, 847.3087768554688, -1298.829833984375, -633.9696655273438, -640.321533203125, 49.4036865234375, 404.5758972167969, -122.15293884277344, -82.95814514160156, -20.36395263671875, 53.755821228027344, -7.079381942749023, -98.91903686523438, -378.531494140625, 444.0369873046875, 292.5385437011719, 1024.8170166015625, 1077.294189453125, 617.8828735351562, -453.17138671875, 97.0469970703125, -14.759933471679688, 103.44447326660156, 15.927532196044922, 1028.28466796875, -45.616615295410156, -624.4459838867188, -1003.2206420898438, 193.01785278320312, 419.45245361328125, -726.044921875, 1356.6207275390625, 825.5394287109375, -24.849090576171875, 1830.43212890625, 756.6695556640625, -1136.6944580078125, -597.02880859375, 1339.45068359375, -44.542816162109375, 103.2091064453125, 277.8437805175781, 590.3388671875, 524.6203002929688, 364.39752197265625, 323.1175231933594, 114.01844024658203, 554.0120239257812, -8.80938720703125, -258.57940673828125, 1277.5638427734375, 25.62163543701172, 422.106201171875, 2012.3572998046875, -165.2030029296875, 281.498046875, -883.1807861328125, -66.8412857055664, 498.1268310546875, 115.88186645507812, -111.05235290527344, -372.5146484375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000645.npy"}
{"epoch": 0.947136563876652, "step": 646, "batch_size": 64, "mean": 299.66253662109375, "std": 565.7901611328125, "min": -1046.0662841796875, "p10": -358.7328124999999, "median": 218.15110778808594, "p90": 1009.9985107421876, "max": 1971.3475341796875, "pos_frac": 0.734375, "sample": [976.391845703125, 938.7107543945312, -31.03827476501465, 1529.91455078125, 600.204833984375, 175.39137268066406, 641.6393432617188, 651.2796020507812, 892.678466796875, 23.774978637695312, 318.4888610839844, 471.427001953125, -833.209716796875, -27.278289794921875, 1024.4013671875, 162.28382873535156, -465.8937683105469, 274.74029541015625, 972.8084106445312, -62.66362762451172, 360.58917236328125, -151.01513671875, -150.56671142578125, 532.4141845703125, 85.75375366210938, 339.099365234375, 413.34271240234375, 225.4787139892578, 55.93359375, 842.7647094726562, 98.95233154296875, 393.1384582519531, 86.13217163085938, 231.99205017089844, -162.1824493408203, 210.82350158691406, 41.63348388671875, 1601.51708984375, -272.2833251953125, -1046.0662841796875, 466.389892578125, 586.9095458984375, 134.57313537597656, -395.7825927734375, -102.65794372558594, -557.5196533203125, -89.97727966308594, 63.448814392089844, -430.2786865234375, 1971.3475341796875, 1088.910888671875, 252.01593017578125, 621.0103149414062, 84.92619323730469, 1263.6263427734375, -225.01429748535156, -603.076171875, 77.51888275146484, 142.5797576904297, 118.9849624633789, 1081.288818359375, 935.4690551757812, 427.956787109375, 294.2488708496094], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000646.npy"}
{"epoch": 0.9486049926578561, "step": 647, "batch_size": 64, "mean": 376.28173828125, "std": 529.1831665039062, "min": -1044.6282958984375, "p10": -165.76641540527342, "median": 340.0340576171875, "p90": 1036.2400146484374, "max": 1969.6241455078125, "pos_frac": 0.78125, "sample": [771.55029296875, 587.1267700195312, 302.8271484375, 237.04591369628906, 274.9012145996094, 1159.206298828125, 299.6820983886719, 357.5486145019531, 548.8344116210938, 531.2360229492188, 1539.8382568359375, 79.43240356445312, 43.530059814453125, 1741.4732666015625, 713.2796630859375, 480.523681640625, -90.63890075683594, 1055.1282958984375, 639.4127197265625, -156.82315063476562, 206.24154663085938, 179.54058837890625, 70.70921325683594, -149.43788146972656, 634.801025390625, 322.1142578125, -72.54380798339844, 1969.6241455078125, -814.9738159179688, 822.7257690429688, -231.40045166015625, -267.450439453125, 103.94039916992188, 744.4016723632812, 987.1204223632812, 713.5398559570312, 443.305908203125, 411.27459716796875, -580.2643432617188, 258.63037109375, 560.4560546875, 132.644775390625, 1036.45849609375, -169.5992431640625, -51.653621673583984, 400.210693359375, 415.4840087890625, 426.40447998046875, -2.2080307006835938, 1134.688720703125, 166.1620330810547, 96.8271713256836, 1035.730224609375, 303.005859375, -28.318130493164062, 551.6912841796875, -1044.6282958984375, 808.636962890625, -273.59002685546875, 352.12420654296875, 453.3003234863281, 327.94390869140625, 386.9196472167969, 196.32473754882812], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000647.npy"}
{"epoch": 0.9500734214390602, "step": 648, "batch_size": 64, "mean": 573.0113525390625, "std": 761.2134399414062, "min": -977.2382202148438, "p10": -197.3195037841797, "median": 465.2696838378906, "p90": 1561.6725585937506, "max": 3535.578125, "pos_frac": 0.765625, "sample": [499.7170104980469, 263.19049072265625, 213.77841186523438, 540.4767456054688, 342.99176025390625, 650.4082641601562, 1926.5565185546875, 1641.1658935546875, 676.2152099609375, 1326.6741943359375, 89.71463012695312, -608.9654541015625, 166.90115356445312, 945.9666137695312, 448.338623046875, -148.2788848876953, 1290.8253173828125, 690.1552124023438, -4.5208282470703125, -194.23681640625, -29.18525505065918, 471.6885070800781, -20.25922203063965, 1015.486328125, -231.7582244873047, 1777.2821044921875, 645.3438110351562, 106.86088562011719, -31.515548706054688, -160.52459716796875, 814.726318359375, 849.1474609375, -198.64065551757812, 3535.578125, 650.9221801757812, 431.91455078125, -977.2382202148438, 578.453125, 141.34014892578125, -685.6121826171875, -243.2498779296875, 303.585205078125, 2295.402099609375, 726.1011352539062, 686.349609375, 439.0501403808594, 1258.87548828125, -66.59405517578125, 614.7490844726562, 676.9224853515625, 1054.809326171875, 361.8047180175781, 355.0747985839844, 558.8428344726562, 1235.60888671875, 131.76547241210938, 458.8508605957031, 748.348388671875, 392.60540771484375, 2233.345458984375, 1376.1881103515625, 264.9394226074219, -379.3087463378906, 1747.577392578125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000648.npy"}
{"epoch": 0.9515418502202643, "step": 649, "batch_size": 64, "mean": 268.9690856933594, "std": 651.1288452148438, "min": -834.8048095703125, "p10": -455.6562133789062, "median": 235.96407318115234, "p90": 924.3490539550781, "max": 3146.726806640625, "pos_frac": 0.609375, "sample": [454.0421142578125, 25.90839385986328, 402.8141174316406, -120.50970458984375, 632.6165771484375, -282.8569030761719, -54.81667709350586, 186.09100341796875, 722.024169921875, 319.6383056640625, 560.4243774414062, -135.105224609375, -529.0042114257812, 60.34861755371094, -557.8533935546875, -185.92404174804688, -128.3430633544922, 423.7997741699219, -290.3868713378906, 279.9761047363281, 568.7677001953125, 214.2870330810547, -551.09326171875, -180.1239776611328, 906.669921875, -768.6732788085938, 738.1129760742188, 923.6049194335938, 8.230182647705078, 576.2935180664062, 346.5594177246094, -349.03961181640625, 58.82769775390625, -76.21138763427734, 894.6036376953125, 554.3671875, 806.414794921875, 406.59844970703125, 765.8644409179688, -433.0146484375, 1169.60498046875, 402.3209228515625, -6.111270904541016, 1101.3087158203125, 753.0697021484375, -213.09573364257812, 1435.7845458984375, 3146.726806640625, 415.497314453125, 343.041015625, -113.26444244384766, 257.64111328125, 23.494766235351562, 843.9743041992188, -341.7607421875, 1243.416015625, 1247.576416015625, -465.3597412109375, -47.7595100402832, -123.98291778564453, -748.8121337890625, -834.8048095703125, 924.66796875, -393.08050537109375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000649.npy"}
{"epoch": 0.9530102790014684, "step": 650, "batch_size": 64, "mean": 419.9454345703125, "std": 687.3458862304688, "min": -1141.910400390625, "p10": -434.7271606445312, "median": 346.8834686279297, "p90": 1200.778015136719, "max": 2199.311279296875, "pos_frac": 0.765625, "sample": [79.46936798095703, -544.785400390625, 764.740234375, 816.3489379882812, 213.11398315429688, 1037.00341796875, 292.0593566894531, -881.289306640625, 2199.311279296875, 593.120849609375, 1021.2299194335938, -652.0293579101562, 1361.5758056640625, 1006.8744506835938, 874.5537719726562, 1147.740966796875, 331.673583984375, 1549.1419677734375, 1070.9434814453125, 2187.149658203125, 834.0693969726562, -138.6224822998047, 1589.344482421875, 531.4661865234375, 275.4721374511719, 45.4718132019043, -77.42938232421875, 470.9978942871094, 158.50662231445312, 454.9624328613281, 658.6456298828125, -80.38185119628906, -334.9857177734375, -896.8990478515625, -361.0693359375, 241.92337036132812, -40.27042770385742, 1954.149658203125, -108.87071228027344, 64.230712890625, 531.6150512695312, 354.4084777832031, -453.8600158691406, 590.3381958007812, 643.3798828125, 563.607666015625, 66.82362365722656, 793.1845703125, 114.78498077392578, 1223.5081787109375, 293.7322692871094, 339.35845947265625, 556.00830078125, 872.7377319335938, 782.7803955078125, 134.3616943359375, 439.12335205078125, -547.2074584960938, -1141.910400390625, 236.08221435546875, 821.251220703125, 80.36127471923828, -390.0838317871094, 263.4640197753906], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000650.npy"}
{"epoch": 0.9544787077826725, "step": 651, "batch_size": 64, "mean": 535.0650634765625, "std": 584.499755859375, "min": -411.5257568359375, "p10": -31.623396301269526, "median": 437.83465576171875, "p90": 1293.09287109375, "max": 2131.12158203125, "pos_frac": 0.84375, "sample": [883.3671264648438, 560.3670043945312, 2095.4765625, 239.572265625, -348.5172119140625, -16.717529296875, 378.3096923828125, 600.0703125, 63.20970153808594, 145.39356994628906, 1302.0018310546875, 2131.12158203125, 320.9993591308594, -64.17559051513672, 45.0697021484375, -33.379478454589844, 61.63461685180664, 477.21478271484375, -239.98580932617188, 831.8124389648438, 1673.6397705078125, 339.62933349609375, 833.4229125976562, 314.868896484375, 672.4921264648438, 55.05743408203125, -19.790311813354492, -77.935546875, 183.462158203125, 658.40576171875, 175.19178771972656, -27.52587127685547, 959.829345703125, 50.2081298828125, 1257.216796875, 913.1997680664062, 567.8753051757812, 28.962953567504883, 764.785400390625, 3.726177215576172, 438.72015380859375, 1109.199951171875, -339.6855163574219, 485.8172302246094, 624.48193359375, 662.93896484375, 31.430694580078125, 868.3294067382812, 51.53947067260742, 604.954345703125, 286.84002685546875, 338.1643981933594, 1272.3052978515625, 1811.1654052734375, 436.94915771484375, -411.5257568359375, 1132.1046142578125, 401.7684326171875, 1701.0355224609375, 480.7572326660156, 1066.696044921875, 1429.2147216796875, 898.688232421875, 102.70648193359375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000651.npy"}
{"epoch": 0.9559471365638766, "step": 652, "batch_size": 64, "mean": 408.88043212890625, "std": 691.3773193359375, "min": -1759.1400146484375, "p10": -268.04411621093743, "median": 295.58375549316406, "p90": 1306.3638671875003, "max": 2550.3115234375, "pos_frac": 0.796875, "sample": [285.569091796875, 108.91537475585938, 762.5415649414062, -945.2781372070312, 572.6546020507812, 360.4902648925781, 226.58872985839844, -454.3736877441406, 711.4337768554688, 160.77603149414062, 1412.1546630859375, 1419.8739013671875, 1202.84326171875, -1759.1400146484375, 67.92449188232422, 479.1089172363281, 58.621978759765625, 954.7611083984375, 1345.4896240234375, -332.2723083496094, 906.7329711914062, 324.0956115722656, 92.24553680419922, -135.44500732421875, 0.14005470275878906, 2.20391845703125, 880.829345703125, -62.13499450683594, 347.9084167480469, 107.07593536376953, 114.70500183105469, 2550.3115234375, -292.1574401855469, 2297.902587890625, 318.907470703125, 1700.7547607421875, 73.97257995605469, 151.527099609375, 12.228273391723633, 29.609466552734375, 449.27264404296875, -45.75366973876953, 202.85610961914062, -61.925697326660156, 963.1719970703125, 836.3619384765625, 845.8238525390625, -211.77969360351562, 755.4998779296875, 650.0646362304688, 1621.0391845703125, 108.81788635253906, 810.0131225585938, 159.65725708007812, -460.432373046875, 475.1972961425781, 1215.0704345703125, -339.47760009765625, 372.5177001953125, 767.6251220703125, 240.3361358642578, 470.32281494140625, 305.5984191894531, -21.624954223632812], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000652.npy"}
{"epoch": 0.9574155653450808, "step": 653, "batch_size": 64, "mean": 372.00970458984375, "std": 634.815185546875, "min": -1070.010009765625, "p10": -393.1767944335937, "median": 310.5693664550781, "p90": 1267.6008056640628, "max": 2081.242919921875, "pos_frac": 0.6875, "sample": [768.6011962890625, -0.048065185546875, -102.21034240722656, 1825.62744140625, 336.1873779296875, -415.8334655761719, 156.37940979003906, -292.756103515625, -9.8680419921875, 880.7503051757812, 669.3104248046875, -415.0765380859375, -95.47069549560547, 1345.0289306640625, 794.4896850585938, 1170.7138671875, 844.3809814453125, 1087.61572265625, 1309.123779296875, -89.83538818359375, 152.72677612304688, 332.35809326171875, 1652.2247314453125, 359.38433837890625, 415.6893310546875, 697.63671875, 1006.5425415039062, 328.00146484375, -46.017662048339844, -470.5426940917969, 1382.081787109375, 293.13726806640625, 612.1445922851562, -1070.010009765625, -100.2872085571289, -114.79200744628906, 275.81414794921875, 558.9244384765625, -604.5177612304688, 1910.5908203125, 2081.242919921875, 557.5914306640625, 19.987455368041992, -36.93743896484375, 712.2178955078125, 657.1932373046875, 17.630950927734375, -324.641845703125, 238.31375122070312, 230.7211456298828, 397.1346740722656, 133.05262756347656, 848.4739990234375, 175.90280151367188, 490.68829345703125, -587.8296508789062, -436.0905456542969, 388.89013671875, 684.8375244140625, 97.16310119628906, 453.0977478027344, -342.077392578125, 219.79586791992188, -205.93870544433594], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000653.npy"}
{"epoch": 0.9588839941262849, "step": 654, "batch_size": 64, "mean": 344.0230407714844, "std": 568.2992553710938, "min": -526.3980712890625, "p10": -338.75197753906247, "median": 225.1781463623047, "p90": 1235.4175537109377, "max": 1817.144775390625, "pos_frac": 0.671875, "sample": [36.77178192138672, 404.79022216796875, -224.39312744140625, 149.341064453125, 453.61883544921875, -74.64555358886719, -346.2597961425781, 345.4005126953125, -74.07225036621094, 670.764892578125, 689.7698364257812, -321.2337341308594, 113.55919647216797, 450.3548583984375, -417.95074462890625, -208.60589599609375, -31.68590545654297, 1676.6407470703125, 897.259033203125, 522.4780883789062, 576.3344116210938, 1541.5594482421875, 419.4278564453125, 234.14105224609375, 1283.6087646484375, 589.2881469726562, -95.95066833496094, 672.3994140625, 152.47592163085938, -103.78131103515625, -100.21339416503906, 1259.4013671875, 438.10223388671875, 62.363922119140625, 29.102218627929688, 12.460273742675781, 832.9878540039062, 184.23483276367188, 207.27749633789062, 410.1642150878906, 445.42938232421875, 737.9785766601562, 661.69775390625, -186.12905883789062, -526.3980712890625, 850.1768188476562, 314.5965576171875, 1660.6080322265625, 1179.455322265625, -176.65565490722656, 92.759033203125, 1283.169921875, 556.4208374023438, 216.21524047851562, -51.87688446044922, -425.73931884765625, 523.4403686523438, -363.7797546386719, 1817.144775390625, 1121.6119384765625, -29.384307861328125, -420.67340087890625, -419.8182373046875, -160.06228637695312], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000654.npy"}
{"epoch": 0.960352422907489, "step": 655, "batch_size": 64, "mean": 363.5751647949219, "std": 598.2679443359375, "min": -727.5556640625, "p10": -318.69305419921875, "median": 271.2021179199219, "p90": 1091.5688720703126, "max": 2187.298583984375, "pos_frac": 0.71875, "sample": [-368.36383056640625, -727.5556640625, 1237.007080078125, 105.54592895507812, 36.386817932128906, 1144.25537109375, 741.123779296875, 858.2678833007812, 352.1047058105469, -300.475341796875, 734.6224975585938, 46.611236572265625, -86.98876953125, -529.9744262695312, -8.345525741577148, 1085.634765625, -136.17630004882812, 865.2569580078125, -301.66943359375, 1064.29736328125, -231.7476806640625, 351.60565185546875, 889.896484375, -597.5521850585938, -325.9888916015625, 563.018798828125, 328.8591003417969, 42.249298095703125, -103.70683288574219, 261.79779052734375, -92.19210052490234, 1568.48828125, 125.64064025878906, 280.6064453125, 608.6972045898438, 841.510986328125, -610.3556518554688, 762.8802490234375, 584.009765625, 934.510498046875, 350.70147705078125, 67.88199615478516, 596.07861328125, 2187.298583984375, 1497.0819091796875, -123.21377563476562, 391.8445129394531, -506.3506164550781, 1078.0592041015625, 482.58489990234375, 57.42109680175781, 132.0570068359375, 29.74603271484375, 196.5077362060547, 1094.112060546875, 113.80601501464844, 1550.4908447265625, -164.4427032470703, -158.9131622314453, 512.73095703125, 677.03564453125, 185.25086975097656, 151.6186981201172, 875.6293334960938], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000655.npy"}
{"epoch": 0.9618208516886931, "step": 656, "batch_size": 64, "mean": 634.9487915039062, "std": 692.1862182617188, "min": -467.1212158203125, "p10": -89.35536956787108, "median": 439.7911376953125, "p90": 1718.9023193359378, "max": 2702.217529296875, "pos_frac": 0.8125, "sample": [1422.609619140625, 788.272216796875, -467.1212158203125, -41.31941604614258, 134.21270751953125, 250.88934326171875, 319.97869873046875, 547.6653442382812, 291.9389343261719, 785.2789306640625, 808.2479248046875, 828.2152709960938, 328.484130859375, 163.62075805664062, 496.6015930175781, 1643.505859375, -35.81463623046875, 50.9404296875, 1751.215087890625, 174.5457763671875, 875.0036010742188, -31.5513916015625, 2054.27880859375, -153.33401489257812, -114.5877685546875, 255.1951904296875, 872.8231811523438, 1056.50927734375, 424.24945068359375, 1152.98779296875, 378.67706298828125, 2702.217529296875, 761.1569213867188, -117.16465759277344, 2200.235107421875, 868.064453125, 455.33282470703125, 942.3582763671875, 2228.114013671875, 87.13436889648438, 2008.763671875, -264.568603515625, 1253.03125, 63.702392578125, 1095.1728515625, 971.3910522460938, 301.58306884765625, 2029.4388427734375, 322.39837646484375, 181.31021118164062, 849.7644653320312, 104.70396423339844, -212.61117553710938, 348.9937744140625, 942.200927734375, 205.39309692382812, -26.867202758789062, 291.54425048828125, -70.62853240966797, 962.6752319335938, 867.872802734375, -97.38115692138672, 701.9318237304688, 667.2391357421875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000656.npy"}
{"epoch": 0.9632892804698973, "step": 657, "batch_size": 64, "mean": 511.4633483886719, "std": 663.112548828125, "min": -1340.15625, "p10": -121.92846679687494, "median": 444.7928466796875, "p90": 1308.045056152344, "max": 2365.6083984375, "pos_frac": 0.84375, "sample": [1222.896240234375, 28.468738555908203, 795.0856323242188, 454.484130859375, 1483.534912109375, 1527.150390625, 743.0497436523438, 120.00440979003906, 1179.494873046875, -50.5718994140625, -371.77008056640625, 489.0758056640625, 1324.669189453125, -143.0727996826172, 221.8474578857422, 509.5240478515625, 59.00129699707031, 395.5807189941406, 81.53997039794922, 1322.3680419921875, 2210.852294921875, 453.52264404296875, 131.97320556640625, 2337.874267578125, 1274.624755859375, 1201.318115234375, 267.37799072265625, 9.23663330078125, 645.32470703125, 571.031005859375, 974.57861328125, 426.5936279296875, 33.05078887939453, -1340.15625, 579.6759643554688, 527.2648315429688, 277.14715576171875, -208.17828369140625, 26.84979248046875, 263.07501220703125, 614.413330078125, -955.394775390625, -207.60647583007812, 566.2081909179688, 910.1259765625, 67.43582916259766, 266.5677490234375, 600.726318359375, 372.048828125, -47.92156219482422, 727.2664184570312, 780.2937622070312, 138.33030700683594, 859.4559936523438, 436.06304931640625, 340.8774719238281, 1249.2098388671875, 2365.6083984375, 414.5504455566406, -224.72573852539062, 256.5864562988281, -72.59169006347656, 734.4368286132812, 486.28900146484375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000657.npy"}
{"epoch": 0.9647577092511013, "step": 658, "batch_size": 64, "mean": 405.5053405761719, "std": 590.6372680664062, "min": -1091.5194091796875, "p10": -402.50959167480465, "median": 391.8435821533203, "p90": 1167.2783813476565, "max": 1858.0052490234375, "pos_frac": 0.75, "sample": [836.7709350585938, 842.80712890625, 587.8585205078125, -291.71697998046875, 372.1599426269531, -151.27960205078125, 1085.3582763671875, 304.3688049316406, -414.719970703125, 422.9096374511719, 403.69110107421875, 1353.8951416015625, 43.19029235839844, 303.6872253417969, 312.02716064453125, -515.2586669921875, 316.7903137207031, 342.8001403808594, 124.93045806884766, -75.96629333496094, 563.2762451171875, 238.1942138671875, 447.32745361328125, 143.9008026123047, -1091.5194091796875, 959.624755859375, -601.9384765625, 629.0603637695312, 582.4058227539062, -147.32269287109375, -120.03849792480469, 266.81365966796875, -49.54678726196289, 437.299072265625, 234.26377868652344, 1528.2198486328125, -35.07398223876953, 1183.2874755859375, 788.7147216796875, 1129.923828125, 85.91145324707031, 379.9960632324219, 656.4296875, 1370.9310302734375, 1062.78662109375, 607.8724365234375, -374.0187072753906, 514.1343383789062, 762.9955444335938, 853.371337890625, -436.36187744140625, 1382.2298583984375, -414.928466796875, 1858.0052490234375, 661.5194702148438, 1033.2542724609375, 588.209716796875, 1487.9083251953125, -347.2110900878906, 708.199462890625, 104.27021789550781, 488.8540344238281, 219.88232421875, -593.0755004882812], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000658.npy"}
{"epoch": 0.9662261380323054, "step": 659, "batch_size": 64, "mean": 394.2469482421875, "std": 556.2286376953125, "min": -685.5004272460938, "p10": -193.03634033203122, "median": 332.53062438964844, "p90": 1115.0319824218752, "max": 2293.134765625, "pos_frac": 0.75, "sample": [662.7979736328125, -58.962982177734375, 182.58932495117188, 828.9134521484375, -151.3179168701172, -210.91566467285156, 1197.48779296875, -319.54144287109375, -501.9113464355469, -136.95899963378906, 1045.69677734375, 360.97589111328125, 489.39239501953125, -127.99723815917969, 64.13514709472656, 181.516845703125, 588.5054931640625, 157.80661010742188, 221.49853515625, 212.49795532226562, 328.9532165527344, 540.7308959960938, -484.50103759765625, 2293.134765625, -685.5004272460938, 1360.30078125, 404.8584899902344, 561.46630859375, 1144.7470703125, 318.79644775390625, 349.8345642089844, 323.9792785644531, 0.345489501953125, -26.601821899414062, 605.6060791015625, 286.7972412109375, 144.6160125732422, 17.474018096923828, 55.09608459472656, 1176.3753662109375, -514.0553588867188, -5.222831726074219, 647.2965087890625, -91.59107971191406, 708.5199584960938, -14.243656158447266, -34.547019958496094, 665.3270874023438, 1613.075927734375, 1759.1561279296875, 811.0359497070312, 182.45506286621094, 666.435791015625, 509.1329040527344, 457.06109619140625, 336.1080322265625, 995.7023315429688, 987.3076782226562, 567.8973388671875, -284.5773620605469, 818.8896484375, 118.17903137207031, 439.0303955078125, 490.71337890625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000659.npy"}
{"epoch": 0.9676945668135095, "step": 660, "batch_size": 64, "mean": 507.5157470703125, "std": 622.8057861328125, "min": -1056.7882080078125, "p10": -181.06907501220704, "median": 426.82086181640625, "p90": 1318.6878173828125, "max": 2204.735595703125, "pos_frac": 0.765625, "sample": [-124.18708801269531, 246.45346069335938, 1150.07080078125, 444.2776184082031, 411.26666259765625, 489.45166015625, 637.6348876953125, 1230.4150390625, 1305.096435546875, 770.8545532226562, 356.56109619140625, -257.60760498046875, 1178.047119140625, 514.9700317382812, 882.9615478515625, 1667.45556640625, -167.1522674560547, -50.54512023925781, -310.62469482421875, -9.339935302734375, 1163.7379150390625, 1516.0853271484375, 39.25739288330078, 412.4570617675781, 818.4234619140625, 1859.5419921875, 424.00115966796875, -183.5478973388672, 703.579345703125, 429.64056396484375, 155.50320434570312, 1324.5126953125, -480.4205322265625, 2204.735595703125, 336.65484619140625, 616.4765014648438, 792.9755859375, 103.12025451660156, 389.8109436035156, 815.9220581054688, 188.3005828857422, 178.99366760253906, 696.927490234375, 684.8143920898438, -42.107879638671875, -47.556365966796875, 62.74555206298828, 1530.51513671875, 829.2575073242188, 1111.4710693359375, -282.5328369140625, -40.886024475097656, -543.8995361328125, 660.2755126953125, 1006.798828125, 463.2921142578125, 167.9827880859375, 311.5195007324219, 595.5159301757812, 1624.0294189453125, -175.28515625, -1056.7882080078125, 328.37957763671875, 420.7174377441406], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000660.npy"}
{"epoch": 0.9691629955947136, "step": 661, "batch_size": 64, "mean": 452.12884521484375, "std": 632.3477783203125, "min": -760.7965087890625, "p10": -111.15544509887691, "median": 386.69847106933594, "p90": 1083.0947570800788, "max": 3187.802734375, "pos_frac": 0.828125, "sample": [172.1627655029297, -66.90030670166016, 805.8751220703125, -20.486467361450195, 402.58392333984375, 162.4553680419922, 539.1502075195312, -158.55807495117188, 649.152587890625, 327.14520263671875, 1155.3160400390625, 403.8909912109375, 624.4071655273438, 609.8284912109375, 3187.802734375, 914.513427734375, 192.41940307617188, -34.586669921875, -675.94873046875, 167.24746704101562, -40.36570358276367, 914.5784301757812, 139.60655212402344, 394.3825378417969, 235.6236572265625, 198.1760711669922, 336.45166015625, 895.957275390625, 1273.14599609375, 52.356109619140625, 309.436767578125, 535.9168090820312, 632.93896484375, 1342.64599609375, 487.3496398925781, 282.43450927734375, 189.89556884765625, 302.2533264160156, 204.2480926513672, 549.7162475585938, -540.5753173828125, 379.014404296875, -130.12193298339844, 807.0400390625, 818.018798828125, -760.7965087890625, 604.921630859375, 685.8359985351562, 39.31885528564453, 692.9284057617188, 2525.623046875, -646.9630737304688, 318.75634765625, 13.737958908081055, 730.4425048828125, 1499.5396728515625, 108.16448211669922, 602.4156494140625, -137.78558349609375, 1321.3770751953125, 457.4291076660156, 516.066162109375, 432.8846435546875, 4.754829406738281], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000661.npy"}
{"epoch": 0.9706314243759178, "step": 662, "batch_size": 64, "mean": 363.9100341796875, "std": 667.5560302734375, "min": -1018.5471801757812, "p10": -452.38113403320307, "median": 388.5538330078125, "p90": 1083.431970214844, "max": 2341.130859375, "pos_frac": 0.734375, "sample": [-74.35799407958984, 855.3079833984375, 1332.9970703125, -1018.5471801757812, 564.2538452148438, -392.86212158203125, 440.67401123046875, 824.9627685546875, 1870.2249755859375, 25.327430725097656, 882.7548828125, -477.8892822265625, -995.8035888671875, -186.9601593017578, 1264.3480224609375, 89.73241424560547, 25.752187728881836, 356.60150146484375, 2341.130859375, 877.81201171875, 1106.948974609375, -175.90640258789062, 402.001220703125, -151.49957275390625, 172.556640625, 851.7540283203125, 115.73536682128906, 1028.5589599609375, 315.4385681152344, 206.04962158203125, 378.525634765625, -284.27972412109375, 292.9517822265625, -371.4171447753906, 497.35052490234375, 980.665283203125, 551.798583984375, 419.0500183105469, 658.7398681640625, 630.0344848632812, 35.14959716796875, 228.5423126220703, 933.1163330078125, 2082.999267578125, 743.46630859375, 1238.466552734375, -909.5267944335938, 441.78564453125, 901.538330078125, 398.58203125, -514.7301025390625, 65.45465087890625, -91.25364685058594, 501.16229248046875, -553.0845947265625, -92.58585357666016, 529.5029907226562, 562.6837768554688, 336.2253723144531, 556.3697509765625, 605.3076171875, -748.5584106445312, -253.38916015625, 62.50328063964844], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000662.npy"}
{"epoch": 0.9720998531571219, "step": 663, "batch_size": 64, "mean": 427.7143859863281, "std": 727.115478515625, "min": -1694.7257080078125, "p10": -237.8706283569335, "median": 300.20835876464844, "p90": 1213.0588378906252, "max": 3200.92431640625, "pos_frac": 0.78125, "sample": [828.496826171875, 369.94970703125, 823.0215454101562, 22.444974899291992, 439.67913818359375, 1145.0361328125, 136.33116149902344, 315.3774719238281, -56.46710205078125, 336.5203552246094, 93.69747161865234, 149.06797790527344, 1422.14892578125, 888.4850463867188, 63.12586975097656, 875.6868286132812, 719.1236572265625, 101.49433135986328, -1694.7257080078125, 137.70416259765625, 285.03924560546875, 527.9566040039062, -304.23101806640625, 714.2759399414062, 639.3374633789062, 363.263427734375, 592.9968872070312, 140.43026733398438, 460.5006103515625, 3200.92431640625, -269.7905578613281, 1035.660888671875, 1557.8240966796875, -10.287460327148438, 571.07373046875, 2204.167724609375, 1591.780029296875, 57.31132125854492, 17.951021194458008, 1900.9798583984375, 1005.8092041015625, -340.6411437988281, 116.9930419921875, 352.31134033203125, -85.03192138671875, 685.5337524414062, 1017.634521484375, 151.5701446533203, 193.83657836914062, 975.71826171875, 209.57191467285156, 33.850547790527344, 1242.21142578125, -1086.684814453125, 767.7183837890625, -21.267017364501953, -427.6007385253906, -163.3907928466797, 82.67929077148438, 675.0702514648438, -415.9674072265625, -55.05042266845703, -93.30599975585938, 158.78866577148438], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000663.npy"}
{"epoch": 0.973568281938326, "step": 664, "batch_size": 64, "mean": 486.3853759765625, "std": 654.1492309570312, "min": -750.6922607421875, "p10": -184.4120376586914, "median": 408.42686462402344, "p90": 1227.3519165039065, "max": 2328.0234375, "pos_frac": 0.734375, "sample": [1049.2052001953125, -7.939638137817383, 663.85986328125, 682.8703002929688, 841.551513671875, 149.20584106445312, 310.05078125, 1910.4854736328125, -126.0315170288086, 1581.054443359375, -571.5169677734375, 748.0634765625, 413.84637451171875, 265.2309875488281, 566.6636352539062, 255.52967834472656, -750.6922607421875, 428.86651611328125, -12.436485290527344, 521.1727905273438, 1248.297607421875, 1143.156982421875, 121.08062744140625, 1079.729248046875, -149.81301879882812, 171.06936645507812, 1969.25830078125, -178.96273803710938, 546.8881225585938, -464.9712829589844, 1043.197265625, 132.34292602539062, -203.84918212890625, 1092.184814453125, 520.5202026367188, 403.0073547363281, -734.3430786132812, 373.36163330078125, 658.2886962890625, 1159.537353515625, 64.55982971191406, 511.2035827636719, 858.3264770507812, -22.26953125, 985.3163452148438, 2146.91845703125, 751.5667114257812, 280.22576904296875, 2328.0234375, -101.65008544921875, -129.39566040039062, 1289.326171875, 1178.4786376953125, 202.78602600097656, 376.0643615722656, 431.5334167480469, 141.92202758789062, 327.23883056640625, -186.74745178222656, -6.025482177734375, 439.3348693847656, -2.571502685546875, 801.9874877929688, -386.51104736328125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000664.npy"}
{"epoch": 0.9750367107195301, "step": 665, "batch_size": 64, "mean": 407.27813720703125, "std": 496.0669250488281, "min": -518.7260131835938, "p10": -96.43661422729491, "median": 368.2687683105469, "p90": 1018.5424499511723, "max": 1654.29833984375, "pos_frac": 0.765625, "sample": [-102.79952239990234, 34.52334213256836, 417.5956115722656, 342.9728088378906, -62.686561584472656, 420.4735107421875, 578.503662109375, -38.3653564453125, 1516.3858642578125, 383.9007568359375, 287.0401916503906, 592.3799438476562, 161.1244354248047, -415.13531494140625, 548.5784301757812, -36.207672119140625, -68.12399291992188, 219.68893432617188, 26.997573852539062, 500.43341064453125, 731.266845703125, 151.71949768066406, 461.00054931640625, 368.4427795410156, -81.58982849121094, 321.3728332519531, -518.7260131835938, 1454.730224609375, 728.5767822265625, -25.995319366455078, 786.7196655273438, 402.2844543457031, 724.6168212890625, 426.8518981933594, 1191.90380859375, 166.09185791015625, 1067.0755615234375, 536.4588623046875, 368.0947570800781, 262.7430419921875, 273.0625305175781, 1567.8978271484375, 518.8535766601562, -305.13568115234375, 604.9796752929688, 866.8262939453125, 335.66552734375, -445.873046875, 485.9794921875, 905.2985229492188, 99.63568878173828, 169.19566345214844, 846.1979370117188, 146.82962036132812, 1645.3026123046875, -246.4199981689453, 668.4729614257812, -217.87338256835938, 248.94747924804688, 822.6636352539062, 1654.29833984375, -30.890365600585938, 679.7365112304688, -58.76785659790039], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000665.npy"}
{"epoch": 0.9765051395007343, "step": 666, "batch_size": 64, "mean": 405.50140380859375, "std": 587.4401245117188, "min": -950.8848876953125, "p10": -266.8505676269531, "median": 342.16148376464844, "p90": 1031.7565002441406, "max": 2664.9169921875, "pos_frac": 0.78125, "sample": [729.36328125, 323.21368408203125, 682.45068359375, 433.6502990722656, -454.85308837890625, 612.371826171875, 1019.6464233398438, 965.7843017578125, 137.64352416992188, 944.165771484375, -33.68067169189453, 196.78024291992188, 314.0853576660156, -403.4537353515625, -291.38983154296875, 229.8118133544922, 270.484619140625, -207.56292724609375, -74.608642578125, -53.592342376708984, 1314.8555908203125, 6.117134094238281, 700.3867797851562, 499.328125, 2031.5859375, -12.739395141601562, 769.2479858398438, 359.13525390625, -173.41848754882812, -619.8602905273438, 353.9173889160156, 656.5501708984375, 468.45367431640625, 1036.946533203125, 906.1788330078125, 449.43804931640625, 927.1303100585938, -324.4361267089844, 395.2567138671875, 538.0777587890625, 57.71828842163086, 669.5303344726562, -209.59228515625, 1206.961669921875, 418.83526611328125, 223.29791259765625, -950.8848876953125, 1038.422119140625, 303.5636901855469, 98.11404418945312, 380.73590087890625, 712.2042236328125, 584.900146484375, 2.9948158264160156, 215.7286376953125, 133.72232055664062, 214.926025390625, 329.7464294433594, 2664.9169921875, 226.44679260253906, -451.8252868652344, 881.6838989257812, 330.40557861328125, 1247.074462890625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000666.npy"}
{"epoch": 0.9779735682819384, "step": 667, "batch_size": 64, "mean": 486.9377136230469, "std": 629.6569213867188, "min": -682.1517333984375, "p10": -259.1457885742187, "median": 491.6921081542969, "p90": 1202.9392944335941, "max": 2206.780517578125, "pos_frac": 0.78125, "sample": [441.4969787597656, -496.92669677734375, 678.6801147460938, -682.1517333984375, -108.99288940429688, 1017.0732421875, 1011.528076171875, 583.12353515625, 565.7623901367188, 1251.6021728515625, 208.9102020263672, 149.59359741210938, 103.66565704345703, 476.121826171875, 344.5682678222656, -624.3242797851562, 678.548828125, 570.77734375, 1721.992431640625, 870.3876342773438, 787.0485229492188, 226.47360229492188, 1089.392578125, 108.4042739868164, 568.9268798828125, 405.46405029296875, 991.1597290039062, -74.20409393310547, 935.8873291015625, 756.3726196289062, 719.5989990234375, 606.2213134765625, 992.4575805664062, 642.5000610351562, 417.1100769042969, -267.6137390136719, 507.26239013671875, 371.7845458984375, 343.5299377441406, -24.13831329345703, -131.96055603027344, 1609.480224609375, 1846.9918212890625, 2.0548324584960938, 745.2744750976562, 1385.208740234375, 684.0427856445312, 230.27828979492188, 116.10638427734375, 700.154052734375, -51.203895568847656, 840.0674438476562, -540.519287109375, 195.97254943847656, 2044.623291015625, 2206.780517578125, 80.89842987060547, 883.8397216796875, -639.6102905273438, 248.47793579101562, -500.42718505859375, -239.38723754882812, -147.82705688476562, 729.6237182617188], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000667.npy"}
{"epoch": 0.9794419970631424, "step": 668, "batch_size": 64, "mean": 324.0137634277344, "std": 728.1632080078125, "min": -1191.6019287109375, "p10": -505.3289031982422, "median": 260.4690856933594, "p90": 1103.2958007812501, "max": 2581.7744140625, "pos_frac": 0.71875, "sample": [463.5840759277344, 267.65185546875, 1250.9188232421875, -1108.8231201171875, 158.5044708251953, 878.8231201171875, 1434.529541015625, 157.7465057373047, 1075.275634765625, 442.8115539550781, -589.8824462890625, 601.9480590820312, 113.93029022216797, 45.97652816772461, 351.52508544921875, 906.6093139648438, 163.2265167236328, 89.23085021972656, -437.7918701171875, -503.76080322265625, -919.1495361328125, 955.188232421875, -478.36175537109375, 792.7893676757812, -150.04550170898438, 864.9398803710938, 289.4995422363281, 347.1341247558594, 34.221832275390625, 613.7603149414062, 339.25689697265625, 703.5767822265625, 1115.304443359375, -506.0009460449219, 945.724609375, 408.09771728515625, -1004.4374389648438, 1003.449951171875, 109.5359878540039, -256.6659851074219, 261.42877197265625, -103.13642120361328, 1438.153564453125, -91.90984344482422, -593.1041870117188, 1005.4630737304688, 5.15203857421875, -307.72052001953125, 248.12484741210938, -39.62755584716797, 259.5093994140625, 216.3057861328125, 282.068115234375, 237.46405029296875, -249.17855834960938, 1022.4555053710938, 348.6669921875, 314.11773681640625, 2315.227294921875, -1191.6019287109375, -54.17414093017578, 1802.945068359375, 58.62554931640625, 2581.7744140625], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000668.npy"}
{"epoch": 0.9809104258443465, "step": 669, "batch_size": 64, "mean": 496.4770202636719, "std": 638.2398681640625, "min": -1229.8983154296875, "p10": -165.74735488891594, "median": 386.17613220214844, "p90": 1416.1716674804688, "max": 2394.5087890625, "pos_frac": 0.828125, "sample": [186.1343536376953, 52.961395263671875, -88.2121810913086, 1131.0281982421875, -771.7473754882812, 361.16815185546875, 664.0033569335938, 603.7587890625, 383.97772216796875, -57.714317321777344, 2394.5087890625, 6.910121917724609, 27.071136474609375, 285.7638244628906, -254.76370239257812, 388.3745422363281, 1582.8172607421875, -0.8650417327880859, 97.61072540283203, 222.5185089111328, 537.391357421875, 718.07568359375, 1428.060302734375, 1263.7947998046875, 103.11729431152344, 1708.53759765625, 91.67788696289062, 1425.3525390625, 950.645751953125, 142.58493041992188, 607.29931640625, 1277.4427490234375, -198.97671508789062, 594.8382568359375, 1394.7496337890625, -539.3407592773438, -261.8236999511719, 805.1028442382812, 838.318603515625, 792.7557373046875, 1504.05859375, 325.47564697265625, 167.160888671875, 327.2902526855469, -1229.8983154296875, 801.1895141601562, 891.2769165039062, 635.1448974609375, 404.97857666015625, 290.8447265625, -615.4642944335938, 1028.09765625, 350.0005798339844, 395.9775695800781, 551.1617431640625, 764.6649780273438, 53.845787048339844, 270.9145812988281, 511.6517028808594, 1307.5650634765625, -61.03676986694336, 1487.705078125, 348.3307800292969, 368.6845703125], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000669.npy"}
{"epoch": 0.9823788546255506, "step": 670, "batch_size": 64, "mean": 490.96636962890625, "std": 652.422119140625, "min": -1213.630859375, "p10": -267.41424865722655, "median": 363.13092041015625, "p90": 1393.9337036132813, "max": 1843.49267578125, "pos_frac": 0.75, "sample": [183.59573364257812, 488.333251953125, 238.90716552734375, 1398.8052978515625, 435.1974792480469, -181.56268310546875, -240.68246459960938, -93.28385925292969, 1026.6966552734375, 583.5147705078125, 293.85546875, -153.25042724609375, -1213.630859375, 1843.49267578125, 1118.0447998046875, 1715.2559814453125, 712.3801879882812, 345.7199401855469, 1194.4986572265625, 584.73388671875, 103.76045227050781, -521.046142578125, 1382.566650390625, -240.2369384765625, -523.6159057617188, 361.38604736328125, 1596.0067138671875, -2.7506065368652344, 945.2492065429688, -438.8877258300781, 1465.7322998046875, 831.8736572265625, -196.2519073486328, 241.39474487304688, 243.03536987304688, 1423.3690185546875, 1191.4527587890625, -291.9923095703125, 242.77059936523438, 1324.366455078125, 435.5880126953125, 1163.7191162109375, 935.56103515625, -189.54925537109375, 1697.7447509765625, 827.2695922851562, 262.7757568359375, 332.1949462890625, -278.8707275390625, -283.44085693359375, 126.88021850585938, 1336.287353515625, 164.49530029296875, 364.87579345703125, 1241.95654296875, 143.57290649414062, 336.8001708984375, 993.7440795898438, -49.53411865234375, 853.036865234375, 432.5327453613281, 533.9708862304688, 65.7071533203125, 555.72607421875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000670.npy"}
{"epoch": 0.9838472834067548, "step": 671, "batch_size": 64, "mean": 397.8563537597656, "std": 647.4110107421875, "min": -1492.1669921875, "p10": -197.82297210693355, "median": 347.3182373046875, "p90": 1218.0906127929688, "max": 1953.7083740234375, "pos_frac": 0.75, "sample": [18.842819213867188, -1492.1669921875, 526.169921875, 82.78826904296875, -120.84072875976562, 762.9373779296875, 249.07449340820312, 328.97186279296875, 643.8062133789062, 338.293701171875, 399.7247314453125, -142.22622680664062, 143.78297424316406, 1130.107666015625, 970.7449951171875, 101.94377136230469, 227.8407745361328, 222.419677734375, 881.50048828125, 747.3036499023438, 99.8531265258789, 1401.7947998046875, -1453.626220703125, 1218.01123046875, 755.4930419921875, -11.54322624206543, 1913.156494140625, 419.49139404296875, 260.79840087890625, 596.1655883789062, -53.47567367553711, -544.2947998046875, -425.56292724609375, 559.2869262695312, 1218.1246337890625, -214.5169677734375, 356.3427734375, 294.6678771972656, 645.990478515625, 1352.3829345703125, 1442.3509521484375, 360.5332946777344, -158.8703155517578, 151.33004760742188, 727.4197998046875, -30.729019165039062, 252.54541015625, 946.1390380859375, -416.5463562011719, -27.936622619628906, 658.0015869140625, 504.5752868652344, -131.1670379638672, 208.21917724609375, 1171.207275390625, 783.3640747070312, -133.73489379882812, -591.38818359375, 222.98316955566406, 1403.4947509765625, 433.43701171875, 1953.7083740234375, 955.7714233398438, 368.5371398925781], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000671.npy"}
{"epoch": 0.9853157121879589, "step": 672, "batch_size": 64, "mean": 405.9329833984375, "std": 721.5693359375, "min": -1195.1287841796875, "p10": -467.53300170898416, "median": 342.03662109375, "p90": 1251.7239868164063, "max": 2709.112548828125, "pos_frac": 0.6875, "sample": [438.6201171875, -51.555633544921875, -257.803466796875, 166.22103881835938, -137.5766143798828, 76.25990295410156, 784.5935668945312, 403.62200927734375, 1714.4166259765625, 280.5134582519531, 94.0310287475586, -145.1148681640625, -204.20108032226562, -270.6177978515625, 483.9450988769531, 435.4221496582031, -120.09963989257812, -551.9252319335938, 325.72198486328125, 1579.7225341796875, 88.42323303222656, 793.58837890625, -18.57135772705078, -116.45895385742188, 1245.547119140625, -1195.1287841796875, 1088.69775390625, -901.3316040039062, 527.3380126953125, 2709.112548828125, -136.3335723876953, 904.4392700195312, 997.533935546875, 1207.000732421875, 1164.6258544921875, 728.3499755859375, 546.7110595703125, -623.6564331054688, 155.01437377929688, 1809.2569580078125, -802.1724853515625, -24.910240173339844, 322.50244140625, 1236.74951171875, 836.0936889648438, 737.9290771484375, 886.8018188476562, 197.21142578125, 820.9574584960938, 617.6870727539062, 1513.272705078125, 151.10244750976562, -117.12869262695312, 658.0972900390625, 26.341981887817383, 739.3953857421875, 1254.3712158203125, 46.62804412841797, -103.72630310058594, -712.7000732421875, 562.4451904296875, 358.35125732421875, 1456.8970947265625, -700.838623046875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000672.npy"}
{"epoch": 0.986784140969163, "step": 673, "batch_size": 64, "mean": 374.9658508300781, "std": 620.1729736328125, "min": -1279.8751220703125, "p10": -318.5804229736328, "median": 383.3581237792969, "p90": 1054.1739746093756, "max": 3075.25146484375, "pos_frac": 0.765625, "sample": [-120.13957214355469, -610.519287109375, 594.4288330078125, 908.6944580078125, 368.90802001953125, 532.31103515625, 669.62158203125, 386.7761535644531, 1250.7537841796875, 398.8625183105469, -267.92828369140625, 1425.116455078125, 456.5157775878906, 544.9613037109375, 150.35960388183594, 99.7340087890625, -493.685546875, 802.5551147460938, 1438.89990234375, 343.2370300292969, -196.2552947998047, 364.049072265625, 384.289306640625, -74.58355712890625, 228.5284881591797, -1279.8751220703125, 235.70046997070312, 643.30419921875, -485.3979187011719, 184.50621032714844, 738.3753051757812, 1391.72412109375, 173.4733428955078, -36.45607376098633, 798.2068481445312, 445.21380615234375, 382.42694091796875, -625.2369995117188, 566.4150390625, 702.868896484375, 706.0660400390625, -13.053060531616211, 213.86123657226562, 701.8587646484375, 535.7115478515625, 80.1613540649414, 495.7966613769531, -250.61996459960938, 466.5911560058594, 3075.25146484375, 121.9030990600586, 118.12217712402344, 567.3902587890625, 646.814453125, -694.9681396484375, 519.16015625, 334.17181396484375, 1157.38134765625, 154.385009765625, -340.2884826660156, -63.000938415527344, 581.8034057617188, 1116.5223388671875, 346.0534362792969], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000673.npy"}
{"epoch": 0.9882525697503671, "step": 674, "batch_size": 64, "mean": 482.26348876953125, "std": 651.3732299804688, "min": -512.14892578125, "p10": -235.7015518188476, "median": 452.4680633544922, "p90": 1048.9989074707034, "max": 3179.323486328125, "pos_frac": 0.75, "sample": [-101.9471435546875, 34.31594467163086, 773.1664428710938, 676.1842651367188, 489.7839660644531, 784.8178100585938, 30.324506759643555, -77.16380310058594, -494.09771728515625, 1072.8914794921875, 1319.1280517578125, 938.0855712890625, -12.765361785888672, 480.25592041015625, -178.7385711669922, 453.72607421875, -119.73880004882812, 1895.226318359375, 521.5364990234375, 3179.323486328125, 549.8239135742188, 448.0809020996094, 87.51585388183594, 946.7074584960938, 502.78497314453125, -260.1142578125, 946.1063842773438, 471.2803955078125, 678.7611083984375, -134.0643310546875, 993.2495727539062, 2011.3006591796875, -127.73666381835938, 300.06512451171875, 368.6880798339844, 779.94091796875, -360.71630859375, -370.64569091796875, -296.9378356933594, 822.7401123046875, 578.2537841796875, 451.2100524902344, 353.47332763671875, 1622.0543212890625, 248.03421020507812, 961.4976806640625, 847.73681640625, 962.0291137695312, 536.0609130859375, 171.97267150878906, -512.14892578125, 142.3229522705078, 1792.1380615234375, -300.4345703125, -4.069238662719727, 657.8743286132812, 156.81130981445312, 648.4544067382812, 395.5738830566406, 597.5096435546875, 175.88027954101562, -128.55210876464844, 153.21633911132812, 336.8197021484375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000674.npy"}
{"epoch": 0.9897209985315712, "step": 675, "batch_size": 64, "mean": 491.496826171875, "std": 655.2893676757812, "min": -729.4570922851562, "p10": -74.34495315551756, "median": 336.0592041015625, "p90": 1344.8483276367188, "max": 2703.61865234375, "pos_frac": 0.78125, "sample": [289.18145751953125, 1354.428955078125, 550.9647216796875, 120.37721252441406, 59.26537322998047, 154.238525390625, 756.6256103515625, -44.32867431640625, 595.0240478515625, 309.5913391113281, 839.0430908203125, -218.6329345703125, 633.6002807617188, 995.4619750976562, -60.02611541748047, -32.92967224121094, 118.35467529296875, 1256.7255859375, 412.1894226074219, 411.8114318847656, 704.0120239257812, -22.596473693847656, 2341.574462890625, 788.9863891601562, 4.269889831542969, 318.4560241699219, -42.086795806884766, 666.3773803710938, 544.5447387695312, 395.5419921875, 335.260009765625, 1365.5709228515625, 357.97845458984375, 1580.9990234375, -80.48159790039062, 1116.2972412109375, 2703.61865234375, -109.92744445800781, 1130.72412109375, 2034.159912109375, 183.9225616455078, 153.14340209960938, -44.33924102783203, 141.737060546875, 336.8583984375, -48.808319091796875, -729.4570922851562, 256.1578063964844, 641.985107421875, 226.25747680664062, 69.96144104003906, 1388.737060546875, 637.755859375, -560.2387084960938, -565.8975830078125, 407.596923828125, 115.99374389648438, 666.9852905273438, 6.672943115234375, 1322.4935302734375, 286.5547180175781, 1214.8017578125, 1048.99365234375, -336.3170471191406], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000675.npy"}
{"epoch": 0.9911894273127754, "step": 676, "batch_size": 64, "mean": 470.5931091308594, "std": 586.811767578125, "min": -749.207763671875, "p10": -88.99335632324218, "median": 359.3491668701172, "p90": 1336.697692871094, "max": 2437.353271484375, "pos_frac": 0.765625, "sample": [83.3879623413086, 1558.5245361328125, 816.2260131835938, 935.6979370117188, -57.85960388183594, -87.8666763305664, 388.7267150878906, 679.1243896484375, 25.083499908447266, -749.207763671875, 577.5364990234375, 35.548675537109375, -42.40406799316406, 1081.78759765625, 1636.549560546875, 2437.353271484375, 5.898674011230469, -149.0848846435547, -43.06932067871094, -155.02017211914062, -7.648279190063477, 446.7681884765625, 633.112548828125, -59.390228271484375, -38.723724365234375, 713.0390014648438, 97.94808959960938, 1168.409912109375, 302.0785827636719, 1756.140380859375, -52.388702392578125, 618.0584716796875, 246.77899169921875, 676.7342529296875, 69.94788360595703, 793.1802368164062, 1503.318115234375, 466.573486328125, 611.756591796875, 1463.10595703125, 662.8037109375, -89.4762191772461, 176.3920135498047, -339.96319580078125, 1254.831787109375, 974.673095703125, 58.402069091796875, 931.76220703125, 379.1363525390625, 335.1688232421875, 822.4963989257812, 512.1656494140625, 235.7740936279297, 340.989990234375, 113.34308624267578, 222.3084716796875, 825.161376953125, -220.0255126953125, 1371.7830810546875, 152.68890380859375, 377.7083435058594, 219.16409301757812, -210.8107452392578, 625.7485961914062], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000676.npy"}
{"epoch": 0.9926578560939795, "step": 677, "batch_size": 64, "mean": 451.5909729003906, "std": 714.1398315429688, "min": -828.572998046875, "p10": -220.14972076416015, "median": 342.28759765625, "p90": 1170.911999511719, "max": 3613.611083984375, "pos_frac": 0.78125, "sample": [-16.18383026123047, 2927.469482421875, 582.6582641601562, 63.760032653808594, 444.1300964355469, -291.281494140625, -187.49786376953125, -202.91757202148438, 391.0401916503906, -32.2261962890625, 661.7455444335938, 464.0587463378906, 84.97941589355469, 1283.54736328125, 310.29296875, 806.7924194335938, 696.3026123046875, 139.9021759033203, 248.4794158935547, 215.87388610839844, 1142.1845703125, 342.5192565917969, 996.1509399414062, -199.3555450439453, 345.2735595703125, 1183.2237548828125, 163.79811096191406, 455.5743103027344, 336.92706298828125, 1141.0489501953125, 374.138427734375, 5.905670166015625, -227.53492736816406, 232.86965942382812, 247.06838989257812, -546.0953979492188, 890.3062744140625, 302.9178466796875, 3613.611083984375, 159.84515380859375, 206.28109741210938, 525.2052612304688, 755.6312255859375, 664.6554565429688, -129.94529724121094, 1237.6605224609375, 358.7164001464844, 245.76016235351562, 830.0694580078125, 188.1490020751953, 1365.22412109375, -39.6679573059082, 685.623046875, 342.0559387207031, 553.7158203125, -367.4109802246094, -828.572998046875, 1688.04052734375, 554.8213500976562, 706.2722778320312, 843.739990234375, -300.7200927734375, -753.7098388671875, 18.926605224609375], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000677.npy"}
{"epoch": 0.9941262848751835, "step": 678, "batch_size": 64, "mean": 522.123779296875, "std": 705.8920288085938, "min": -1693.100830078125, "p10": -165.59381332397453, "median": 461.65431213378906, "p90": 1180.2819091796875, "max": 2708.05810546875, "pos_frac": 0.8125, "sample": [260.8841247558594, 948.288330078125, 560.7081298828125, 600.6998901367188, 509.10516357421875, -78.62814331054688, 511.7012634277344, -1693.100830078125, 119.00942993164062, -220.66024780273438, -81.09405517578125, 1131.9659423828125, 39.77128982543945, 1077.4638671875, -85.65596771240234, 609.2594604492188, 887.4722900390625, -297.25811767578125, 893.9497680664062, 801.0662231445312, 950.88818359375, 1159.44921875, 13.693500518798828, 1983.7921142578125, 1189.210205078125, 2708.05810546875, 91.07304382324219, 340.21502685546875, -13.51168441772461, 1060.3792724609375, -441.3829040527344, 55.500572204589844, 284.05963134765625, 772.8001098632812, 285.65283203125, 1012.673095703125, 133.82717895507812, 1100.394287109375, 243.96612548828125, 848.435791015625, -199.85289001464844, 837.9379272460938, 1201.4571533203125, 1545.701904296875, 300.1868896484375, 262.9553527832031, 2287.85888671875, 2266.073974609375, 73.03085327148438, 535.6512451171875, 269.02978515625, -541.93994140625, 414.2034606933594, 31.306289672851562, 53.85643768310547, 133.31629943847656, 576.9398803710938, 1082.6412353515625, 519.7144165039062, 510.4132080078125, 407.6458740234375, -66.76290893554688, 846.769775390625, -206.33639526367188], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000678.npy"}
{"epoch": 0.9955947136563876, "step": 679, "batch_size": 64, "mean": 370.836669921875, "std": 591.6163940429688, "min": -744.60009765625, "p10": -286.72069702148434, "median": 281.3926696777344, "p90": 1282.7878417968752, "max": 1937.953125, "pos_frac": 0.734375, "sample": [509.90252685546875, 66.69574737548828, 1226.958740234375, 1937.953125, 421.8062438964844, 773.7680053710938, 828.7074584960938, 413.8226318359375, 1306.714599609375, 311.6641540527344, 334.20556640625, 456.5741271972656, 382.5476989746094, 130.1742401123047, 984.739990234375, -377.6725158691406, -142.66677856445312, 77.01876831054688, 228.42095947265625, 846.2939453125, 592.5573120117188, 1327.739990234375, -48.49031066894531, 98.99560546875, 141.1273956298828, -303.4957580566406, 472.77880859375, 1441.8876953125, -453.8213806152344, 529.8246459960938, 804.2611083984375, 61.698089599609375, -65.4373779296875, -35.21782684326172, 920.9976806640625, 369.0146484375, 1773.1995849609375, 141.87399291992188, -197.15908813476562, 49.833229064941406, 261.1307678222656, -118.48445129394531, 474.3188781738281, 15.832427978515625, -334.8578796386719, 1350.988525390625, 278.1674499511719, -247.57888793945312, -744.60009765625, -150.78988647460938, 869.4937133789062, -615.0391235351562, 533.46484375, -129.7241668701172, 487.31597900390625, 284.6178894042969, -46.79775619506836, 224.06097412109375, 772.656005859375, 1910.77294921875, 242.16317749023438, 627.0579223632812, -696.1300048828125, 145.7101593017578], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000679.npy"}
{"epoch": 0.9970631424375918, "step": 680, "batch_size": 64, "mean": 497.23138427734375, "std": 729.9412231445312, "min": -2182.092529296875, "p10": -169.07916870117182, "median": 371.7695007324219, "p90": 1545.5087158203128, "max": 2212.216796875, "pos_frac": 0.8125, "sample": [1067.513671875, 435.9790954589844, 391.515869140625, 888.849365234375, 182.77467346191406, 1040.94775390625, 1771.1925048828125, 539.3033447265625, 174.88766479492188, 352.02313232421875, 542.4906005859375, 154.1978302001953, 738.9857177734375, -498.3223571777344, 1087.0006103515625, 1472.989501953125, 655.503662109375, -583.86865234375, -301.08697509765625, 848.7657470703125, 1162.412353515625, 207.61676025390625, -106.59687805175781, 332.08172607421875, -190.59567260742188, 1117.4793701171875, 781.8334350585938, 180.4807586669922, 723.2972412109375, 1595.54443359375, 116.23147583007812, 69.68366241455078, 192.24514770507812, 1713.785888671875, 1017.4112548828125, 172.5500946044922, 192.4812469482422, 467.78253173828125, 2005.0174560546875, -568.1171264648438, 87.7236328125, 338.3077697753906, 497.01092529296875, 545.8482055664062, 152.33834838867188, -2182.092529296875, 1761.9478759765625, -106.18354034423828, -583.2354125976562, 1462.27734375, 595.8468017578125, 2212.216796875, 20.132247924804688, 110.07676696777344, 255.77584838867188, 1370.26904296875, 617.4478759765625, 1576.58837890625, -79.44931030273438, 232.28662109375, -118.87399291992188, 255.63204956054688, -34.305328369140625, 690.9524536132812], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000680.npy"}
{"epoch": 0.9985315712187959, "step": 681, "batch_size": 64, "mean": 392.08013916015625, "std": 638.8037719726562, "min": -879.3240356445312, "p10": -390.3841217041015, "median": 403.74082946777344, "p90": 1129.2626953125002, "max": 2132.628173828125, "pos_frac": 0.6875, "sample": [727.242919921875, -327.1031494140625, 573.0892333984375, 91.70037841796875, 895.6015625, -879.3240356445312, 30.951202392578125, 388.3041687011719, -415.8440246582031, 258.1795654296875, -286.07696533203125, 380.6701965332031, -458.54266357421875, -461.947265625, 1151.43310546875, 1695.936767578125, 470.1982727050781, -76.04961395263672, -264.225830078125, 1000.1819458007812, -184.643798828125, -7.893119812011719, 859.028564453125, 460.2865905761719, 1784.751953125, 606.8716430664062, 196.57696533203125, 990.6368408203125, 979.15283203125, 618.258056640625, 172.6337890625, 445.5313415527344, 132.9505615234375, -130.53182983398438, -137.30780029296875, 419.177490234375, 191.25889587402344, -660.5562744140625, 490.9252014160156, -149.2208709716797, -364.095947265625, 2132.628173828125, -840.5233154296875, 924.3043823242188, -70.69890594482422, 485.90716552734375, -401.6504821777344, 199.6865692138672, 38.57301330566406, -261.66436767578125, 924.9691772460938, -124.76565551757812, 886.3934936523438, 573.0314331054688, 1189.96142578125, 518.697509765625, 1077.53173828125, 1063.4259033203125, 981.7718505859375, 717.3231811523438, 242.96913146972656, 1277.3095703125, 1428.0782470703125, 921.7030029296875], "npy": "/scratch/qu.yang1/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4/margin_logs/step_0000681.npy"}