Files
llama-3-8b-base-margin-dpo-…/margin_logs/margins.jsonl
ModelHub XC 444c4324b6 初始化项目,由ModelHub XC社区提供模型
Model: W-61/llama-3-8b-base-margin-dpo-hh-helpful-8xh200
Source: Original Platform
2026-04-24 11:44:07 +08:00

341 lines
974 KiB
JSON

{"epoch": 0.0, "step": 1, "batch_size": 128, "mean": -0.009443119168281555, "std": 0.3677397072315216, "min": -1.2765426635742188, "p10": -0.41199169158935545, "median": 0.01613903045654297, "p90": 0.38641510009765623, "max": 0.9886322021484375, "pos_frac": 0.515625, "sample": [0.05165863037109375, -0.1942596435546875, -0.1842041015625, -0.13739013671875, -0.043460845947265625, -0.5239105224609375, -0.022459030151367188, -0.3626670837402344, 0.0247039794921875, 0.21996116638183594, 0.08417510986328125, -0.3605918884277344, -0.024652481079101562, -0.1731719970703125, 0.10766220092773438, 0.18875694274902344, -0.15761947631835938, -0.40525054931640625, -0.04624176025390625, 0.23955535888671875, 0.38146209716796875, 0.10079193115234375, -0.032257080078125, -0.024139404296875, 0.1353912353515625, -0.3410797119140625, -0.221160888671875, 0.410308837890625, -0.26479339599609375, 0.015176773071289062, 0.39898681640625, 0.029815673828125, 0.3796043395996094, -1.2765426635742188, -0.2908477783203125, -0.920440673828125, -0.017705917358398438, 0.16484832763671875, 0.9546432495117188, 0.10016250610351562, 0.30112266540527344, -0.03902626037597656, 0.1393890380859375, -0.10420608520507812, 0.0719757080078125, -0.0879364013671875, 0.05355072021484375, -0.09691619873046875, -0.1998291015625, 0.3719482421875, -0.2699851989746094, 0.9886322021484375, -0.0241241455078125, -0.04169273376464844, 0.45626068115234375, -0.40436363220214844, -0.29886436462402344, -0.12025070190429688, -0.2926025390625, 0.24135589599609375, 0.16009521484375, 0.07685470581054688, -0.132568359375, -0.21649551391601562, -0.18459320068359375, -0.22914886474609375, 0.14408493041992188, 0.22299957275390625, 0.1748046875, 0.10573577880859375, -0.63128662109375, 0.269683837890625, 0.6394119262695312, -0.4277210235595703, 0.1701068878173828, 0.192535400390625, -0.6832427978515625, 0.10462570190429688, 0.35161590576171875, -0.14691543579101562, -0.7453193664550781, 0.12618255615234375, -0.3245849609375, 0.1591949462890625, 0.9719696044921875, -0.2711944580078125, 0.1671924591064453, -0.10208892822265625, 0.04135894775390625, -0.030961990356445312, -0.43532562255859375, -0.22634315490722656, 0.2625274658203125, -0.38530731201171875, 0.3914031982421875, 0.029666900634765625, -0.8874282836914062, 0.3341941833496094, 0.05048370361328125, -0.1098785400390625, 0.27718353271484375, 0.3518257141113281, -0.20119857788085938, 0.0345916748046875, -0.2867088317871094, -0.27321624755859375, -0.1220245361328125, 0.002376556396484375, 0.652130126953125, 0.3648490905761719, 0.25287628173828125, -1.00286865234375, 0.017101287841796875, -0.016452789306640625, -0.108184814453125, 0.6773109436035156, -0.5762710571289062, 0.3921051025390625, 0.1959819793701172, 0.38427734375, -0.461700439453125, 0.4051666259765625, 0.0635528564453125, 0.48239898681640625, -0.828521728515625, 0.20518112182617188, 0.15598297119140625, 0.16995620727539062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000001.npy"}
{"epoch": 0.0029411764705882353, "step": 2, "batch_size": 128, "mean": 0.004051461815834045, "std": 0.3557382822036743, "min": -1.2012481689453125, "p10": -0.43282299041748046, "median": 0.0030040740966796875, "p90": 0.4233924865722656, "max": 1.063873291015625, "pos_frac": 0.5078125, "sample": [0.009670257568359375, -0.138275146484375, -0.0161895751953125, 0.25742340087890625, 0.07877731323242188, -0.10441398620605469, 0.7998123168945312, 0.09959602355957031, 0.417510986328125, 0.06683349609375, -0.5860099792480469, -0.4282569885253906, -0.18352508544921875, -0.7701263427734375, -0.2799530029296875, 0.6703414916992188, 0.27788734436035156, -0.71905517578125, -0.09023284912109375, 0.10207366943359375, 0.302276611328125, -0.5335102081298828, -0.0707244873046875, 0.24752235412597656, -0.10830307006835938, 0.0638580322265625, 0.0797119140625, 0.098785400390625, -0.09393692016601562, -0.31207275390625, -0.20440673828125, 0.01297760009765625, -1.2012481689453125, 0.3058948516845703, 0.34563446044921875, -0.1603546142578125, -0.0575408935546875, 0.2288055419921875, -0.34081268310546875, -0.1109771728515625, -0.0415496826171875, -0.06813430786132812, -0.066375732421875, -0.3551063537597656, -0.3550872802734375, -0.2128143310546875, -0.430389404296875, -0.16454315185546875, 0.11841583251953125, 0.240020751953125, 0.1544036865234375, -0.00275421142578125, 0.334808349609375, -0.24801063537597656, 0.0299224853515625, 0.10285568237304688, 0.16043472290039062, 0.0883941650390625, -0.162933349609375, 0.37691497802734375, -0.09174346923828125, 0.1792144775390625, 0.34740447998046875, -0.1779327392578125, 0.5788955688476562, -0.0063018798828125, -0.27352142333984375, -0.15685272216796875, 0.32442474365234375, -0.327301025390625, 0.4293365478515625, 0.6424560546875, 0.1040191650390625, 0.18810272216796875, 0.328155517578125, 0.18640518188476562, -0.09818267822265625, -0.22113037109375, 0.6417388916015625, -0.4548492431640625, 0.663238525390625, -0.2482147216796875, 0.32018280029296875, 0.0030574798583984375, 0.29425048828125, 1.063873291015625, -0.4808807373046875, -0.48004150390625, -0.04152107238769531, -0.2418212890625, 0.023090362548828125, -0.6675910949707031, -0.1906566619873047, 0.03763580322265625, -0.4233894348144531, -0.27348899841308594, -0.13482284545898438, 0.0401611328125, 0.09856796264648438, -0.3931884765625, 0.4796600341796875, 0.12994384765625, 0.6461181640625, 0.23264694213867188, 0.295806884765625, 0.0124969482421875, 0.8393325805664062, 0.06320571899414062, -0.12553977966308594, 0.4916534423828125, -0.08995628356933594, 0.0029506683349609375, 0.3304901123046875, 0.09031486511230469, 0.5645370483398438, 0.42084503173828125, -0.15466690063476562, -0.7456436157226562, -0.5421142578125, -0.525146484375, 0.086517333984375, -0.00798797607421875, -0.26902008056640625, -0.43850135803222656, -0.0977783203125, 0.24516677856445312, 0.1301116943359375, -0.11157417297363281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000002.npy"}
{"epoch": 0.0058823529411764705, "step": 3, "batch_size": 128, "mean": -0.057538121938705444, "std": 0.3695724904537201, "min": -1.4649810791015625, "p10": -0.465363311767578, "median": -0.03724956512451172, "p90": 0.389677619934082, "max": 0.949981689453125, "pos_frac": 0.4375, "sample": [0.10144805908203125, -0.622344970703125, -0.38901519775390625, -0.0726776123046875, -1.4649810791015625, 0.830108642578125, -0.4190216064453125, -0.1839447021484375, 0.1980438232421875, -0.29068565368652344, -0.1940155029296875, 0.4283905029296875, 0.17241287231445312, -0.3378753662109375, -0.2830390930175781, 0.003448486328125, 0.304901123046875, -0.1439056396484375, -0.10996055603027344, 0.22797393798828125, 0.41607666015625, -0.21086883544921875, 0.5649566650390625, -0.1045074462890625, -0.5609893798828125, 0.3975563049316406, 0.12053680419921875, -0.3770465850830078, -0.6513442993164062, -0.6951522827148438, -0.2642364501953125, 0.048004150390625, 0.055103302001953125, -0.257049560546875, -0.0951080322265625, -0.3345489501953125, -0.27167320251464844, 0.5230140686035156, 0.38422393798828125, -0.1737689971923828, -0.27617454528808594, -0.55780029296875, -0.18270111083984375, -0.4293670654296875, -0.06822586059570312, -0.26775360107421875, 0.4661273956298828, -0.2821235656738281, 0.0346221923828125, -0.0264892578125, -0.2253875732421875, -0.18670654296875, -0.621368408203125, -0.1162261962890625, 0.0030517578125, -0.002899169921875, 0.239837646484375, -0.390045166015625, -0.1015472412109375, -0.350067138671875, -0.5679550170898438, -0.005054473876953125, 0.949981689453125, 0.19712066650390625, 0.25414276123046875, 0.021520614624023438, 0.08882522583007812, 0.5009994506835938, 0.06817054748535156, -0.22852325439453125, -0.2954216003417969, 0.06855010986328125, 0.38144683837890625, 0.1819591522216797, 0.1967926025390625, 0.48671722412109375, -0.010066986083984375, 0.00357818603515625, -1.133941650390625, -0.1233978271484375, 0.03580474853515625, -0.036144256591796875, 0.02655792236328125, 0.8213653564453125, -0.0897369384765625, -0.5493545532226562, -0.2787513732910156, 0.40789031982421875, -0.0592193603515625, 0.0012359619140625, 0.0377197265625, 0.0530853271484375, 0.18918609619140625, -0.24596405029296875, 0.3083953857421875, -0.013250350952148438, 0.3331451416015625, 0.328582763671875, 0.044139862060546875, 0.38630104064941406, 0.2465972900390625, -0.03835487365722656, -0.2064208984375, -0.4031829833984375, -0.0955352783203125, 0.288787841796875, -0.22266387939453125, 0.17717742919921875, -0.22348403930664062, -0.07565689086914062, -0.78765869140625, 0.00762939453125, 0.069732666015625, 0.23382568359375, -0.01934051513671875, -0.12923049926757812, -0.014375686645507812, 0.17305564880371094, -0.03887176513671875, -0.09213447570800781, -0.16588592529296875, -1.119598388671875, -0.194091796875, 0.4417724609375, 0.1317138671875, -0.8916549682617188, 0.069580078125, -0.150238037109375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000003.npy"}
{"epoch": 0.008823529411764706, "step": 4, "batch_size": 128, "mean": 0.050228431820869446, "std": 0.375901460647583, "min": -0.9838104248046875, "p10": -0.40630340576171875, "median": 0.02771759033203125, "p90": 0.517919921875, "max": 1.3262176513671875, "pos_frac": 0.5234375, "sample": [-0.24005126953125, -0.607818603515625, -0.17103004455566406, 0.044231414794921875, 0.149627685546875, 0.23663330078125, 0.266693115234375, -0.206390380859375, -0.2055225372314453, 0.40251731872558594, 0.11703872680664062, 0.3065948486328125, 0.7473373413085938, -0.420654296875, 0.4176750183105469, -0.020771026611328125, 0.2642974853515625, 0.8284759521484375, -0.7930526733398438, -0.39078521728515625, -0.11805343627929688, 0.2724151611328125, -0.2875213623046875, -0.4104156494140625, 0.02471160888671875, 0.1080322265625, 0.3484344482421875, -0.2805328369140625, -0.02960205078125, -0.30686187744140625, -0.3759307861328125, 0.46758270263671875, 0.46625518798828125, 0.42546844482421875, -0.533843994140625, 0.28574371337890625, -0.15802764892578125, 0.65643310546875, 0.401458740234375, 0.30438232421875, 0.85028076171875, 0.22123146057128906, 0.14104461669921875, 0.03072357177734375, 0.1156463623046875, -0.1378021240234375, -0.048168182373046875, -0.074432373046875, -0.0885162353515625, -0.269287109375, 0.07628631591796875, 0.17551422119140625, 0.7544937133789062, -0.3341217041015625, 0.052692413330078125, -0.19568252563476562, -0.404541015625, 0.30271148681640625, 0.05422210693359375, 0.346466064453125, -0.49211883544921875, 0.292510986328125, 0.21881866455078125, -0.47881317138671875, 0.1153717041015625, -0.6289825439453125, 0.18439483642578125, 0.08760833740234375, -0.3765106201171875, 0.15410995483398438, 0.2985076904296875, -0.09609222412109375, -0.10410881042480469, 0.17995452880859375, -0.19652938842773438, -0.03375244140625, 0.6239166259765625, -0.1356658935546875, -0.02152252197265625, 0.51556396484375, -0.164154052734375, 0.2717437744140625, -0.6049995422363281, 0.6483535766601562, 0.3743438720703125, 0.2254772186279297, 0.012338638305664062, -0.07500457763671875, 0.25086212158203125, -0.09348297119140625, 0.745361328125, 0.6807937622070312, -0.9838104248046875, -0.20344161987304688, -0.22202301025390625, 0.20139694213867188, 0.829345703125, -0.034236907958984375, -0.11115646362304688, 0.05266571044921875, -0.12269973754882812, 0.3251075744628906, -0.2510223388671875, -0.1521148681640625, -0.05257225036621094, -0.132659912109375, 0.5228118896484375, 0.08492851257324219, -0.51239013671875, 0.461029052734375, 0.119598388671875, -0.21009063720703125, -0.3506431579589844, -0.5699462890625, 0.23123931884765625, 0.34273529052734375, -0.15668487548828125, 0.5158233642578125, 1.3262176513671875, -0.15847015380859375, -0.2068939208984375, -0.09090423583984375, -0.26385498046875, -0.0577392578125, -0.435394287109375, 0.18474197387695312, 0.00289154052734375, 0.575225830078125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000004.npy"}
{"epoch": 0.011764705882352941, "step": 5, "batch_size": 128, "mean": -0.07273375988006592, "std": 0.3525700867176056, "min": -1.07342529296875, "p10": -0.4923675537109375, "median": -0.026477813720703125, "p90": 0.348492431640625, "max": 0.8504867553710938, "pos_frac": 0.4140625, "sample": [0.1482086181640625, -0.487579345703125, -0.11639404296875, -0.11304473876953125, -0.22769546508789062, -0.447296142578125, -0.35369873046875, -0.2646827697753906, -0.32299041748046875, -0.3984222412109375, -0.8149871826171875, -0.376922607421875, 0.16815185546875, -0.009105682373046875, -0.41436004638671875, 0.005645751953125, 0.5761337280273438, 0.071990966796875, -0.019313812255859375, -0.22956085205078125, -0.0043964385986328125, -0.041339874267578125, 0.47943115234375, 0.32579803466796875, -0.5984878540039062, 0.16539573669433594, 0.46212005615234375, -0.4528236389160156, -0.8724517822265625, -0.024303436279296875, 0.24483871459960938, -0.046966552734375, -0.23116302490234375, 0.00386810302734375, 0.290069580078125, -1.07342529296875, -0.869415283203125, -0.014312744140625, 0.0374755859375, -0.0827484130859375, 0.423004150390625, -0.910003662109375, -0.2881584167480469, -1.042510986328125, 0.3386268615722656, -0.4561004638671875, -0.16674041748046875, -0.2965106964111328, 0.3152008056640625, -0.4573822021484375, 0.4986572265625, 0.2999153137207031, 0.17818450927734375, 0.6454391479492188, 0.344757080078125, -0.39630889892578125, 0.0113372802734375, -0.1579132080078125, -0.17509078979492188, -0.1090545654296875, -0.11263275146484375, -0.6251449584960938, -0.309539794921875, -0.07094192504882812, -0.12265777587890625, 0.135284423828125, 0.11234474182128906, -0.02526092529296875, -0.08004188537597656, -0.18589019775390625, -0.014081954956054688, 0.8504867553710938, -0.1114501953125, 0.24826431274414062, 0.015779495239257812, -0.0086669921875, -0.05181121826171875, -0.26074981689453125, 0.12963104248046875, 0.04094123840332031, -0.07233810424804688, 0.41817474365234375, -0.2014312744140625, 0.0086212158203125, -0.359527587890625, -0.20721435546875, 0.0010700225830078125, -0.3420906066894531, 0.08594131469726562, 0.416961669921875, 0.39591217041015625, 0.007913589477539062, -0.593505859375, 0.15863418579101562, -0.0167083740234375, -0.1680164337158203, -0.179443359375, -0.3637580871582031, -0.0276947021484375, 0.1886749267578125, -0.4817352294921875, 0.108642578125, -0.29546356201171875, 0.257568359375, 0.2416229248046875, 0.21941757202148438, 0.02375030517578125, -0.36940765380859375, 0.1799468994140625, 0.357208251953125, -0.537261962890625, -0.22676849365234375, -0.6842212677001953, 0.19011688232421875, 0.3820838928222656, 0.620330810546875, 0.22756195068359375, -0.54461669921875, 0.21051025390625, 0.2150115966796875, -0.0444183349609375, -0.34954833984375, 0.1889801025390625, 0.2486419677734375, -0.5035400390625, -6.866455078125e-05, -0.007472991943359375, -0.3114166259765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000005.npy"}
{"epoch": 0.014705882352941176, "step": 6, "batch_size": 128, "mean": -0.0020265579223632812, "std": 0.3377120792865753, "min": -1.069549560546875, "p10": -0.3856779098510742, "median": -0.017879486083984375, "p90": 0.36659240722656244, "max": 0.878448486328125, "pos_frac": 0.4765625, "sample": [0.10745620727539062, 0.063873291015625, 0.15780067443847656, 0.09568023681640625, -0.02579498291015625, -0.17816925048828125, 0.02382659912109375, -0.11262702941894531, 0.03745269775390625, 0.14256668090820312, 0.3631591796875, -0.01020050048828125, 0.2647533416748047, 0.29510498046875, -0.3759021759033203, 0.09036636352539062, -0.2747383117675781, 0.10009765625, 0.24643707275390625, 0.0969696044921875, 0.8126144409179688, -0.23876190185546875, -0.43982696533203125, 0.6402263641357422, 0.0587158203125, 0.3457908630371094, 0.33786964416503906, 0.072174072265625, -0.060886383056640625, 0.40384674072265625, -0.23358917236328125, 0.18721389770507812, 0.04073524475097656, 0.1527099609375, -0.13031768798828125, -0.1112823486328125, 0.3057441711425781, 0.27020263671875, 0.0118408203125, -0.00901031494140625, 0.26209259033203125, 0.7458572387695312, -0.178192138671875, -0.0354766845703125, -1.069549560546875, 0.34639739990234375, -0.11069488525390625, -0.50592041015625, -0.028371810913085938, 0.20774078369140625, 0.14654922485351562, 0.2740478515625, -0.3230400085449219, 0.15892791748046875, -0.6328887939453125, 0.020313262939453125, 0.3392677307128906, -0.3930625915527344, 0.3290061950683594, -0.32794189453125, -0.1147613525390625, -0.0255584716796875, -0.09814262390136719, -0.13748550415039062, 0.3575897216796875, 0.7032852172851562, 0.05478668212890625, -0.90478515625, -0.14310455322265625, 0.04097747802734375, 0.044086456298828125, -0.26983642578125, 0.4937591552734375, -0.3679962158203125, 0.2522926330566406, -0.1781024932861328, -0.43035888671875, -0.22375106811523438, -0.781829833984375, 0.7264328002929688, -0.036376953125, -0.21027374267578125, 0.54827880859375, 0.28381919860839844, -0.10749435424804688, -0.15352630615234375, 0.4296417236328125, -0.06654548645019531, -0.38251304626464844, -0.4484291076660156, -0.037281036376953125, 0.2891674041748047, -0.10913848876953125, -0.07401084899902344, -0.10662841796875, -0.429656982421875, 0.28762054443359375, -0.22524642944335938, -0.3331756591796875, -0.15423583984375, 0.44672393798828125, -0.0854949951171875, -0.07171249389648438, -0.249481201171875, -0.4405860900878906, -0.007572174072265625, -0.3246002197265625, 0.2114715576171875, -0.23516845703125, 0.7988052368164062, -0.672943115234375, 0.374603271484375, 0.0234527587890625, -0.320587158203125, 0.26151275634765625, -0.14165496826171875, -0.3469200134277344, 0.0395355224609375, -0.3174591064453125, 0.0162200927734375, 0.230377197265625, -0.04187774658203125, -0.10348701477050781, -0.15651702880859375, 0.006809234619140625, -0.2187042236328125, 0.878448486328125, -0.5232696533203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000006.npy"}
{"epoch": 0.01764705882352941, "step": 7, "batch_size": 128, "mean": 0.09031324088573456, "std": 0.3801402449607849, "min": -0.8350906372070312, "p10": -0.3403774261474609, "median": 0.09013175964355469, "p90": 0.5456226348876952, "max": 1.785858154296875, "pos_frac": 0.5703125, "sample": [-0.230316162109375, -0.12811851501464844, -0.058990478515625, 0.15240478515625, 0.6170578002929688, 0.16998672485351562, 0.141571044921875, -0.18986129760742188, -0.15153884887695312, -0.153472900390625, 0.2571220397949219, -0.42020416259765625, 0.2077484130859375, -0.6241073608398438, 0.7876739501953125, -0.2226085662841797, -0.10908126831054688, -0.32649993896484375, -0.337615966796875, 0.2201080322265625, -0.0531005859375, 0.04052734375, -0.011142730712890625, 0.829010009765625, -0.1224822998046875, 0.308258056640625, 0.11928558349609375, -0.1716461181640625, 0.2838134765625, -0.430389404296875, 0.6971969604492188, 0.8443756103515625, 0.5517082214355469, -0.3912353515625, -0.1661376953125, -0.5232391357421875, 0.22310256958007812, -0.3261222839355469, 0.241912841796875, 0.799713134765625, -0.06961822509765625, 0.314666748046875, 0.09288406372070312, 0.34862327575683594, 0.11278915405273438, -0.421966552734375, 0.58538818359375, 0.245819091796875, 0.252777099609375, 0.2822399139404297, 0.26404571533203125, 0.24444961547851562, 0.11646270751953125, -0.16667747497558594, -0.29584312438964844, -0.14923095703125, -0.21733474731445312, -0.5682830810546875, 0.04718017578125, -0.003345489501953125, 0.6624755859375, -0.03318023681640625, -0.2424774169921875, -0.1960296630859375, -0.8350906372070312, -0.2127246856689453, 0.05536842346191406, 0.7924346923828125, 0.3557281494140625, 0.16823577880859375, 1.785858154296875, 0.2911376953125, 0.31011962890625, 0.32628631591796875, 0.5312118530273438, 0.24328994750976562, 0.1446990966796875, 0.08737945556640625, -0.2322978973388672, 0.1801910400390625, -0.29105377197265625, 0.33075714111328125, -0.16965866088867188, -0.0774993896484375, 0.5380935668945312, 0.2144317626953125, -0.09342193603515625, 0.21539688110351562, -0.484283447265625, 0.5308494567871094, 1.1743545532226562, 0.05544281005859375, 0.0418853759765625, 0.053318023681640625, -0.015979766845703125, -0.135009765625, 0.0632476806640625, 0.12412643432617188, 0.1043853759765625, 0.156768798828125, -0.008270263671875, 0.4013671875, 0.2995758056640625, 0.119598388671875, 0.18500518798828125, 0.4936485290527344, 0.62420654296875, 0.09864425659179688, -0.04555511474609375, -0.39536285400390625, -0.3468208312988281, -0.21547317504882812, -0.63525390625, -0.23068618774414062, 0.4891815185546875, 0.5228271484375, -0.0876617431640625, -0.5525360107421875, -0.0006389617919921875, -0.14171600341796875, 0.251220703125, 0.5430145263671875, 0.24025726318359375, -0.30410003662109375, 0.054798126220703125, 0.09370040893554688, -0.03945159912109375, 0.26811790466308594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000007.npy"}
{"epoch": 0.020588235294117647, "step": 8, "batch_size": 128, "mean": -0.04206228256225586, "std": 0.43526118993759155, "min": -2.67303466796875, "p10": -0.5314582824707031, "median": -0.014682769775390625, "p90": 0.4412366867065429, "max": 0.8199462890625, "pos_frac": 0.484375, "sample": [-0.09015083312988281, -0.581634521484375, 0.191253662109375, 0.018537521362304688, -0.10504150390625, -0.01995086669921875, -0.34440040588378906, -0.497467041015625, 0.06568527221679688, 0.6868820190429688, -0.0094146728515625, -0.1879119873046875, 0.01190185546875, 0.7447967529296875, -0.374420166015625, 0.30515289306640625, 0.097076416015625, 0.20729637145996094, -0.22238922119140625, 0.8199462890625, -0.5257110595703125, 0.3616752624511719, 0.04447174072265625, -0.025033950805664062, -0.2689971923828125, 0.3196144104003906, 0.5707626342773438, -0.165679931640625, -0.2491302490234375, 0.23597335815429688, -0.832672119140625, -0.0227203369140625, 0.11697006225585938, 0.27809906005859375, 0.2598419189453125, -0.11209297180175781, 0.398223876953125, 0.18936920166015625, 0.03550148010253906, 0.16546630859375, 0.44976806640625, 0.17888450622558594, -0.20425033569335938, -0.0205230712890625, -0.35153961181640625, -0.4441871643066406, -0.108489990234375, 0.2129974365234375, -0.09724617004394531, 0.35182952880859375, -0.00234222412109375, 0.10735130310058594, -0.5448684692382812, -0.059551239013671875, -0.031024932861328125, 0.0009441375732421875, -2.67303466796875, -0.8400421142578125, 0.3360729217529297, 0.16010284423828125, 0.08455848693847656, 0.4462127685546875, -0.42482757568359375, 0.5223464965820312, 0.00738525390625, 0.60205078125, 0.811676025390625, -0.45633697509765625, -0.09590530395507812, -0.592987060546875, -0.3652496337890625, 0.00319671630859375, 0.494964599609375, 0.1942291259765625, -0.4816131591796875, -0.24609375, 0.16257476806640625, -0.16088104248046875, 0.40135955810546875, -0.9416275024414062, -0.07875633239746094, 0.6269912719726562, -0.137603759765625, 0.3761749267578125, 0.20134735107421875, -0.3883056640625, 0.35808563232421875, 0.012453079223632812, 0.0969085693359375, -0.23250579833984375, 0.3211784362792969, -0.22829437255859375, -0.3434906005859375, 0.4391040802001953, -0.08218765258789062, -0.1133575439453125, 0.248626708984375, 0.06096649169921875, -0.0828704833984375, 0.04338836669921875, -0.7054977416992188, -0.21465301513671875, -0.05881500244140625, -0.8934249877929688, -0.08164215087890625, 0.052379608154296875, -0.377899169921875, -0.11709785461425781, -0.6106739044189453, -0.033290863037109375, 0.1660022735595703, 0.037601470947265625, -0.18256378173828125, 0.24275588989257812, 0.20050048828125, -0.635986328125, -0.3109588623046875, -0.6296005249023438, -0.36534881591796875, 0.14248275756835938, 0.46442604064941406, 0.176055908203125, 0.7246932983398438, -0.2419586181640625, -0.1454925537109375, -0.24390411376953125, 0.181488037109375, -0.8969650268554688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000008.npy"}
{"epoch": 0.023529411764705882, "step": 9, "batch_size": 128, "mean": -0.06047630310058594, "std": 0.4046947658061981, "min": -2.17535400390625, "p10": -0.5130676269531249, "median": -0.051711082458496094, "p90": 0.3736114501953124, "max": 1.254669189453125, "pos_frac": 0.40625, "sample": [0.00124359130859375, -0.030061721801757812, -0.4720916748046875, -0.25490570068359375, 0.1720428466796875, 0.06972122192382812, 0.748443603515625, -0.1155853271484375, 0.10800552368164062, -0.007640838623046875, 0.05249977111816406, -0.43384552001953125, -0.14269065856933594, -0.28394317626953125, -0.15882301330566406, 0.20827484130859375, 0.471893310546875, -1.0011978149414062, -0.16405487060546875, 0.6664886474609375, 0.7737579345703125, 0.26726341247558594, -0.7531394958496094, 1.254669189453125, 0.09222793579101562, -0.03714752197265625, -0.4563179016113281, 0.25981903076171875, 0.5826416015625, 0.10408782958984375, -0.18125534057617188, 0.361663818359375, -0.0358123779296875, -0.011814117431640625, 0.03516387939453125, 0.7895889282226562, 0.158905029296875, 0.05712890625, 0.1656036376953125, -0.5689010620117188, 0.11309814453125, -0.27776336669921875, 0.22486114501953125, -0.31768798828125, 0.139862060546875, -0.10124969482421875, 0.24952316284179688, 0.13867950439453125, -0.452301025390625, -0.20952987670898438, -0.19000244140625, -0.05535316467285156, -0.20066070556640625, -0.3173675537109375, -0.2042388916015625, 0.2649269104003906, -0.284088134765625, -0.03551483154296875, 0.093994140625, -0.7991180419921875, -0.06106758117675781, 0.0142059326171875, -0.1301422119140625, -0.06501007080078125, 0.2697410583496094, -0.581207275390625, -0.13451194763183594, -0.546600341796875, -0.0835723876953125, -0.4224510192871094, -0.1375732421875, -0.0377197265625, 0.1846027374267578, -0.010128021240234375, 0.53729248046875, -0.01786041259765625, -0.1505718231201172, 0.13706588745117188, -2.17535400390625, 0.11100578308105469, -0.09374618530273438, -0.5775527954101562, 0.0621185302734375, 0.16800308227539062, -0.175048828125, 0.46663856506347656, -0.9783172607421875, -0.34053802490234375, 0.63287353515625, -0.059253692626953125, -0.11703109741210938, -0.07073783874511719, -0.23363494873046875, 0.20399856567382812, 0.19248008728027344, 0.4577789306640625, -0.369049072265625, -0.14568519592285156, -0.4391326904296875, -0.05592918395996094, -0.5362777709960938, -0.91510009765625, -0.5615234375, -0.17168426513671875, 0.28942108154296875, -0.3530845642089844, 0.23232269287109375, -0.21321868896484375, -0.07996177673339844, 0.32000732421875, 0.10422134399414062, -0.05315589904785156, -0.050266265869140625, -0.44156646728515625, 0.21016693115234375, -0.2825164794921875, -0.5929107666015625, -0.21363067626953125, -0.16376876831054688, -0.5031204223632812, 0.4014892578125, 0.10763931274414062, 0.0926361083984375, 0.030017852783203125, -0.03309440612792969, -0.12591934204101562, 0.49555206298828125, -0.03399467468261719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000009.npy"}
{"epoch": 0.026470588235294117, "step": 10, "batch_size": 128, "mean": 0.0181284099817276, "std": 0.3916047513484955, "min": -1.3045806884765625, "p10": -0.42264862060546876, "median": -0.0005550384521484375, "p90": 0.5539871215820311, "max": 1.0146484375, "pos_frac": 0.5, "sample": [0.3276481628417969, -0.22358322143554688, -0.23519515991210938, 0.16955947875976562, 0.01143646240234375, 0.7831268310546875, 0.00174713134765625, -0.009515762329101562, -0.077545166015625, 0.761016845703125, -0.357879638671875, -1.3045806884765625, 0.0835723876953125, -0.019622802734375, -0.057468414306640625, 0.031402587890625, -0.021139144897460938, 0.4356117248535156, -0.5839385986328125, -0.22911643981933594, -0.5415802001953125, -0.053371429443359375, 0.40332794189453125, 0.98651123046875, 0.748016357421875, -0.3890113830566406, 0.3128204345703125, -0.21707916259765625, 0.4389190673828125, 0.33873939514160156, 0.100341796875, -0.021871566772460938, 0.26500511169433594, 0.731292724609375, -0.4006977081298828, 0.2227020263671875, 0.3884239196777344, -0.4302825927734375, -0.238800048828125, 0.07697296142578125, -0.03997802734375, -0.02088165283203125, -0.2864532470703125, -0.156707763671875, -0.14422607421875, 0.4387359619140625, 1.0146484375, 0.4313507080078125, 0.06193733215332031, 0.652801513671875, 0.2521514892578125, -0.769775390625, 0.5105934143066406, 0.5457611083984375, 0.04306221008300781, -0.3671112060546875, 0.0850677490234375, -0.30999755859375, -0.2024688720703125, 0.675445556640625, 0.1932373046875, -0.42217254638671875, 0.21858978271484375, 0.26947021484375, 0.107635498046875, -0.14032745361328125, -0.15987396240234375, 0.19091796875, -0.3193511962890625, -0.7233200073242188, 0.6524887084960938, -0.5276222229003906, 0.2001190185546875, -0.129974365234375, 0.199981689453125, 0.06459808349609375, 0.276397705078125, 0.07173919677734375, 0.27246856689453125, -0.24312973022460938, -0.32221221923828125, -0.42375946044921875, -0.4035682678222656, 0.17351531982421875, -0.1263427734375, 0.8789825439453125, -0.5781784057617188, -0.22192764282226562, -0.42522430419921875, -0.19712448120117188, 0.09920501708984375, 0.67181396484375, 0.17174530029296875, -0.10346221923828125, 0.57318115234375, -0.13494873046875, 0.2924232482910156, 0.5195693969726562, 0.1593475341796875, -0.0591888427734375, -0.34348297119140625, -0.4072093963623047, -0.18187713623046875, -0.18952560424804688, 0.7151336669921875, -0.13933372497558594, -0.13303375244140625, -0.19393157958984375, 0.06085968017578125, 0.055233001708984375, -0.15980148315429688, -0.002857208251953125, 0.1530303955078125, -0.36324310302734375, 0.0463714599609375, 0.042514801025390625, 0.2818756103515625, 0.03967857360839844, -0.8224639892578125, -0.6752471923828125, -0.22344970703125, -0.5959014892578125, -0.19191932678222656, 0.18921470642089844, 0.18719863891601562, -0.01751708984375, 0.11789703369140625, -0.14336585998535156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000010.npy"}
{"epoch": 0.029411764705882353, "step": 11, "batch_size": 128, "mean": 0.05830053985118866, "std": 0.42452675104141235, "min": -1.2828216552734375, "p10": -0.4028804779052734, "median": 0.06656646728515625, "p90": 0.6144218444824218, "max": 1.1253662109375, "pos_frac": 0.5625, "sample": [-0.33258819580078125, -0.09848785400390625, 0.2706146240234375, 0.5462646484375, 1.1253662109375, 0.09619140625, -0.0012760162353515625, 0.08661079406738281, -0.0275115966796875, 0.7187004089355469, -0.14542198181152344, 0.01580047607421875, -0.374725341796875, 0.49554443359375, 0.789886474609375, 0.19205856323242188, -0.12618255615234375, -0.3328361511230469, 0.441436767578125, 0.063262939453125, 0.23035430908203125, -0.19264793395996094, -0.4878082275390625, 0.2349395751953125, 0.08184051513671875, 0.12636566162109375, -0.30327606201171875, 0.2092113494873047, 0.7880592346191406, 1.0238037109375, 0.14405059814453125, 0.24314117431640625, -0.19953155517578125, 0.277740478515625, 0.70611572265625, 0.18463134765625, 0.1534442901611328, 0.19130516052246094, 0.3957347869873047, 0.36474609375, -0.010524749755859375, 0.09622573852539062, -0.149566650390625, -0.16693496704101562, 0.08123016357421875, -0.1113433837890625, -0.9889678955078125, 0.021087646484375, -0.43868255615234375, -0.04209709167480469, 0.4730072021484375, -0.000919342041015625, 0.04442596435546875, 0.0417327880859375, 0.4777374267578125, -0.0049591064453125, -1.2828216552734375, 0.2534523010253906, -0.24080657958984375, -0.37993621826171875, 0.6516189575195312, 0.3861961364746094, 0.2579460144042969, 0.3148059844970703, -0.14013290405273438, -0.78118896484375, 0.598480224609375, 0.029397964477539062, 0.2528533935546875, 0.5316009521484375, -0.6357574462890625, 0.2906074523925781, 0.19782257080078125, -0.2707843780517578, -0.5071487426757812, -0.3050537109375, 0.4730949401855469, -0.5729141235351562, -0.40367889404296875, 0.295928955078125, 0.09722137451171875, 0.5511016845703125, 0.2836151123046875, 0.23197174072265625, -0.12509536743164062, 0.3021697998046875, -0.11600875854492188, -0.17264175415039062, -1.0906829833984375, 0.4105987548828125, -0.12454414367675781, -0.330657958984375, 0.2971916198730469, -0.21582794189453125, -0.36077880859375, 0.780364990234375, 0.15558242797851562, -0.4205322265625, -0.125885009765625, 0.94232177734375, -0.031902313232421875, -0.09632492065429688, 0.3712806701660156, 0.7369842529296875, -0.31374168395996094, -0.4025382995605469, 0.231536865234375, 0.7360000610351562, 0.688079833984375, -0.544708251953125, -0.296112060546875, 0.12152862548828125, 0.0716094970703125, -0.1839752197265625, 0.042781829833984375, -1.2349700927734375, -0.14739036560058594, 0.0698699951171875, -0.09514999389648438, -0.3299560546875, -0.100341796875, 0.1731433868408203, 0.1440582275390625, -0.006866455078125, 0.874420166015625, 0.21199798583984375, -0.13478851318359375, 0.028497695922851562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000011.npy"}
{"epoch": 0.03235294117647059, "step": 12, "batch_size": 128, "mean": 0.09537617862224579, "std": 0.3944438695907593, "min": -0.8853836059570312, "p10": -0.36160278320312494, "median": 0.06817626953125, "p90": 0.5610832214355469, "max": 1.7156524658203125, "pos_frac": 0.5859375, "sample": [-0.0366668701171875, 0.12163543701171875, -0.294464111328125, -0.4082069396972656, 0.5581741333007812, 0.26851654052734375, -0.0555572509765625, -0.245941162109375, -0.07295608520507812, 0.32453155517578125, -0.26374053955078125, -0.12346076965332031, 0.5473480224609375, 0.06539154052734375, 0.6867523193359375, 0.15120315551757812, 0.2709465026855469, -0.15276718139648438, 0.044605255126953125, -0.107391357421875, 0.0106048583984375, 0.01184844970703125, 0.30416107177734375, 0.2573871612548828, -0.23777008056640625, -0.071014404296875, 0.6229591369628906, -0.14092445373535156, -0.15983200073242188, 0.31777191162109375, -0.08622932434082031, 0.38471221923828125, -0.08275985717773438, -0.4697723388671875, -0.6392364501953125, -0.2251129150390625, 1.7156524658203125, 0.017108917236328125, -0.39075660705566406, 0.6467819213867188, -0.04673004150390625, 0.342315673828125, -0.07118988037109375, -0.6639785766601562, -0.2808837890625, -0.0687255859375, -0.12468719482421875, 0.079864501953125, -0.029451370239257812, 0.07096099853515625, 0.17380142211914062, 0.1194915771484375, 0.49668121337890625, 1.4358978271484375, 0.1225738525390625, 0.958282470703125, 0.10768890380859375, 0.92303466796875, -0.11202430725097656, 0.513946533203125, 0.42966461181640625, 0.1218109130859375, -0.047100067138671875, 0.07316207885742188, -0.47686767578125, 0.22284317016601562, -0.1486492156982422, -0.03575897216796875, 0.178863525390625, 0.028680801391601562, 0.56787109375, 0.26885223388671875, -0.350250244140625, -0.3387489318847656, -0.10219573974609375, 0.21704483032226562, 0.08767318725585938, 0.08931350708007812, 0.027173995971679688, 0.2775115966796875, -0.6813583374023438, 0.29378509521484375, -0.1274566650390625, -0.1846923828125, -0.5634765625, -0.388092041015625, 0.7295989990234375, 0.712982177734375, 0.532073974609375, 0.009790420532226562, 0.047023773193359375, 0.10152435302734375, -0.5892829895019531, 0.279388427734375, 0.601409912109375, 0.3492889404296875, 0.5486984252929688, -0.21324920654296875, -0.1580677032470703, 0.316314697265625, 0.04468727111816406, -0.13080596923828125, -0.64727783203125, 0.04232978820800781, 0.339813232421875, 0.32366180419921875, 0.17553138732910156, 0.313079833984375, 0.34267425537109375, -0.10418701171875, 0.67303466796875, 0.23138427734375, 0.2999420166015625, -0.08030509948730469, -0.5528488159179688, 0.27518463134765625, -0.08936691284179688, 0.24151611328125, -0.31685638427734375, -0.001476287841796875, -0.164520263671875, 0.1346588134765625, 0.28110313415527344, 0.33252716064453125, -0.8853836059570312, 0.2732391357421875, 0.8177642822265625, 0.3215484619140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000012.npy"}
{"epoch": 0.03529411764705882, "step": 13, "batch_size": 128, "mean": 0.03294284641742706, "std": 0.3996337950229645, "min": -2.044219970703125, "p10": -0.4053524017333984, "median": 0.07863998413085938, "p90": 0.45723419189453113, "max": 0.99737548828125, "pos_frac": 0.59375, "sample": [0.02734375, 0.21107101440429688, 0.98455810546875, 0.08966064453125, 0.14274978637695312, 0.2034587860107422, -0.0815277099609375, 0.28607177734375, -0.2664318084716797, 0.5689468383789062, 0.241363525390625, 0.1902618408203125, 0.82550048828125, 0.22779464721679688, 0.37832069396972656, -0.0582427978515625, -0.34674835205078125, -0.0031585693359375, 0.6991729736328125, -0.2708015441894531, 0.19050025939941406, 0.0245819091796875, 0.1442108154296875, -0.12967300415039062, 0.3545074462890625, 0.32656097412109375, 0.4097442626953125, 0.11013412475585938, -0.15529251098632812, 0.2235107421875, -0.18675994873046875, -0.43178367614746094, 0.37542724609375, -0.20919227600097656, -0.1165008544921875, -0.31219482421875, 0.20864295959472656, 0.8106307983398438, 0.2292327880859375, 0.03912162780761719, -0.47452545166015625, 0.00263214111328125, 0.12537384033203125, -0.8289108276367188, -0.25286865234375, 0.21155357360839844, 0.48613739013671875, 0.18209457397460938, -0.15762710571289062, 0.2709197998046875, 0.22359466552734375, 0.42723846435546875, 0.03975677490234375, 0.1199798583984375, 0.1724853515625, -0.32952880859375, 0.5705795288085938, -0.6230926513671875, -0.6295413970947266, 0.16054534912109375, -0.05268096923828125, 0.32610321044921875, -0.30327606201171875, 0.1063385009765625, -0.5466461181640625, 0.07761383056640625, 0.05646324157714844, -0.32212066650390625, 0.715972900390625, 0.2126312255859375, -0.00574493408203125, -0.0591278076171875, 0.0796661376953125, 0.0635833740234375, 0.99737548828125, -0.44818878173828125, 0.3573760986328125, -0.21593856811523438, 0.25856781005859375, -0.23901748657226562, -0.14513206481933594, 0.09320831298828125, -0.10161018371582031, 0.1986541748046875, 0.285736083984375, -0.15314483642578125, -0.5106964111328125, 0.1219482421875, -0.12837982177734375, 0.07281112670898438, -0.22718048095703125, -0.3969306945800781, 0.230743408203125, -0.13671875, -0.07708740234375, -0.341766357421875, 0.0570831298828125, 0.020006179809570312, 0.77606201171875, 0.01708984375, -0.92303466796875, -0.2459869384765625, 0.51824951171875, -2.044219970703125, 0.35678863525390625, 0.44484710693359375, -0.7316436767578125, -0.4250030517578125, 0.29570579528808594, 0.5161514282226562, -0.0417938232421875, -0.6192626953125, -0.38291168212890625, 0.37286376953125, -0.3297576904296875, -0.2732086181640625, 0.5729103088378906, -0.1821746826171875, -0.24915313720703125, 0.1011810302734375, 0.17659759521484375, 0.1105499267578125, -0.23479461669921875, 0.39475250244140625, 0.1437835693359375, 0.202850341796875, 0.19891357421875, 0.12826919555664062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000013.npy"}
{"epoch": 0.03823529411764706, "step": 14, "batch_size": 128, "mean": 0.0748911201953888, "std": 0.4016353189945221, "min": -0.9925384521484375, "p10": -0.3939701080322265, "median": 0.050685882568359375, "p90": 0.565362548828125, "max": 1.69110107421875, "pos_frac": 0.5859375, "sample": [0.20325469970703125, 0.6518974304199219, 0.5826873779296875, -0.6095504760742188, -0.1663970947265625, -0.3788604736328125, -0.9925384521484375, -0.000152587890625, 0.040252685546875, 0.606353759765625, -0.17955780029296875, -0.6070938110351562, 0.4036521911621094, 0.2266693115234375, -0.0643310546875, -0.12664794921875, -0.05159759521484375, -0.9097900390625, 0.7434921264648438, 0.7238693237304688, -0.0059967041015625, -0.0347900390625, -0.4292259216308594, -0.1563720703125, 0.10192108154296875, 0.5204887390136719, 0.18377685546875, -0.13624954223632812, -0.5722732543945312, 0.23791122436523438, 0.10162353515625, 0.19080352783203125, 0.14572906494140625, 0.38045692443847656, 0.11057281494140625, -0.519378662109375, -0.04781341552734375, 0.22789764404296875, -0.3031158447265625, 0.21063232421875, 0.11175537109375, -0.24264144897460938, 0.3348426818847656, -0.09142112731933594, 0.135955810546875, 0.5579376220703125, 0.22948074340820312, 0.22556304931640625, -0.21799087524414062, 0.00992584228515625, 0.0596466064453125, 0.8584671020507812, 0.056446075439453125, 0.29815673828125, -0.199920654296875, -0.06902885437011719, 0.14238739013671875, 0.04224967956542969, -0.13792800903320312, -0.23894500732421875, 0.1639862060546875, 0.25981903076171875, 0.5408096313476562, 0.0148773193359375, 0.3831520080566406, 0.35521697998046875, 0.030792236328125, -0.2320384979248047, -0.051116943359375, 0.4605712890625, 0.020734786987304688, -0.10143852233886719, -0.0668792724609375, 0.43701171875, 0.07577133178710938, -0.30503082275390625, 0.4600982666015625, -0.6978302001953125, 0.3865814208984375, 0.6217803955078125, 0.04578399658203125, 0.03145599365234375, 0.37027931213378906, 0.5209560394287109, 0.024694442749023438, 0.27355194091796875, 0.12567901611328125, -0.23035430908203125, 0.8196182250976562, 0.2733306884765625, 0.0555877685546875, -0.21954345703125, -0.6273651123046875, 0.20659637451171875, -0.27974700927734375, -0.0358428955078125, -0.26976776123046875, -0.2146453857421875, 0.7201080322265625, -0.262420654296875, 0.8746337890625, 0.3519134521484375, 0.2547454833984375, 0.18393325805664062, 0.235748291015625, -0.3009834289550781, -0.3270416259765625, 0.10018157958984375, 0.0319671630859375, 0.3724632263183594, 0.036289215087890625, -0.439483642578125, 0.4214935302734375, -0.6024990081787109, -0.02989959716796875, -0.2779998779296875, 1.69110107421875, 0.8385162353515625, -0.7834014892578125, -0.25848388671875, 0.09955978393554688, 0.8989715576171875, -0.47991943359375, -0.0043354034423828125, 0.50048828125, 0.168182373046875, -0.02425384521484375, -0.19779586791992188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000014.npy"}
{"epoch": 0.041176470588235294, "step": 15, "batch_size": 128, "mean": 0.09207721054553986, "std": 0.4155099391937256, "min": -1.0469512939453125, "p10": -0.38952007293701174, "median": 0.08037567138671875, "p90": 0.5812225341796874, "max": 1.77099609375, "pos_frac": 0.6015625, "sample": [-0.38938331604003906, -0.16505813598632812, 0.2202911376953125, -0.3161430358886719, 0.1685943603515625, 0.0547637939453125, -0.5173873901367188, 0.04389190673828125, 0.31851959228515625, 0.3323936462402344, -0.11292648315429688, 0.21603012084960938, -0.28533172607421875, 0.20799636840820312, -0.2137603759765625, -0.04917144775390625, -0.14746856689453125, -0.5113754272460938, 0.3584632873535156, 0.3497486114501953, -0.09625244140625, 0.6270751953125, -0.3649139404296875, 1.77099609375, 0.1554412841796875, -0.235198974609375, 0.14282608032226562, 0.0547637939453125, 0.4059600830078125, -0.03015899658203125, -0.21431350708007812, -0.0020599365234375, 0.539459228515625, 1.2611083984375, -0.03440093994140625, -0.14608001708984375, 0.1710357666015625, 0.0509033203125, -0.13597869873046875, 0.64569091796875, -0.3041839599609375, 0.16248321533203125, 0.013111114501953125, 0.29467010498046875, 0.735137939453125, -0.55169677734375, 0.3866767883300781, 0.20199203491210938, -1.0469512939453125, -0.9005126953125, 0.40540504455566406, 0.4824714660644531, -0.10049057006835938, 0.24561309814453125, 0.912261962890625, 0.0926513671875, 0.9126434326171875, -0.38983917236328125, 0.00687408447265625, 0.07818984985351562, -0.27001190185546875, 0.18755340576171875, -0.0949554443359375, 0.242156982421875, -0.621795654296875, 0.2529449462890625, -0.019550323486328125, -0.4221649169921875, 0.33234405517578125, 0.110260009765625, -0.06645965576171875, 0.5974578857421875, -0.221038818359375, 0.20015716552734375, -0.0931549072265625, 0.1272125244140625, 0.13348388671875, -0.53619384765625, 0.609832763671875, 0.2382049560546875, 0.006805419921875, 0.05387115478515625, 0.007915496826171875, -0.3516845703125, 0.9416046142578125, 0.3035430908203125, 0.696624755859375, -0.3792877197265625, 0.07493209838867188, 0.4242401123046875, -0.0066242218017578125, -0.03189849853515625, 0.2246990203857422, -0.04996490478515625, 0.08256149291992188, -0.43436431884765625, 0.2270946502685547, 0.14728546142578125, 0.5164680480957031, -0.31107330322265625, 0.1927165985107422, 0.12273597717285156, -0.77081298828125, -0.44355010986328125, 0.8554534912109375, -0.0008029937744140625, -0.3394775390625, 0.5742645263671875, -0.054351806640625, -0.3341197967529297, 0.44309234619140625, 0.07102394104003906, -0.11396026611328125, 0.51080322265625, 0.2949943542480469, 0.32568359375, 0.11186981201171875, 0.1558074951171875, -0.0689849853515625, 0.5619182586669922, 0.2497997283935547, 0.22212982177734375, 0.5635147094726562, 0.07198715209960938, -0.7650718688964844, -0.3705291748046875, 0.7607574462890625, 0.3648643493652344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000015.npy"}
{"epoch": 0.04411764705882353, "step": 16, "batch_size": 128, "mean": 0.10639196634292603, "std": 0.41042613983154297, "min": -1.0148468017578125, "p10": -0.30853309631347653, "median": 0.02309417724609375, "p90": 0.5963150024414062, "max": 1.4870147705078125, "pos_frac": 0.53125, "sample": [-0.04805183410644531, -0.3150138854980469, -0.06719970703125, 0.5495529174804688, -0.05345916748046875, 0.17066192626953125, -0.08176422119140625, 0.07851028442382812, -0.23157501220703125, -0.026485443115234375, -0.1682586669921875, -0.33055686950683594, 0.1905364990234375, -0.035552978515625, -0.14907073974609375, 0.4730072021484375, 0.5915679931640625, -0.868438720703125, 0.01929473876953125, 0.0954742431640625, 0.09371566772460938, -0.1458892822265625, 0.1436920166015625, -0.27874755859375, 0.3623847961425781, 0.22721099853515625, -0.24859237670898438, 0.539154052734375, 1.4870147705078125, 0.818603515625, -0.11527633666992188, -0.20085906982421875, 0.47447967529296875, 0.0186767578125, 0.066986083984375, 0.20237350463867188, 0.8551483154296875, 0.2452564239501953, 0.23180389404296875, -0.47435569763183594, 0.527557373046875, -0.065032958984375, 0.0675201416015625, -0.09389495849609375, 0.39697837829589844, -0.64251708984375, 0.82281494140625, 0.262939453125, 0.26145172119140625, 0.000946044921875, -0.00390625, -0.1276683807373047, 0.10962295532226562, -0.0784759521484375, 0.2274322509765625, 0.43471527099609375, -0.02869415283203125, -0.6716766357421875, 0.479949951171875, 0.1294403076171875, -0.1424713134765625, 0.8880691528320312, 0.7776641845703125, 0.02689361572265625, 0.19329071044921875, 0.5230522155761719, -0.127838134765625, 0.7716102600097656, 0.398162841796875, -0.1672821044921875, -0.15665435791015625, -0.2193603515625, -0.22732162475585938, -0.17101669311523438, -0.10480880737304688, -0.252685546875, -0.06870269775390625, 1.0691452026367188, -1.0148468017578125, -0.12949371337890625, 0.14799880981445312, 0.4963836669921875, 0.607391357421875, 0.36710357666015625, 0.3136444091796875, -0.1296234130859375, -0.03225135803222656, -0.17783355712890625, 0.381622314453125, -0.15530776977539062, 0.17293930053710938, 0.409515380859375, -0.015380859375, -0.404144287109375, -0.3859825134277344, -0.1266613006591797, 0.4759502410888672, 0.164276123046875, 0.030548095703125, 0.21010589599609375, 0.2679443359375, 0.28754615783691406, -0.427734375, 0.7479934692382812, -0.0667724609375, -0.5335235595703125, -0.13411331176757812, -0.15084075927734375, 0.165924072265625, 1.2309341430664062, 0.5756988525390625, 0.03302955627441406, 0.18145751953125, -0.18095970153808594, -0.000904083251953125, -0.066497802734375, 0.93646240234375, -0.692901611328125, -0.4122314453125, 0.4087677001953125, 0.1705780029296875, 0.4891510009765625, -0.0493927001953125, -0.305755615234375, -0.0598907470703125, 0.003204345703125, 1.1075057983398438, -0.22766876220703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000016.npy"}
{"epoch": 0.047058823529411764, "step": 17, "batch_size": 128, "mean": 0.1834641546010971, "std": 0.4796707034111023, "min": -0.7208709716796875, "p10": -0.3644733428955078, "median": 0.1534271240234375, "p90": 0.6780120849609375, "max": 3.073974609375, "pos_frac": 0.6640625, "sample": [0.19365310668945312, -0.3567085266113281, 0.16562271118164062, 0.4314537048339844, 0.3581695556640625, 0.5644378662109375, -0.486572265625, -0.2265625, 0.3321075439453125, -0.13400650024414062, 0.3393402099609375, -0.433074951171875, 0.1231689453125, -0.2326507568359375, -0.1008148193359375, 0.10881805419921875, 0.7833747863769531, 0.23975563049316406, 0.4672412872314453, 0.10079193115234375, 0.7352714538574219, 0.28173828125, -0.42024993896484375, -0.0102081298828125, 0.3095436096191406, 0.38542938232421875, -0.0516815185546875, 0.11551094055175781, -0.2657623291015625, -0.6709880828857422, 0.74127197265625, 0.14416122436523438, 0.4522895812988281, 0.21847152709960938, -0.1282501220703125, 0.37279510498046875, 0.2059173583984375, 0.8521537780761719, 0.2512664794921875, -0.4313621520996094, 1.2701492309570312, 0.14990997314453125, 0.139892578125, 0.37465667724609375, -0.4207916259765625, -0.5897216796875, 0.62420654296875, 0.38822174072265625, -0.24147796630859375, 0.6088714599609375, 0.23651885986328125, 0.18596649169921875, 0.0957794189453125, 0.0943756103515625, 0.547027587890625, 0.15694427490234375, -0.11155319213867188, -0.5167770385742188, 0.01493072509765625, 0.21591949462890625, 0.21881103515625, 0.4072418212890625, -0.24855422973632812, 0.575469970703125, 0.2089691162109375, -0.13454437255859375, 0.28218841552734375, -0.13034820556640625, 1.449951171875, 0.36617279052734375, 0.9855804443359375, 0.2421722412109375, 0.07304954528808594, -0.18749618530273438, 0.541595458984375, -0.38259124755859375, -0.2678070068359375, -0.4051666259765625, 0.5399093627929688, 0.41788673400878906, 0.15850830078125, 0.13452720642089844, 0.982147216796875, -0.5680770874023438, 0.05214118957519531, 3.073974609375, -0.007904052734375, 0.0492706298828125, 0.11544036865234375, 0.46295166015625, -0.24884033203125, 0.353546142578125, -0.04582977294921875, 0.3951835632324219, 0.89208984375, -0.03708648681640625, -0.16837692260742188, 0.1779937744140625, 0.27159881591796875, 0.020145416259765625, 0.9844512939453125, -0.1356964111328125, -0.5279541015625, 1.4781112670898438, 0.1584014892578125, -0.14630889892578125, 0.6979217529296875, -0.07178497314453125, -0.0389251708984375, 0.12255096435546875, -0.1738433837890625, 0.6694793701171875, 0.5541038513183594, 0.04888153076171875, -0.040599822998046875, -0.32772064208984375, 0.078338623046875, -0.21054649353027344, 0.22763633728027344, 0.21562576293945312, 0.26743316650390625, 0.08876419067382812, 0.14251708984375, 0.485382080078125, -0.7208709716796875, 0.381195068359375, -0.08439445495605469, 0.19745635986328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000017.npy"}
{"epoch": 0.05, "step": 18, "batch_size": 128, "mean": 0.213575080037117, "std": 0.38510578870773315, "min": -0.5474014282226562, "p10": -0.21368370056152342, "median": 0.15880584716796875, "p90": 0.8458282470703125, "max": 1.2717132568359375, "pos_frac": 0.734375, "sample": [0.18654251098632812, 0.001018524169921875, 1.198974609375, 0.9068183898925781, 0.06792831420898438, 0.13577651977539062, 0.564788818359375, -0.328277587890625, 0.03460693359375, 0.5041084289550781, 0.3343048095703125, -0.036487579345703125, 0.12780189514160156, 0.5727691650390625, 0.5849533081054688, -0.1316986083984375, 0.196319580078125, -0.0674285888671875, -0.22514724731445312, 0.29846954345703125, 0.33473968505859375, 0.03095245361328125, 1.251007080078125, 0.013092041015625, 0.15265655517578125, -0.06584930419921875, 0.24904251098632812, 0.0394134521484375, 0.08476066589355469, 0.874664306640625, 0.39835357666015625, -0.10321426391601562, 0.46427154541015625, 0.2407684326171875, 1.07769775390625, 0.05589485168457031, 0.0636749267578125, 0.25055694580078125, 0.4706764221191406, 0.2794036865234375, 0.30228424072265625, -0.10186004638671875, -0.30394744873046875, 0.07330322265625, 0.466400146484375, 0.47338104248046875, 0.9204254150390625, -0.3606910705566406, 0.10143661499023438, -0.0272216796875, -0.17716217041015625, 0.16979217529296875, 0.05162811279296875, 0.21855735778808594, 0.744384765625, 0.34291839599609375, -0.3401031494140625, -0.1345844268798828, 0.25214385986328125, -0.11154937744140625, 0.23319435119628906, 0.5108184814453125, 0.25482940673828125, 0.002460479736328125, 0.34838104248046875, -0.004703521728515625, 0.242919921875, 0.00469207763671875, 0.0045623779296875, 0.2684459686279297, 0.316986083984375, -0.23723602294921875, 0.14266395568847656, -0.5438156127929688, 0.6685256958007812, 0.18787384033203125, -0.29132080078125, -0.17133331298828125, -0.208770751953125, 0.04534721374511719, -0.3558998107910156, 0.842742919921875, -0.06938362121582031, 0.378936767578125, 0.095672607421875, 0.3977813720703125, 0.85302734375, 0.00919342041015625, 0.16495513916015625, -0.5474014282226562, 0.9376449584960938, 0.01898193359375, 0.10166168212890625, -0.07976531982421875, 0.37880706787109375, -0.1180419921875, -0.5096988677978516, 0.9875946044921875, 0.31673431396484375, 0.19049644470214844, 0.3154296875, 0.2906970977783203, 0.056926727294921875, 0.573638916015625, 0.604736328125, -0.16460418701171875, 1.1039810180664062, 1.0596694946289062, 1.2717132568359375, -0.01972198486328125, 0.08218002319335938, -0.34479522705078125, -0.11610031127929688, 0.5390167236328125, 1.0462226867675781, 0.2939109802246094, 0.0843658447265625, 0.20111083984375, -0.3443336486816406, 0.1841278076171875, 0.4144439697265625, -0.08264350891113281, 0.5481719970703125, 0.06899833679199219, -0.20528793334960938, 0.1470947265625, 0.03464508056640625, 0.27721405029296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000018.npy"}
{"epoch": 0.052941176470588235, "step": 19, "batch_size": 128, "mean": 0.2835448980331421, "std": 0.601743221282959, "min": -1.4133758544921875, "p10": -0.2724441528320312, "median": 0.17012977600097656, "p90": 1.1048851013183594, "max": 2.696685791015625, "pos_frac": 0.6640625, "sample": [0.5216293334960938, -0.034694671630859375, -0.0660247802734375, 0.40238189697265625, 0.16222381591796875, -0.30865478515625, -0.33007049560546875, 0.8502044677734375, 0.5119991302490234, 0.0223388671875, 0.19187164306640625, 1.895904541015625, 0.01041412353515625, 0.4974822998046875, 0.8936080932617188, 0.27386474609375, 0.07598114013671875, 0.13431549072265625, 0.28155517578125, -0.21322250366210938, -0.1916370391845703, 0.8577423095703125, 0.8199234008789062, 1.440460205078125, 0.4832305908203125, -0.22616958618164062, 1.251800537109375, 0.162689208984375, 0.8388175964355469, -0.0045166015625, 0.40151214599609375, 0.75335693359375, -0.08257293701171875, -0.08230400085449219, 0.330963134765625, 1.357025146484375, -0.001743316650390625, -1.108154296875, 0.0088653564453125, -0.4251518249511719, 0.5665512084960938, -0.12012481689453125, -0.03955841064453125, 0.19112014770507812, 1.3204421997070312, 0.06813430786132812, -0.07828903198242188, 0.0107421875, 0.24738693237304688, 1.8317337036132812, -0.1182403564453125, -0.2655181884765625, 0.214996337890625, -0.14871597290039062, -0.11421966552734375, 0.00128173828125, 0.4867668151855469, -0.864410400390625, -0.16243934631347656, -0.243133544921875, 0.46775054931640625, 1.095489501953125, 0.1850910186767578, 0.1008453369140625, -0.19808578491210938, 2.696685791015625, 1.244384765625, 0.7556877136230469, 0.4192962646484375, 0.06668472290039062, 0.429107666015625, -1.4133758544921875, -0.068267822265625, 0.11729812622070312, -0.10706138610839844, 0.5123138427734375, -0.1394195556640625, -0.561676025390625, 0.7302417755126953, -0.09305763244628906, -0.1299591064453125, 0.1589202880859375, 0.43541717529296875, -0.17290115356445312, 0.14286041259765625, 1.36700439453125, 0.17757034301757812, -0.5815505981445312, 0.08752822875976562, 0.9364013671875, -1.1936264038085938, 0.5526161193847656, 0.22429656982421875, 0.5902175903320312, 0.3524971008300781, 0.9617843627929688, -0.43064117431640625, 1.3914794921875, 0.1314544677734375, 0.9076061248779297, 1.2491607666015625, 0.7794265747070312, 1.1268081665039062, -0.06484413146972656, -0.288604736328125, -0.4414825439453125, 0.720947265625, -0.0327606201171875, 0.862701416015625, 0.0464324951171875, 0.147735595703125, 0.2016143798828125, 0.09943389892578125, 0.73492431640625, 0.339324951171875, 0.3583221435546875, 0.3328399658203125, -0.6256027221679688, -0.07722282409667969, -0.12233352661132812, 0.46559906005859375, 1.146209716796875, 0.8742141723632812, 0.1072845458984375, 0.2976531982421875, -0.03867340087890625, 0.257476806640625, 0.5465011596679688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000019.npy"}
{"epoch": 0.05588235294117647, "step": 20, "batch_size": 128, "mean": 0.3201494812965393, "std": 0.47352734208106995, "min": -1.27490234375, "p10": -0.20035896301269532, "median": 0.2619590759277344, "p90": 0.9865406036376952, "max": 1.513763427734375, "pos_frac": 0.75, "sample": [0.13057327270507812, 0.192352294921875, 0.8727569580078125, 0.7258071899414062, 0.0175323486328125, 0.15103912353515625, 0.1326904296875, 0.4669151306152344, 0.6355171203613281, 0.058685302734375, 0.1544189453125, -0.7010650634765625, 0.13903045654296875, 1.0947799682617188, 0.7172813415527344, 0.27770042419433594, 0.8928794860839844, 0.9607124328613281, -1.27490234375, 0.2149829864501953, 0.276397705078125, 0.43560028076171875, -0.1877269744873047, 0.5106964111328125, 1.2027969360351562, -0.03983306884765625, 0.7742691040039062, 1.226776123046875, 0.49707794189453125, 1.38623046875, 0.08159255981445312, 0.0301055908203125, 0.083251953125, 0.2516803741455078, 0.00353240966796875, 0.13934326171875, 0.1823883056640625, -0.3257942199707031, 1.0555782318115234, 0.6618614196777344, 0.5711612701416016, 0.26096343994140625, 0.43708038330078125, 0.31140708923339844, -0.07623291015625, 0.161865234375, 0.20465469360351562, 0.3709526062011719, 0.1911487579345703, 0.6304473876953125, 0.9575881958007812, 0.30521392822265625, 0.17775344848632812, -0.07379150390625, 0.8336639404296875, 0.1532440185546875, 0.523193359375, 0.36268043518066406, -0.28667449951171875, 0.4459552764892578, -0.13600730895996094, -0.0869293212890625, 0.2776050567626953, 0.5958137512207031, -0.203277587890625, 0.7060546875, -0.14236831665039062, -0.1158294677734375, 0.287109375, -0.03290557861328125, 0.14342498779296875, 0.42308807373046875, -0.12306594848632812, 1.169769287109375, 0.06279563903808594, 1.01556396484375, 0.598480224609375, -0.14313125610351562, -0.2294769287109375, 0.6883087158203125, 0.0112762451171875, -0.178924560546875, 0.07940101623535156, 1.4955596923828125, -0.19910812377929688, 0.610443115234375, -0.06700897216796875, 0.9741020202636719, 0.12157821655273438, 0.4529457092285156, -0.2430419921875, -0.001251220703125, 0.6594085693359375, 0.04283905029296875, -0.4634246826171875, 0.18267440795898438, -0.3004627227783203, 0.3760490417480469, 0.7771377563476562, 1.513763427734375, 0.09686279296875, 1.3163223266601562, 0.3724994659423828, -0.35573577880859375, 0.1226043701171875, 0.00029754638671875, 0.6599197387695312, 0.39897918701171875, -0.28385162353515625, -0.0922698974609375, 1.0405006408691406, -0.09716224670410156, 1.1532821655273438, 0.6629467010498047, 0.3289070129394531, 1.4635467529296875, -0.03674125671386719, 0.3153266906738281, 0.39897918701171875, 0.790924072265625, 0.6092529296875, -0.24382781982421875, 0.5804901123046875, -0.26747894287109375, 0.2629547119140625, 0.37825775146484375, 0.3300895690917969, -0.093505859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000020.npy"}
{"epoch": 0.058823529411764705, "step": 21, "batch_size": 128, "mean": 0.5072986483573914, "std": 0.6399627327919006, "min": -0.69708251953125, "p10": -0.25793952941894527, "median": 0.4616241455078125, "p90": 1.3460189819335937, "max": 2.5855865478515625, "pos_frac": 0.7421875, "sample": [0.350006103515625, -0.2735099792480469, 1.2066116333007812, 0.37425994873046875, 0.6289520263671875, 0.278228759765625, 1.5655670166015625, 0.8469352722167969, -0.39876556396484375, 0.4487152099609375, -0.16375732421875, -0.69708251953125, -0.20796966552734375, 0.9910163879394531, -0.03472900390625, -0.13032150268554688, 0.972564697265625, 0.4626502990722656, 0.3853282928466797, 0.7376480102539062, 1.7226943969726562, 0.7716217041015625, -0.15081787109375, 1.2175750732421875, 0.4401836395263672, 0.04305458068847656, 0.80657958984375, 0.1190643310546875, -0.3658447265625, -0.07381439208984375, 0.10030746459960938, 0.8940525054931641, -0.287567138671875, 0.5416946411132812, 1.5371475219726562, -0.4662322998046875, 0.657562255859375, 0.321563720703125, 0.4492149353027344, -0.17731475830078125, 1.6866302490234375, 0.5854759216308594, 0.5136871337890625, -0.169158935546875, -0.038089752197265625, 0.2516822814941406, 1.0509719848632812, -0.2512664794921875, 1.1656875610351562, -0.3808879852294922, 0.47222900390625, 0.9290618896484375, -0.11476898193359375, 0.820343017578125, 0.7813568115234375, 0.10279083251953125, 0.8758354187011719, 1.6944389343261719, 0.4934234619140625, -0.3309211730957031, -0.5506210327148438, 0.2271251678466797, 0.5004615783691406, 1.90167236328125, 0.7508773803710938, 0.31769561767578125, -0.085540771484375, 2.5855865478515625, 2.055389404296875, 0.6174163818359375, 0.6029891967773438, 2.3615875244140625, 1.1507797241210938, -0.1762847900390625, 1.1289520263671875, 0.749237060546875, 0.33111572265625, 0.15806007385253906, 0.9407806396484375, -0.3419532775878906, 0.13588905334472656, 1.316436767578125, -0.04351043701171875, 0.9613800048828125, -0.5043907165527344, -0.2983512878417969, 1.5406494140625, 0.3048362731933594, 0.18049240112304688, -0.119720458984375, 0.6721839904785156, 0.9495086669921875, -0.239898681640625, 0.3884239196777344, 0.2723236083984375, -0.4402313232421875, 0.5005111694335938, 1.3830718994140625, 0.1590423583984375, 0.8501548767089844, 0.7788543701171875, 0.99530029296875, 0.31793975830078125, 0.744537353515625, 0.4752922058105469, 0.4622344970703125, -0.0385284423828125, 0.4788970947265625, 0.3591461181640625, -0.031219482421875, 0.6324501037597656, 1.33013916015625, 0.6469573974609375, -0.0627899169921875, 0.06513214111328125, 1.0213775634765625, 0.049816131591796875, 0.26915740966796875, 0.24267005920410156, -0.11061859130859375, 0.11262893676757812, 0.6768798828125, 1.58184814453125, 0.8129425048828125, 1.0492935180664062, 1.8750839233398438, 0.4610137939453125, 0.962066650390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000021.npy"}
{"epoch": 0.061764705882352944, "step": 22, "batch_size": 128, "mean": 0.5777749419212341, "std": 0.6328703761100769, "min": -1.7684860229492188, "p10": -0.05478420257568358, "median": 0.4982032775878906, "p90": 1.350177001953125, "max": 2.4744186401367188, "pos_frac": 0.8671875, "sample": [-0.01320648193359375, 0.1480560302734375, 0.0058135986328125, 0.3948211669921875, 0.3028831481933594, 0.6941375732421875, -0.10335540771484375, 1.033935546875, 0.9332962036132812, 0.709381103515625, -0.10192108154296875, -0.05081367492675781, 0.93359375, 0.3973579406738281, 0.8738555908203125, 1.3325309753417969, 0.2037811279296875, 1.088470458984375, -0.23834991455078125, 0.9389877319335938, 0.7434616088867188, 0.18465232849121094, 1.3383712768554688, 0.516510009765625, 0.617919921875, 0.8030242919921875, 0.7394142150878906, 0.9442062377929688, 0.10693740844726562, 0.0070590972900390625, 0.888671875, 2.4302978515625, 0.4274749755859375, 1.0658721923828125, 0.8519515991210938, 0.042388916015625, 0.5169506072998047, 2.1126976013183594, 0.4770355224609375, 0.10174942016601562, 0.3099479675292969, -0.2953605651855469, -0.17383575439453125, 0.45246124267578125, 0.16309356689453125, 0.8199005126953125, 0.23430633544921875, 0.7733535766601562, 0.9553070068359375, 0.3814888000488281, 1.339630126953125, 0.40293121337890625, 0.29862213134765625, -1.7684860229492188, 0.26601409912109375, 0.5118484497070312, 1.5102386474609375, 1.6986751556396484, -0.13860130310058594, 0.19473838806152344, -0.597869873046875, 0.5763015747070312, 0.7847900390625, 0.17791175842285156, 0.7935981750488281, 0.2065277099609375, 0.6016464233398438, -0.12147140502929688, 1.7769393920898438, 0.18642425537109375, 0.997802734375, -0.02213287353515625, 0.6319313049316406, 0.81744384765625, 1.4087028503417969, 0.0209503173828125, 0.06416130065917969, 0.9712982177734375, 0.8597640991210938, 2.4744186401367188, 0.3273639678955078, 0.01381683349609375, 0.4324760437011719, 0.471527099609375, 0.6720504760742188, 0.5249614715576172, 0.487274169921875, 1.149688720703125, 0.1618499755859375, 0.09824371337890625, 0.058380126953125, 0.6229095458984375, 0.9350738525390625, 1.2936058044433594, 0.11558151245117188, 2.41259765625, 0.5541572570800781, 0.4312286376953125, 1.0045700073242188, 0.17705154418945312, 1.374786376953125, 1.747161865234375, 1.7071533203125, -0.5012969970703125, 0.26397705078125, 1.133392333984375, -0.06404876708984375, -0.7797164916992188, 1.5603866577148438, 0.7880725860595703, 0.3355865478515625, 0.3665027618408203, 0.3959808349609375, 0.8846817016601562, 1.6964187622070312, 1.0196609497070312, 0.25565338134765625, 1.1088638305664062, 1.0506439208984375, 0.5091323852539062, 0.13460540771484375, 0.5611343383789062, 0.6330757141113281, -0.14500045776367188, -0.047611236572265625, 0.20743370056152344, 0.46375274658203125, 0.40509033203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000022.npy"}
{"epoch": 0.06470588235294118, "step": 23, "batch_size": 128, "mean": 0.49932557344436646, "std": 0.8443185091018677, "min": -2.459014892578125, "p10": -0.326755142211914, "median": 0.41878509521484375, "p90": 1.555519104003906, "max": 3.881591796875, "pos_frac": 0.765625, "sample": [0.44995880126953125, 0.366241455078125, -0.6723556518554688, 1.5338287353515625, -0.02396392822265625, 0.7771759033203125, 0.4383544921875, 0.6161956787109375, -0.11772918701171875, 0.525787353515625, -0.07424354553222656, 0.6373062133789062, 0.08887481689453125, -0.09437370300292969, 0.539794921875, 0.06916046142578125, 0.04595184326171875, 1.111572265625, -0.25785064697265625, -0.5592498779296875, 1.73931884765625, 0.832061767578125, 0.3752021789550781, 0.0126495361328125, 0.4876823425292969, -0.3063926696777344, 0.7040271759033203, 0.42609405517578125, 0.6731491088867188, 0.2839164733886719, 0.4982795715332031, 0.9990081787109375, 0.560638427734375, 0.574188232421875, 0.10683441162109375, 1.7141799926757812, 0.15145111083984375, 0.2836036682128906, -0.049457550048828125, 0.058872222900390625, 0.00818634033203125, 0.18662643432617188, 0.6992874145507812, 0.8743782043457031, -0.12810516357421875, 0.25498390197753906, 2.3285980224609375, 0.1042327880859375, 0.2619781494140625, 0.22658538818359375, -1.3353042602539062, 0.8745651245117188, 0.1692352294921875, 2.413970947265625, 0.016387939453125, 1.6820144653320312, -0.686767578125, 0.59857177734375, 1.215414047241211, 0.9952621459960938, 0.10808563232421875, 0.5563201904296875, 0.1363506317138672, 0.41147613525390625, -0.011867523193359375, -0.08697509765625, 0.5791702270507812, 1.48223876953125, 0.9171981811523438, -0.9824752807617188, 0.48535919189453125, 0.30503082275390625, -0.11823654174804688, -0.3755912780761719, 1.4269523620605469, 3.549591064453125, -0.03816986083984375, 0.6484832763671875, 1.59808349609375, 1.6199951171875, -2.459014892578125, -0.0243682861328125, 1.1209869384765625, 0.007568359375, 0.9677543640136719, 0.3970146179199219, 1.2630767822265625, 0.4464263916015625, 0.18301010131835938, -0.41243743896484375, 0.5438232421875, 2.3966522216796875, 0.9013824462890625, -0.733123779296875, 0.6519737243652344, 2.0855178833007812, 2.3114395141601562, 0.9700164794921875, 0.2543754577636719, 0.6425228118896484, 0.23406982421875, 0.5821914672851562, 1.050567626953125, -0.7912940979003906, -0.37803077697753906, 0.6707000732421875, 1.5372772216796875, -0.11926841735839844, 0.5192909240722656, 0.017004013061523438, 0.7528152465820312, 0.4008159637451172, -0.1807079315185547, -0.374267578125, 0.9094352722167969, 0.3252410888671875, 3.881591796875, -0.6364059448242188, 0.565948486328125, 0.776153564453125, -0.09299850463867188, 0.6089019775390625, 0.9555130004882812, 2.1241912841796875, 0.18939208984375, -0.00540924072265625, 0.13300323486328125, 0.246490478515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000023.npy"}
{"epoch": 0.06764705882352941, "step": 24, "batch_size": 128, "mean": 0.6397730112075806, "std": 0.95405113697052, "min": -1.573516845703125, "p10": -0.2521171569824219, "median": 0.5143566131591797, "p90": 1.6613800048828122, "max": 5.60723876953125, "pos_frac": 0.8125, "sample": [1.512237548828125, 1.8089218139648438, -0.322998046875, 0.22402572631835938, 1.1842575073242188, -0.5690155029296875, 0.46660614013671875, 0.18282318115234375, 0.3282623291015625, 0.3955230712890625, 0.2369842529296875, 1.1210784912109375, 0.8507614135742188, 0.799224853515625, -0.29877281188964844, 1.0860519409179688, 1.4708786010742188, 0.0819549560546875, -0.730224609375, 0.2696647644042969, -0.26399993896484375, 0.07924652099609375, 1.242584228515625, 1.81927490234375, -0.887939453125, -1.111053466796875, 0.5218887329101562, 0.17035675048828125, 0.276580810546875, 0.6143150329589844, 0.265289306640625, 0.5943222045898438, 1.048065185546875, 0.5839767456054688, -0.21134185791015625, 1.05279541015625, 0.9702873229980469, 0.7746429443359375, 0.5768699645996094, 0.4805259704589844, 1.7386627197265625, 0.21371841430664062, -0.5850448608398438, -0.0119476318359375, 1.642913818359375, 0.43737030029296875, 0.05460929870605469, 0.7301979064941406, 3.9271087646484375, 0.5625114440917969, 0.49027252197265625, 0.982818603515625, 1.8013839721679688, 1.62261962890625, 0.73974609375, 0.3670310974121094, 1.1772384643554688, 0.7880935668945312, 1.7044677734375, 0.5068244934082031, 0.612091064453125, -0.1319122314453125, -0.8897781372070312, 0.08403778076171875, 1.1787185668945312, 0.7756004333496094, 1.877166748046875, 1.1942291259765625, 1.8762893676757812, 0.9695701599121094, -0.08160400390625, 0.02693939208984375, 2.97161865234375, 0.1406097412109375, 0.5726699829101562, 0.2606086730957031, -0.646484375, 0.8316268920898438, 0.3216686248779297, 0.24280548095703125, -0.005584716796875, 0.2780570983886719, 0.694427490234375, 0.6567153930664062, 0.63812255859375, 0.7004470825195312, 2.71966552734375, 0.10408782958984375, -0.2905387878417969, 1.39300537109375, 0.7148780822753906, 0.8502960205078125, 0.0386810302734375, 5.60723876953125, 0.04566192626953125, -0.1708984375, 0.3094482421875, 0.5741119384765625, 0.4276580810546875, 2.3736419677734375, 0.6093978881835938, 0.2601318359375, -0.2926788330078125, 0.09772872924804688, 1.1377944946289062, -1.573516845703125, 0.7155609130859375, -0.18190765380859375, -0.015106201171875, -0.2470245361328125, 0.22991180419921875, 0.6502113342285156, -0.06679344177246094, 0.8569889068603516, 0.25380706787109375, 0.7696762084960938, 0.26971435546875, 1.0137042999267578, 1.2469825744628906, 0.3863372802734375, 0.47527503967285156, 0.5023136138916016, 0.8514785766601562, 0.1392669677734375, 4.43121337890625, -0.008213043212890625, 0.6984138488769531, 1.2491493225097656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000024.npy"}
{"epoch": 0.07058823529411765, "step": 25, "batch_size": 128, "mean": 0.8033162355422974, "std": 0.9725300669670105, "min": -1.01812744140625, "p10": -0.3310817718505859, "median": 0.7132186889648438, "p90": 2.0990226745605467, "max": 3.7619781494140625, "pos_frac": 0.7734375, "sample": [0.3221244812011719, 0.024463653564453125, 0.5170345306396484, 1.1236724853515625, 0.8513660430908203, 1.9072036743164062, 1.0305023193359375, 0.4457740783691406, 0.7245635986328125, -0.11905670166015625, 0.5197296142578125, -0.1166534423828125, 1.2838802337646484, -0.8982353210449219, 2.3453903198242188, -0.6878604888916016, 0.4923095703125, 0.6434478759765625, -0.43134307861328125, -0.46689605712890625, 0.24186325073242188, 2.1606369018554688, -0.35176849365234375, 2.0726165771484375, 2.384918212890625, 1.7012786865234375, 1.1488189697265625, 0.36215972900390625, 0.9318084716796875, 0.8747673034667969, 1.725921630859375, 0.2138519287109375, 0.210235595703125, -0.020904541015625, -0.14667892456054688, 1.2451457977294922, -0.40651702880859375, 1.5323333740234375, 0.6280593872070312, 1.0221176147460938, 2.63336181640625, 2.490264892578125, -0.04116058349609375, 1.68792724609375, 0.5732269287109375, 0.2912254333496094, 1.4789543151855469, 1.290435791015625, 0.9147758483886719, -0.029275894165039062, 0.13470458984375, 1.3215694427490234, 1.3369865417480469, 3.7619781494140625, 0.49192047119140625, 1.994537353515625, -0.2357177734375, 0.50714111328125, -0.0128173828125, 0.39395904541015625, 1.0220184326171875, 0.915863037109375, 1.5203170776367188, 0.488922119140625, 3.160919189453125, 1.3454742431640625, 1.4596405029296875, 1.4504928588867188, -0.3089408874511719, 1.2434921264648438, 0.14191818237304688, -0.14003753662109375, 2.252105712890625, 1.2565269470214844, 1.524200439453125, 0.30872344970703125, -0.2447509765625, 1.4815139770507812, 2.196807861328125, 1.3103256225585938, 2.6364974975585938, 3.35321044921875, 1.3198013305664062, 0.17327880859375, -0.399627685546875, 1.2364654541015625, 0.1921977996826172, 1.4310531616210938, 0.36932373046875, -1.01812744140625, 0.88677978515625, 0.9163627624511719, 0.9660224914550781, -0.54681396484375, 0.7696113586425781, 1.4219512939453125, -0.213531494140625, 0.3036766052246094, -0.8019447326660156, 1.7197685241699219, 0.8721847534179688, 2.848419189453125, -0.0402374267578125, 0.701873779296875, 0.056346893310546875, 0.9070320129394531, 0.6684341430664062, 0.36899566650390625, 0.8388919830322266, 1.5030746459960938, 0.5704727172851562, 0.8287162780761719, -0.3222160339355469, -0.2721977233886719, 1.3964004516601562, 0.0798492431640625, 1.2024002075195312, 0.8641777038574219, -0.3032073974609375, 0.6019363403320312, 0.11446380615234375, -0.7581710815429688, 3.724822998046875, -0.5414657592773438, 2.0402297973632812, 0.302947998046875, -0.7050933837890625, 0.14783477783203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000025.npy"}
{"epoch": 0.07352941176470588, "step": 26, "batch_size": 128, "mean": 1.0894641876220703, "std": 1.28699791431427, "min": -2.0165863037109375, "p10": -0.12041950225830078, "median": 0.6921710968017578, "p90": 3.138469696044922, "max": 5.1049652099609375, "pos_frac": 0.8203125, "sample": [1.9703865051269531, 1.2398529052734375, -0.12040519714355469, 0.07226181030273438, 1.2730484008789062, -0.7498626708984375, 1.38446044921875, -0.15664100646972656, -0.16715621948242188, 0.5206737518310547, 2.4478759765625, -0.8147048950195312, 0.3090057373046875, -0.03192138671875, -0.13616943359375, 0.99005126953125, 0.6136665344238281, 1.0246467590332031, 0.20253753662109375, 4.7912750244140625, -0.11379623413085938, 3.3263702392578125, 1.512176513671875, 1.0521087646484375, -0.4652290344238281, 3.0799560546875, 4.0867156982421875, 0.5813369750976562, 0.4083976745605469, 0.380584716796875, -0.05098152160644531, 1.1971702575683594, -0.120452880859375, -0.08074951171875, 0.70501708984375, 2.66668701171875, 0.5067863464355469, 0.5950698852539062, 0.04193878173828125, 2.3376617431640625, -0.1597747802734375, 0.4432525634765625, 0.21626853942871094, 0.9368133544921875, -0.001811981201171875, -0.17888641357421875, 3.450347900390625, 1.4113826751708984, 0.2291107177734375, 0.29193115234375, 2.1395435333251953, 5.1049652099609375, 1.0073204040527344, 0.3230571746826172, 0.7362060546875, -0.9193038940429688, 0.16841697692871094, 0.16715621948242188, 0.5402984619140625, -2.0165863037109375, -0.07238006591796875, 0.6513824462890625, 1.7098884582519531, 0.6556396484375, 0.10730743408203125, 3.33416748046875, 0.6793251037597656, 0.488800048828125, 1.7828216552734375, 2.11065673828125, 1.2707176208496094, 1.6100234985351562, 2.010894775390625, 0.6090431213378906, 2.4931182861328125, 2.3000564575195312, 0.6301441192626953, -0.01934814453125, 2.1325302124023438, 0.3831520080566406, 4.973136901855469, 0.4613685607910156, 1.0012245178222656, 1.6340103149414062, 1.5719375610351562, 1.2908782958984375, 0.8713912963867188, 0.76470947265625, 2.7219390869140625, 0.19862937927246094, -0.08559799194335938, 1.1332244873046875, 2.0526885986328125, 0.3667640686035156, 1.00140380859375, 3.1362152099609375, 1.356536865234375, 3.7636566162109375, 0.6016387939453125, 1.4642372131347656, 0.3899383544921875, 1.7302665710449219, 1.2754859924316406, 2.3424034118652344, -0.22795677185058594, 1.0278778076171875, 0.6411972045898438, 0.5116024017333984, 0.7107944488525391, 4.725730895996094, 3.6754302978515625, 3.2250442504882812, -0.23551559448242188, 0.38382720947265625, 0.573974609375, 0.18645477294921875, 0.8442115783691406, 1.9611663818359375, 0.3114166259765625, 3.273834228515625, 0.915985107421875, 0.4394989013671875, 3.1437301635742188, -0.041370391845703125, 1.0831375122070312, 0.2874107360839844, 0.23787879943847656, 0.7106571197509766], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000026.npy"}
{"epoch": 0.07647058823529412, "step": 27, "batch_size": 128, "mean": 1.3742213249206543, "std": 1.588990569114685, "min": -0.9681167602539062, "p10": -0.1356389999389648, "median": 1.0255441665649414, "p90": 3.6612472534179688, "max": 9.125152587890625, "pos_frac": 0.859375, "sample": [-0.69879150390625, 2.0229415893554688, 0.483428955078125, 0.906585693359375, 0.16385269165039062, 0.7359943389892578, 3.0286941528320312, 0.0198822021484375, 0.8631134033203125, 4.6827392578125, 0.47917938232421875, 3.2558822631835938, 0.6786651611328125, 0.1412811279296875, 5.491188049316406, 1.1538543701171875, 3.181629180908203, 2.3893508911132812, 1.1070842742919922, 0.08917236328125, 0.076690673828125, 0.18357086181640625, 0.05714225769042969, 0.8008136749267578, 1.428466796875, 0.75885009765625, 0.5366935729980469, 0.7958049774169922, 0.229339599609375, 0.5092010498046875, 0.0370635986328125, 1.8282737731933594, 1.3966617584228516, -0.0154266357421875, 1.7743072509765625, 9.125152587890625, -0.12227630615234375, 1.3658065795898438, 1.5892181396484375, 2.8643035888671875, 1.7961807250976562, -0.32745361328125, 4.4732513427734375, -0.47197723388671875, 1.5198516845703125, 0.6619110107421875, 4.6110992431640625, 0.5085601806640625, -0.18536376953125, 0.6578369140625, 0.15662384033203125, 1.4549713134765625, -0.32877349853515625, 2.0021743774414062, 4.453887939453125, -0.939208984375, 0.24738502502441406, 4.325431823730469, 0.5742416381835938, 5.4483642578125, 1.7713241577148438, 0.9775390625, 4.2041015625, -0.27610015869140625, 3.0268173217773438, 1.2986297607421875, 1.1091880798339844, 0.3603096008300781, 0.17393875122070312, 1.0779571533203125, -0.3967742919921875, 0.011669158935546875, 0.058013916015625, -0.03697776794433594, 1.7051315307617188, 1.6595916748046875, 0.0195465087890625, -0.35755157470703125, 1.7198562622070312, 0.4552154541015625, 2.5075531005859375, 0.6050376892089844, -0.23201751708984375, 0.9643440246582031, 3.409271240234375, 1.384246826171875, 1.7026824951171875, 1.720306396484375, -0.013652801513671875, 1.81964111328125, 0.8102951049804688, 0.5345802307128906, 2.5968551635742188, 0.6083602905273438, 0.927459716796875, 3.7637939453125, 0.9694442749023438, 3.70391845703125, -0.0588836669921875, 1.732330322265625, 1.7639083862304688, 0.6285285949707031, 1.2103118896484375, 2.465862274169922, 1.5547637939453125, -0.9681167602539062, 1.0735492706298828, 1.9253387451171875, 1.3317794799804688, -0.33040618896484375, 0.4407157897949219, 0.38287353515625, 1.3972797393798828, 2.3133621215820312, 6.082000732421875, 2.472076416015625, -0.16681861877441406, 1.274932861328125, 0.07438468933105469, 2.1339263916015625, 1.1612720489501953, 1.7094497680664062, 1.214263916015625, 3.9210357666015625, 0.17804527282714844, 2.4053955078125, 0.5562801361083984, 3.6429595947265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000027.npy"}
{"epoch": 0.07941176470588235, "step": 28, "batch_size": 128, "mean": 1.3809640407562256, "std": 1.729494333267212, "min": -2.621795654296875, "p10": -0.5640609741210938, "median": 1.0227794647216797, "p90": 3.883592224121094, "max": 6.278663635253906, "pos_frac": 0.8203125, "sample": [3.5181427001953125, 2.999774932861328, 1.7230911254882812, -0.919708251953125, 5.8598785400390625, -0.6308517456054688, 1.722076416015625, 1.1801109313964844, 1.3649444580078125, 0.34674835205078125, 5.280223846435547, 0.4632987976074219, 4.3412322998046875, 0.06781005859375, -0.33683013916015625, 1.6617965698242188, 0.9662265777587891, 0.14009475708007812, 2.571685791015625, 0.5267372131347656, 0.3734283447265625, 1.2294235229492188, -1.276519775390625, -0.6429290771484375, 0.17395401000976562, -2.10919189453125, 1.0851612091064453, 3.407440185546875, -0.3552284240722656, 1.0324821472167969, 2.2188491821289062, 1.2270565032958984, 3.0713958740234375, -0.260345458984375, 0.2740364074707031, 0.8974933624267578, 0.49466705322265625, 1.2831649780273438, 4.7197265625, 3.5391845703125, 4.12469482421875, 0.5124225616455078, 1.94097900390625, 0.26016998291015625, 0.09111404418945312, 2.0412635803222656, -0.9809722900390625, 1.825897216796875, 1.3864631652832031, 1.4716110229492188, 0.20992279052734375, 4.0396881103515625, 2.1894798278808594, -0.5629692077636719, -0.5666084289550781, 3.2259674072265625, -0.11117744445800781, 0.5672454833984375, 1.7455902099609375, 3.4398956298828125, 2.1292800903320312, -0.3549041748046875, 1.2665367126464844, 3.9672698974609375, 0.5157928466796875, 0.8373870849609375, -2.621795654296875, 3.288837432861328, -0.19013404846191406, -0.32495880126953125, 6.278663635253906, 0.796630859375, 0.5517578125, 1.1036758422851562, 3.4023704528808594, 0.9095077514648438, 0.2578392028808594, 3.3515777587890625, -1.5785446166992188, 5.183319091796875, 1.8095321655273438, 0.3550739288330078, 1.6001091003417969, 3.275360107421875, -0.7489280700683594, 3.319549560546875, 1.8149871826171875, 1.1231231689453125, 0.199859619140625, 3.3117828369140625, -0.030618667602539062, -0.20129013061523438, 0.4085693359375, 3.8712005615234375, -0.6393585205078125, 0.6260147094726562, -0.783660888671875, 0.14844894409179688, 1.0114364624023438, 0.8417510986328125, 5.0720672607421875, 3.912506103515625, 1.1760711669921875, 0.5057621002197266, 0.5919342041015625, 1.711700439453125, 0.524261474609375, 0.06475067138671875, 1.8764457702636719, 1.3574771881103516, 0.9426803588867188, 3.8591995239257812, 0.5316486358642578, 0.8415985107421875, 1.6167678833007812, 5.9652099609375, 1.0425567626953125, 4.067314147949219, 0.5027790069580078, 0.521820068359375, 0.0391082763671875, 2.94329833984375, 2.332874298095703, -0.8918685913085938, 1.9817352294921875, 1.0130767822265625, 0.5553855895996094, 1.9417648315429688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000028.npy"}
{"epoch": 0.08235294117647059, "step": 29, "batch_size": 128, "mean": 1.5245532989501953, "std": 1.9853209257125854, "min": -2.9737548828125, "p10": -0.5444110870361327, "median": 1.1709976196289062, "p90": 3.899256896972656, "max": 9.095260620117188, "pos_frac": 0.8203125, "sample": [2.918304443359375, 0.8961620330810547, 0.7078361511230469, 0.2747039794921875, 4.479766845703125, -0.84454345703125, 0.5517425537109375, 0.0036449432373046875, 0.2003192901611328, 0.34564208984375, -0.18017578125, 4.240028381347656, 3.019256591796875, 2.5965499877929688, 2.8032073974609375, 2.4395294189453125, 1.0708694458007812, 0.1401824951171875, 0.978607177734375, 1.2138519287109375, -0.6373825073242188, 0.7903060913085938, -0.4970855712890625, 6.1817779541015625, 1.7758636474609375, -0.28125953674316406, -0.5045661926269531, 1.3012237548828125, 1.5938377380371094, 1.4312477111816406, 0.968475341796875, 1.0754165649414062, 3.558837890625, 1.49871826171875, 1.169281005859375, 1.0898170471191406, 3.9391937255859375, 1.9502105712890625, 1.265899658203125, 1.9934234619140625, 0.5902271270751953, 0.7406730651855469, 7.0470123291015625, -1.5546035766601562, 1.389669418334961, -0.40906524658203125, 0.0474090576171875, 1.6544132232666016, 2.86578369140625, 1.1178817749023438, 3.7275733947753906, 1.9826545715332031, 4.827880859375, -0.053264617919921875, 1.3535308837890625, 1.8645515441894531, 0.7438831329345703, 0.9016838073730469, 1.9819412231445312, -1.4489212036132812, 1.7326736450195312, 3.6318817138671875, 0.9609565734863281, 0.7823448181152344, 2.0569915771484375, -0.415496826171875, 0.26663780212402344, -0.35382080078125, 3.629302978515625, -0.7163715362548828, 1.581451416015625, 1.1232986450195312, 2.790203094482422, 1.1525001525878906, 1.5438575744628906, -0.7610397338867188, 0.9876861572265625, 0.7543449401855469, 5.357688903808594, -0.6632843017578125, 1.3358821868896484, 0.2327709197998047, 3.3791847229003906, 0.05419731140136719, 2.718963623046875, 0.4375457763671875, 3.2345046997070312, 0.2714996337890625, -2.9737548828125, 6.46539306640625, 0.31701087951660156, 1.1727142333984375, 0.09980010986328125, 2.375774383544922, 2.161396026611328, -1.6379547119140625, 3.3485870361328125, 1.2010688781738281, 0.5274887084960938, 2.4441146850585938, 0.41687774658203125, 2.354106903076172, 1.23199462890625, 2.7727203369140625, 2.0295562744140625, 1.9895248413085938, 0.543487548828125, 0.7747707366943359, 4.259838104248047, 1.0142536163330078, 9.095260620117188, 1.6748809814453125, 0.16001510620117188, 0.8176345825195312, 7.1252899169921875, -1.8854522705078125, 3.7417678833007812, 2.6852264404296875, 1.9619197845458984, 3.2135467529296875, 6.70263671875, -2.0399169921875, -0.426788330078125, -1.4474334716796875, 4.4803466796875, -0.42812538146972656, -1.0548858642578125, 3.88214111328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000029.npy"}
{"epoch": 0.08529411764705883, "step": 30, "batch_size": 128, "mean": 1.6477129459381104, "std": 2.2027907371520996, "min": -5.009674072265625, "p10": -0.5323724746704102, "median": 1.2416725158691406, "p90": 4.882352066040038, "max": 8.561956405639648, "pos_frac": 0.828125, "sample": [3.3686904907226562, 2.8995819091796875, 3.3019256591796875, 7.43487548828125, 2.4037704467773438, 0.270843505859375, 4.2397918701171875, 5.9599151611328125, 0.6585121154785156, 2.8153610229492188, 2.1299285888671875, 6.8705902099609375, 0.12076568603515625, 0.8737754821777344, -1.5470733642578125, -0.49143218994140625, 1.4634933471679688, 1.9246330261230469, 2.1020584106445312, 3.1735458374023438, 1.4733848571777344, 0.35980224609375, 5.290580749511719, 0.15831375122070312, 3.478302001953125, -0.2055225372314453, 0.4607963562011719, 0.48382568359375, 4.040863037109375, 0.2640228271484375, 2.497896194458008, 2.5935211181640625, -1.1781120300292969, 4.772064208984375, 1.1510848999023438, -0.5418319702148438, 0.41049957275390625, 0.4170265197753906, 4.695411682128906, 0.11917877197265625, 0.05303955078125, 5.736701965332031, 0.8014373779296875, 0.2868766784667969, 3.2113914489746094, -2.406890869140625, 1.3940200805664062, -0.57159423828125, -1.88421630859375, -2.868072509765625, 0.32826805114746094, -1.2290420532226562, 1.028341293334961, 1.7353782653808594, 1.2131423950195312, 0.5313949584960938, 2.2243194580078125, 1.1961135864257812, 1.3166122436523438, -0.521331787109375, 5.3217620849609375, 3.7024383544921875, 1.4141044616699219, 8.46771240234375, 2.1558494567871094, -0.19146728515625, 0.1762676239013672, 2.4380035400390625, 1.2108345031738281, 0.04902076721191406, 8.561956405639648, 5.6587677001953125, 1.147918701171875, 1.9921798706054688, -0.9988861083984375, 3.1549415588378906, 1.9972381591796875, 2.9891281127929688, 5.5955657958984375, 1.702880859375, 0.9846954345703125, 5.807197570800781, 1.5284576416015625, 0.015140533447265625, 1.3743209838867188, 5.6338958740234375, 0.930999755859375, -0.4402313232421875, 4.6494903564453125, 1.685302734375, 3.591388702392578, -0.6112613677978516, -0.8552474975585938, 0.06142425537109375, 0.8164310455322266, 1.5450592041015625, 1.947540283203125, -5.009674072265625, 0.4166221618652344, 0.33492279052734375, -0.12326431274414062, 0.7757797241210938, 5.139690399169922, 1.27020263671875, 0.682586669921875, 1.2879562377929688, -0.31986236572265625, 0.8430252075195312, 1.6809616088867188, -0.5283184051513672, 0.8505706787109375, 1.7156219482421875, 1.0431442260742188, 0.7005996704101562, 2.8149261474609375, 3.1972122192382812, 2.4227218627929688, -0.1297454833984375, 1.0241775512695312, 0.7695465087890625, 2.7881622314453125, 2.4000015258789062, -0.9564056396484375, 3.6716995239257812, 1.9545211791992188, 0.71820068359375, 1.5419292449951172, 0.4283714294433594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000030.npy"}
{"epoch": 0.08823529411764706, "step": 31, "batch_size": 128, "mean": 1.5083717107772827, "std": 2.5508828163146973, "min": -5.593048095703125, "p10": -0.8550369262695312, "median": 1.0608510971069336, "p90": 4.052104187011718, "max": 13.7911376953125, "pos_frac": 0.78125, "sample": [0.3656005859375, 0.4120197296142578, -1.6944198608398438, 1.248626708984375, 1.6483306884765625, 0.15861129760742188, -0.17478561401367188, 0.7448234558105469, -0.3235626220703125, 0.06645965576171875, 3.8687591552734375, 3.6486129760742188, 1.0413589477539062, 4.130546569824219, 6.4520263671875, 0.9052581787109375, 7.612434387207031, -0.248687744140625, 2.61944580078125, 3.004486083984375, 5.4365234375, 0.23287200927734375, -1.03863525390625, 3.989501953125, 13.7911376953125, 1.8282394409179688, 2.790302276611328, -0.08264923095703125, -0.88482666015625, 3.0868492126464844, 1.004547119140625, -0.5136871337890625, -0.8422698974609375, 2.279693603515625, 0.085723876953125, 11.203903198242188, 4.300651550292969, 2.8500213623046875, 1.0593795776367188, 1.1866569519042969, 2.3703689575195312, 2.846527099609375, 1.5937538146972656, 0.5348014831542969, 1.536529541015625, 2.2057876586914062, -0.09116363525390625, 3.97998046875, 0.053165435791015625, -0.3653144836425781, 1.975799560546875, 0.6501522064208984, 0.4785480499267578, 1.756561279296875, -0.1637420654296875, 0.6952247619628906, 0.23254013061523438, 1.8932266235351562, 1.35626220703125, 0.8905792236328125, 2.1135787963867188, -2.642719268798828, -1.447998046875, 0.15130996704101562, 0.7189826965332031, 0.6118392944335938, -0.3115234375, 1.4027175903320312, 0.8663883209228516, 1.6620979309082031, 1.6761665344238281, 0.4462127685546875, 3.00982666015625, 1.140655517578125, 0.166015625, 1.9663772583007812, -0.06270599365234375, 2.0985641479492188, 1.2384414672851562, 1.0623226165771484, 1.000091552734375, 0.61199951171875, -1.061431884765625, 1.631631851196289, -1.7974853515625, 4.172088623046875, -5.593048095703125, -3.034027099609375, 3.4647750854492188, 2.5029144287109375, 2.426576614379883, 0.07620429992675781, -1.101654052734375, 0.6186351776123047, 4.295082092285156, -0.6645030975341797, 0.7264633178710938, -1.64892578125, 3.3966064453125, 2.0065155029296875, 0.8111228942871094, -0.7362022399902344, 2.2833251953125, 0.46206092834472656, -1.8617706298828125, 4.018486022949219, 0.4578285217285156, 2.787761688232422, 0.5032577514648438, 5.109291076660156, 4.73681640625, 3.730621337890625, -1.5871734619140625, 0.5234375, 0.23412322998046875, 1.108285903930664, 3.43487548828125, 0.11437416076660156, 2.9056625366210938, 1.8356666564941406, -0.22335052490234375, 2.4982681274414062, 12.004608154296875, 4.7955474853515625, 1.3965892791748047, 1.3232002258300781, 1.5738067626953125, -0.74346923828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000031.npy"}
{"epoch": 0.09117647058823529, "step": 32, "batch_size": 128, "mean": 2.1006791591644287, "std": 3.2457480430603027, "min": -5.612831115722656, "p10": -0.6414485931396482, "median": 1.4411239624023438, "p90": 5.879647064208984, "max": 23.187255859375, "pos_frac": 0.7734375, "sample": [4.176513671875, 2.0726089477539062, 1.1791648864746094, 2.2859344482421875, -2.111968994140625, 0.8270187377929688, 0.13327789306640625, -0.25331878662109375, 5.330535888671875, -0.5716590881347656, 4.5535888671875, 6.6420135498046875, 4.312049865722656, 6.002998352050781, 1.4488983154296875, 0.12767410278320312, 5.0342254638671875, 2.07366943359375, 6.2222900390625, 3.6939010620117188, 3.9249725341796875, 4.3045196533203125, 2.4128494262695312, 4.802238464355469, 1.0960235595703125, -0.5039825439453125, -0.804290771484375, 2.4948577880859375, 8.258033752441406, -1.193572998046875, 9.047407150268555, 0.11186981201171875, 0.221710205078125, 1.7790031433105469, 1.4878082275390625, 4.086097717285156, 3.4905624389648438, 1.2692489624023438, -0.46340370178222656, 1.0003242492675781, -1.322906494140625, 2.9393997192382812, -0.488922119140625, -0.06741905212402344, 1.6321563720703125, 1.243021011352539, 2.130828857421875, 2.2891464233398438, 0.1996612548828125, 0.60784912109375, -0.25335693359375, 1.433349609375, -5.612831115722656, 2.3468456268310547, 1.321685791015625, 0.491912841796875, 2.3329219818115234, 9.123748779296875, -0.5289230346679688, -0.14698219299316406, 3.253448486328125, 3.169708251953125, 4.5538330078125, 5.396751403808594, -0.2957916259765625, 3.1966629028320312, 6.3844451904296875, 1.5375823974609375, 0.005237579345703125, 4.4178314208984375, 0.12681007385253906, 4.598808288574219, 1.3455657958984375, -0.8955535888671875, 2.111297607421875, -0.331451416015625, 2.9699440002441406, -2.29168701171875, 6.0723724365234375, 1.1147994995117188, 23.187255859375, 1.670175552368164, 0.572601318359375, 0.7108078002929688, 0.2055206298828125, 4.485260009765625, 3.1416778564453125, -1.3483734130859375, 7.43927001953125, -1.0152816772460938, 3.80810546875, 5.8267822265625, 4.8492584228515625, 0.5967826843261719, 0.587371826171875, -0.07699775695800781, -1.47552490234375, 1.0821151733398438, -0.08782958984375, 2.8974342346191406, 0.7932662963867188, 0.46092987060546875, 1.4124374389648438, 7.156494140625, -1.8026504516601562, -0.12625885009765625, 1.221435546875, 0.4431037902832031, -2.951904296875, 1.6669082641601562, 2.4041099548339844, -0.16005325317382812, 0.07447242736816406, 7.2193603515625, 2.71405029296875, 1.6556549072265625, -0.4602813720703125, -4.4624786376953125, 7.977958679199219, 1.1619796752929688, 0.5775432586669922, 0.3887901306152344, 4.806060791015625, 5.4608612060546875, 2.4090309143066406, 1.5722503662109375, 1.4100112915039062, 2.6939315795898438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000032.npy"}
{"epoch": 0.09411764705882353, "step": 33, "batch_size": 128, "mean": 2.0138237476348877, "std": 2.361499547958374, "min": -3.797271728515625, "p10": -0.390823745727539, "median": 1.5579519271850586, "p90": 4.726065826416015, "max": 9.689422607421875, "pos_frac": 0.828125, "sample": [5.993133544921875, 8.557731628417969, 2.7865142822265625, 4.610893249511719, 1.433380126953125, 1.6502532958984375, 0.5532455444335938, 1.5380172729492188, 4.658119201660156, 1.0894031524658203, -2.35443115234375, 0.3135223388671875, 1.5344867706298828, 4.3749847412109375, 3.599334716796875, -2.7471084594726562, -0.8659286499023438, 0.855987548828125, 4.8428802490234375, 1.0802001953125, 2.7205123901367188, 1.9914474487304688, 2.1473007202148438, 0.8106307983398438, -1.8438873291015625, 3.4409942626953125, 2.673492431640625, -0.321197509765625, 3.71478271484375, 1.1094512939453125, 6.2708740234375, 1.0838470458984375, 2.3270797729492188, 5.966361999511719, 7.1445465087890625, 0.3427581787109375, -0.14340591430664062, 7.0689697265625, 2.578460693359375, 0.03855133056640625, 3.8996429443359375, 2.6097564697265625, 6.139312744140625, 1.5133514404296875, 4.676002502441406, 0.9984016418457031, 2.606884002685547, 1.167724609375, 0.1297454833984375, -3.797271728515625, 2.7224273681640625, -0.23256874084472656, 1.816986083984375, 3.24322509765625, 3.46929931640625, 1.5262298583984375, 1.2137222290039062, 0.8287353515625, 7.2501220703125, 3.8491134643554688, 1.319122314453125, 3.1300430297851562, 1.1413593292236328, 1.7704925537109375, 2.08343505859375, 4.394439697265625, 4.339466094970703, 2.295501708984375, 2.0459213256835938, 2.5206680297851562, -0.1626911163330078, 0.07171630859375, 0.6961898803710938, -0.27933311462402344, 1.1328277587890625, 6.0613250732421875, 0.9067153930664062, -0.5953445434570312, 0.2573432922363281, 2.686901092529297, 6.458385467529297, 2.9883270263671875, 1.5778865814208984, 5.6536865234375, 1.8122596740722656, -0.0705413818359375, 0.4508094787597656, 4.5261383056640625, -3.615203857421875, 1.9546966552734375, 0.5670089721679688, 0.5986709594726562, 2.82330322265625, 3.8340988159179688, 0.3879966735839844, 4.1255035400390625, 1.1227989196777344, 1.612335205078125, -0.11114501953125, 0.6947479248046875, 0.5750637054443359, 1.5327644348144531, 0.8313369750976562, 1.1193923950195312, 1.4288215637207031, -0.7134475708007812, 0.5069313049316406, 3.6608963012695312, 0.22841644287109375, -2.561981201171875, 2.2977066040039062, 3.0528488159179688, 4.0600128173828125, -0.276275634765625, -0.667999267578125, 2.9937973022460938, 1.1665401458740234, -0.5722198486328125, 2.3321075439453125, 1.3468475341796875, -1.4146728515625, 3.9215736389160156, 4.627704620361328, -0.4554634094238281, 3.379302978515625, -0.36312103271484375, 9.689422607421875, 4.578235626220703], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000033.npy"}
{"epoch": 0.09705882352941177, "step": 34, "batch_size": 128, "mean": 2.397696018218994, "std": 3.2145180702209473, "min": -4.17938232421875, "p10": -0.5628776550292969, "median": 1.661717414855957, "p90": 5.772261047363282, "max": 16.404014587402344, "pos_frac": 0.8515625, "sample": [5.7628173828125, 0.5140647888183594, 7.257102966308594, 3.2449798583984375, 0.02629852294921875, -0.5509872436523438, 6.1238861083984375, 1.6552715301513672, 2.9793853759765625, 2.8225021362304688, 0.07381439208984375, -4.17938232421875, 3.1554107666015625, 3.2606964111328125, 0.37230682373046875, -2.1177520751953125, 1.1840248107910156, 0.7537155151367188, 1.3958358764648438, 4.732078552246094, 2.9957809448242188, 0.761383056640625, 4.434844970703125, -2.1084136962890625, 3.242889404296875, 3.1093368530273438, 3.18072509765625, 2.9489574432373047, 1.3508224487304688, 7.684478759765625, 2.3402557373046875, 0.027780532836914062, -2.2010421752929688, 0.8437938690185547, 2.1499500274658203, 1.3337821960449219, 1.6048355102539062, 2.7496490478515625, 1.5559234619140625, -0.9126758575439453, 0.8373031616210938, -0.81005859375, 3.4664306640625, 1.7302360534667969, -0.48345947265625, -0.7091140747070312, 1.352874755859375, 1.9993667602539062, 0.7016067504882812, 5.724449157714844, 0.9403171539306641, 4.381723403930664, 3.6818618774414062, 6.0159912109375, 1.3598098754882812, 3.7629165649414062, 12.091506958007812, 0.502166748046875, 1.0297927856445312, 16.404014587402344, 13.082740783691406, 1.3871212005615234, 0.3123626708984375, 0.19597625732421875, 3.03839111328125, 1.5300750732421875, 4.620697021484375, 3.580596923828125, -0.11524772644042969, 1.011322021484375, 2.4615859985351562, 7.833858489990234, 2.8402938842773438, 0.8094596862792969, 3.5258865356445312, 2.4969100952148438, 1.4496231079101562, 1.9796123504638672, 2.6026878356933594, 4.2039642333984375, 1.8073959350585938, 0.6344757080078125, -0.439666748046875, -0.9615459442138672, 0.17734336853027344, 0.023746490478515625, 2.6698684692382812, 1.0106735229492188, 0.5183258056640625, 1.7169513702392578, 1.2675228118896484, 15.866287231445312, 3.6335067749023438, 3.71405029296875, 1.42218017578125, -1.1939010620117188, -0.9440460205078125, 0.08959579467773438, 5.631885528564453, 10.619659423828125, 2.1599197387695312, 3.2830047607421875, 3.4362411499023438, -0.5906219482421875, 1.6681632995605469, 5.349235534667969, 5.7942962646484375, 6.186859130859375, 2.4587478637695312, 9.521217346191406, 3.0247344970703125, 1.0444068908691406, 4.512386322021484, 1.606475830078125, 0.10916328430175781, -2.40887451171875, 1.9986572265625, 0.04717254638671875, 1.3203449249267578, 5.245353698730469, 0.05804443359375, 1.7447147369384766, 4.208717346191406, 0.5761833190917969, 0.21474456787109375, -0.11991500854492188, -0.844268798828125, -0.351104736328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000034.npy"}
{"epoch": 0.1, "step": 35, "batch_size": 128, "mean": 2.657531261444092, "std": 3.3806161880493164, "min": -6.68450927734375, "p10": -1.0621028900146483, "median": 2.319368362426758, "p90": 6.97841796875, "max": 13.1363525390625, "pos_frac": 0.828125, "sample": [2.8438720703125, 1.0092086791992188, 8.076324462890625, 1.5381393432617188, 5.661048889160156, 4.698432922363281, -5.587249755859375, -0.08502197265625, 6.4320526123046875, 3.5200729370117188, 6.9641265869140625, 5.966094970703125, 4.22821044921875, -6.518150329589844, 1.4889488220214844, 5.785346984863281, 1.2650222778320312, 11.687973022460938, 3.2592926025390625, 5.299430847167969, 3.1698455810546875, -6.68450927734375, 2.366466522216797, 0.8081531524658203, 4.1356201171875, -0.06633949279785156, 2.94091796875, -2.5223617553710938, -1.37701416015625, 1.5774154663085938, 6.238471984863281, 0.43035888671875, 0.21536636352539062, -0.10951995849609375, 2.259490966796875, 2.2970008850097656, 0.276763916015625, 1.8362808227539062, 2.4240798950195312, 8.88558578491211, 4.378475189208984, 8.510971069335938, 0.39098358154296875, 4.105167388916016, 4.7250823974609375, 2.9112091064453125, -1.3938407897949219, 0.5871620178222656, 0.5862503051757812, 1.90460205078125, 0.692352294921875, -2.822277069091797, 0.2760963439941406, 4.2820892333984375, 0.831390380859375, 6.1187896728515625, 4.5240478515625, 10.628265380859375, 0.766510009765625, 5.118438720703125, 0.2023792266845703, 1.1475944519042969, -1.0071563720703125, 0.968658447265625, 2.0522632598876953, 13.1363525390625, -1.1903114318847656, 4.74249267578125, 2.390716552734375, 5.715663909912109, -0.392333984375, -0.739013671875, 9.27880859375, 1.740264892578125, 2.6168060302734375, 9.809661865234375, 6.475196838378906, 3.9093894958496094, 0.196929931640625, 2.34173583984375, 1.7863426208496094, -0.18086624145507812, 2.028156280517578, -1.6467704772949219, 2.0672569274902344, 2.4214859008789062, 4.277595520019531, 0.35631561279296875, -3.5748291015625, 3.314495086669922, 2.0352783203125, 0.09232330322265625, 0.6753158569335938, 6.5188140869140625, 3.306377410888672, 3.554943084716797, 5.953681945800781, 0.04917335510253906, -2.168060302734375, 2.71649169921875, 0.11577224731445312, -1.3942718505859375, 7.51361083984375, -0.15503311157226562, -0.3898353576660156, 7.853725433349609, 1.7823562622070312, 4.287883758544922, 2.2082557678222656, 0.6565361022949219, -1.3570938110351562, 3.8774566650390625, 0.8618278503417969, 0.32544898986816406, 2.979156494140625, 1.61920166015625, 2.865571975708008, 6.4532623291015625, 8.432640075683594, 5.588569641113281, 5.4010772705078125, 3.1836929321289062, 7.0117645263671875, 2.2744693756103516, 2.68804931640625, 3.3023605346679688, 7.029380798339844, 4.411521911621094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000035.npy"}
{"epoch": 0.10294117647058823, "step": 36, "batch_size": 128, "mean": 4.022802352905273, "std": 4.735154628753662, "min": -12.001590728759766, "p10": -0.694226837158203, "median": 3.2248010635375977, "p90": 9.818479919433592, "max": 19.80706787109375, "pos_frac": 0.8515625, "sample": [-1.8274917602539062, 12.016921997070312, 2.5158157348632812, 0.6255874633789062, 0.214630126953125, 0.15407943725585938, 3.8125343322753906, 6.626224517822266, 9.6593017578125, 8.895523071289062, -0.4947662353515625, 3.2048587799072266, -0.4228973388671875, 11.40240478515625, 3.6306991577148438, 2.0654144287109375, 7.464141845703125, -0.07470512390136719, -0.8512096405029297, 9.211570739746094, 17.91485595703125, 0.099090576171875, 7.029487609863281, 5.0937957763671875, 19.80706787109375, 11.66168212890625, 1.0906753540039062, 10.257179260253906, -0.25485801696777344, -0.8111934661865234, 1.8544425964355469, 1.6122016906738281, 2.29095458984375, 2.425443649291992, 6.762725830078125, 2.3851356506347656, 7.131752014160156, 0.1547527313232422, 0.8718109130859375, 3.1396636962890625, 5.007545471191406, 1.666656494140625, 2.4406280517578125, 1.8556861877441406, 3.7114810943603516, -1.0384292602539062, 2.2943878173828125, 5.8866424560546875, 0.41710662841796875, 8.534400939941406, 5.43682861328125, 3.538116455078125, 1.9117584228515625, 5.229820251464844, 0.8247528076171875, 10.189895629882812, 0.559356689453125, 4.693077087402344, 6.637214660644531, 1.5430450439453125, 11.7303466796875, -2.9703598022460938, -0.6440982818603516, 2.686676025390625, 7.832792282104492, 5.09033203125, 2.7081680297851562, -2.6458206176757812, 9.24273681640625, 0.14755630493164062, 3.2447433471679688, 7.095420837402344, 2.069000244140625, 15.749439239501953, 7.171882629394531, 12.598358154296875, 6.088127136230469, 1.1741218566894531, 5.931877136230469, 3.1719131469726562, 3.9567718505859375, -4.172943115234375, 7.6969451904296875, 10.628349304199219, 9.54254150390625, 2.5899105072021484, 3.6558074951171875, 4.5237579345703125, 0.05911064147949219, 7.876487731933594, -2.75103759765625, 1.6370391845703125, 4.101104736328125, 8.698554992675781, 2.4254913330078125, 5.8701171875, -3.1056976318359375, 6.3394927978515625, 8.153511047363281, 0.011075973510742188, 4.995555877685547, 0.700439453125, -8.403617858886719, 4.7244110107421875, 6.824272155761719, 4.5606689453125, 4.145811080932617, 6.28045654296875, 5.9539947509765625, 3.7265548706054688, -3.1871795654296875, -2.6437911987304688, 2.2200584411621094, 8.757171630859375, 11.652481079101562, -12.001590728759766, 3.200803756713867, 14.8876953125, 4.2816009521484375, 2.308246612548828, 1.8685646057128906, 8.282094955444336, 7.461944580078125, 2.7030582427978516, 2.134387969970703, 2.7934646606445312, -0.08258819580078125, 1.8788604736328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000036.npy"}
{"epoch": 0.10588235294117647, "step": 37, "batch_size": 128, "mean": 4.069901466369629, "std": 5.969326019287109, "min": -13.13848876953125, "p10": -1.0675910949707028, "median": 2.7696456909179688, "p90": 11.209390258789062, "max": 32.189483642578125, "pos_frac": 0.8046875, "sample": [11.249465942382812, 1.1541061401367188, 2.413238525390625, 10.334152221679688, 0.32198524475097656, 9.300064086914062, 0.7791709899902344, -1.9274520874023438, 10.591476440429688, 1.3150787353515625, 2.2790966033935547, 4.502449035644531, 1.6535148620605469, 3.5803794860839844, 1.8095474243164062, 0.9260616302490234, 10.383163452148438, -0.6283111572265625, -0.9871139526367188, 3.693584442138672, 1.850128173828125, -2.8716506958007812, 1.5780582427978516, 0.9115753173828125, 4.241203308105469, 15.703033447265625, 4.568550109863281, 2.9201202392578125, 13.798980712890625, 1.4189414978027344, 4.792816162109375, -0.6536102294921875, 2.9446029663085938, -1.4346466064453125, 12.133712768554688, 7.449241638183594, 12.026603698730469, -0.233917236328125, 9.291824340820312, -8.005264282226562, -0.8896923065185547, 10.948848724365234, -1.8834915161132812, 4.051994323730469, 11.51385498046875, 0.564117431640625, 6.364616394042969, 0.9964656829833984, -1.4108428955078125, -0.28328704833984375, -2.9012451171875, 0.06708145141601562, 6.47998046875, 0.35440826416015625, 1.7649002075195312, 1.2850818634033203, 5.28485107421875, 14.220794677734375, 2.446502685546875, 26.355850219726562, 1.4237537384033203, 9.980728149414062, 1.2967071533203125, -13.13848876953125, 13.583114624023438, 2.731414794921875, 1.4632911682128906, -0.8460445404052734, 4.3151397705078125, 2.545969009399414, 3.7445125579833984, 11.192214965820312, 5.995704650878906, 10.191207885742188, 3.3876571655273438, 0.28969573974609375, 0.6431427001953125, 10.189849853515625, 5.53436279296875, 6.741291046142578, -0.081146240234375, 5.7787017822265625, -5.394828796386719, 5.001140594482422, 2.8395462036132812, 1.009979248046875, 2.8178138732910156, 1.0692062377929688, 1.6565322875976562, 0.46793365478515625, 3.727100372314453, 17.313751220703125, 4.491767883300781, 6.8695220947265625, 22.341552734375, 5.148983001708984, -0.9496536254882812, 3.0318374633789062, 4.071929931640625, 0.9651393890380859, 11.524600982666016, 0.306732177734375, 7.183815002441406, 32.189483642578125, 7.392768859863281, 3.6847457885742188, -0.7765483856201172, 1.0587005615234375, 2.464632034301758, -1.9911270141601562, -1.25537109375, 0.9665679931640625, 0.6029205322265625, 2.8078765869140625, 10.82476806640625, 8.909011840820312, 1.7448806762695312, 7.867462158203125, 4.285499572753906, -0.5364952087402344, 2.609527587890625, 3.0714111328125, 4.4445648193359375, 8.802322387695312, -2.3565139770507812, -1.5512924194335938, -0.200408935546875, 2.9261093139648438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000037.npy"}
{"epoch": 0.10882352941176471, "step": 38, "batch_size": 128, "mean": 5.168169975280762, "std": 9.001235961914062, "min": -10.682228088378906, "p10": -2.187591552734375, "median": 3.6268482208251953, "p90": 12.376627349853514, "max": 60.73602294921875, "pos_frac": 0.828125, "sample": [-0.8091831207275391, 6.2356109619140625, 3.605121612548828, 10.171043395996094, 12.163215637207031, -6.962181091308594, -4.3341522216796875, 3.514446258544922, 10.243499755859375, 60.73602294921875, 0.9998569488525391, 0.04984474182128906, 7.1923370361328125, 6.917724609375, 4.981775283813477, 6.648826599121094, 0.3296356201171875, 59.21018981933594, 0.5175704956054688, 1.8018341064453125, 13.290878295898438, -3.3644638061523438, 5.776313781738281, -4.370710372924805, 7.6916656494140625, 15.844284057617188, 2.7670440673828125, -2.2789154052734375, 1.0583438873291016, 1.3838348388671875, 4.122081756591797, 1.0730056762695312, 17.858360290527344, 17.1744384765625, 9.605979919433594, 0.6165218353271484, -5.2694091796875, -1.9930686950683594, 23.84613800048828, 3.829303741455078, 10.147598266601562, 9.619483947753906, 12.874588012695312, 1.480783462524414, 4.308111190795898, 20.41857147216797, 9.043594360351562, 1.52728271484375, 0.22159957885742188, 1.6798171997070312, 0.24391937255859375, 3.6485748291015625, -2.773101806640625, 5.5390625, 3.198883056640625, 5.268577575683594, 7.531341552734375, 6.860809326171875, -1.525299072265625, 0.6067352294921875, 11.337776184082031, 16.184722900390625, 14.684890747070312, 2.4906463623046875, -0.8231658935546875, 0.2907867431640625, 1.2199535369873047, 8.552780151367188, 0.9186859130859375, 9.940032958984375, 3.380584716796875, 10.092514038085938, 5.755138397216797, 16.457679748535156, 11.045204162597656, 3.163177490234375, 2.7516021728515625, 0.8805522918701172, 0.18944931030273438, 7.54046630859375, 8.886600494384766, 4.808210372924805, 7.970489501953125, 1.2252769470214844, 2.1611099243164062, 4.8618621826171875, -1.7132110595703125, -2.5563278198242188, -0.8469314575195312, 1.0974197387695312, 3.9689254760742188, -3.1978759765625, 1.2107162475585938, 8.191413879394531, 9.919044494628906, -0.9871673583984375, 6.482151031494141, 1.5784416198730469, 2.598541259765625, 4.293109893798828, -2.981657028198242, 8.505416870117188, 2.0388107299804688, 0.9899387359619141, -10.682228088378906, -2.1484527587890625, 0.92303466796875, 0.1292724609375, 3.8865203857421875, -1.84429931640625, 7.208942413330078, 4.2386627197265625, 0.7043418884277344, 0.03590583801269531, -2.7837142944335938, 1.8534622192382812, 4.8325347900390625, 13.6566162109375, 8.733306884765625, 8.961223602294922, 0.435546875, 8.318809509277344, 10.985095977783203, 4.77349853515625, 4.22686767578125, 7.459022521972656, 8.148635864257812, -6.8802490234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000038.npy"}
{"epoch": 0.11176470588235295, "step": 39, "batch_size": 128, "mean": 4.4183759689331055, "std": 8.538651466369629, "min": -23.179393768310547, "p10": -3.275436210632324, "median": 3.752608299255371, "p90": 13.614930725097654, "max": 35.439910888671875, "pos_frac": 0.75, "sample": [4.105491638183594, 8.027122497558594, 2.1342315673828125, 10.992805480957031, 6.917125701904297, -0.7133712768554688, 26.8157958984375, 4.594242095947266, -11.281143188476562, 12.912376403808594, 8.1546630859375, -22.150917053222656, 4.801874160766602, -5.252910614013672, 5.103166580200195, 0.7819366455078125, -0.1324176788330078, 22.238250732421875, -1.5993881225585938, 0.8070774078369141, 35.439910888671875, 18.874374389648438, 6.905914306640625, 4.902717590332031, -0.7104110717773438, 7.9266510009765625, -1.1139259338378906, 11.179550170898438, 2.920684814453125, 9.195693969726562, -3.130950927734375, -0.5091400146484375, 2.235292434692383, -13.204643249511719, 2.377666473388672, 11.655044555664062, 10.14352035522461, 9.106128692626953, 6.9769134521484375, 3.8919296264648438, -0.04119110107421875, 19.080177307128906, -0.4392814636230469, 14.324974060058594, 1.9230308532714844, 4.037792205810547, 2.3805809020996094, 10.295631408691406, -1.1188201904296875, 1.9313201904296875, -5.77374267578125, 1.526620864868164, 1.7353363037109375, 10.167205810546875, 6.6258087158203125, 12.387752532958984, 2.649749755859375, -7.977180480957031, 16.769927978515625, 7.6000518798828125, 7.197231292724609, 14.779106140136719, 4.762050628662109, -4.6173095703125, 9.281982421875, -1.1034393310546875, 3.996124267578125, 2.0640392303466797, -1.4750137329101562, 24.7235107421875, -0.1177215576171875, 27.723358154296875, 0.26416015625, 0.8024482727050781, 6.020347595214844, 0.254791259765625, 0.2733802795410156, 5.474937438964844, 6.5232391357421875, 4.851951599121094, 6.035192489624023, 3.8683090209960938, -0.43892669677734375, -4.829826354980469, 15.397003173828125, 11.480152130126953, -13.922760009765625, 0.5144271850585938, 9.907079696655273, -0.4492912292480469, 3.1784229278564453, -0.11857032775878906, 7.886083602905273, 1.4562301635742188, 12.743034362792969, 0.8330535888671875, 9.598678588867188, 11.272148132324219, 1.128927230834961, -0.7062034606933594, -2.297027587890625, -3.4963130950927734, 8.0750732421875, 7.380035400390625, -3.180774688720703, -20.32501220703125, -23.179393768310547, 3.33148193359375, 6.7843017578125, 2.3163928985595703, 13.459075927734375, 3.6369075775146484, 12.743976593017578, 2.3824005126953125, 16.316482543945312, 6.449371337890625, 6.804901123046875, 0.11472702026367188, 1.889547348022461, 2.140127182006836, 13.978591918945312, 2.0129013061523438, -5.06719970703125, 9.353988647460938, 5.6610565185546875, 5.196044921875, 2.8636322021484375, 3.2897415161132812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000039.npy"}
{"epoch": 0.11470588235294117, "step": 40, "batch_size": 128, "mean": 5.10268497467041, "std": 8.003689765930176, "min": -7.811883926391602, "p10": -2.940072631835937, "median": 3.1631336212158203, "p90": 14.767324256896973, "max": 41.75604248046875, "pos_frac": 0.7421875, "sample": [1.7993545532226562, 10.4290771484375, 2.2075424194335938, 18.958602905273438, 4.044197082519531, 0.793548583984375, -1.2596874237060547, 0.43470001220703125, 11.654281616210938, 13.6378173828125, 2.77813720703125, 1.2438812255859375, 25.187057495117188, 3.9155426025390625, -1.1706924438476562, 1.030364990234375, 0.57147216796875, 8.630321502685547, -4.32098388671875, 3.1013221740722656, -1.9582557678222656, 4.669384002685547, 26.21478271484375, 10.09747314453125, 3.1020050048828125, 0.41207122802734375, -2.899625778198242, -0.40895652770996094, -3.3925399780273438, 18.485137939453125, -1.1492080688476562, 12.190292358398438, 21.56717300415039, 15.38616943359375, 6.372550964355469, -7.388336181640625, 5.385898590087891, 26.31878662109375, 6.800025939941406, 10.99465560913086, 1.0458297729492188, 0.19423675537109375, 1.9949264526367188, 14.760650634765625, 6.554595947265625, 10.607681274414062, 3.224262237548828, 1.603250503540039, 12.409236907958984, 5.990966796875, 9.244758605957031, 13.485284805297852, 13.675949096679688, -1.3352413177490234, 0.7998123168945312, 7.636497497558594, 2.515228271484375, 1.6016616821289062, -1.5889511108398438, -0.7114219665527344, -1.5021820068359375, 15.403030395507812, 2.513957977294922, 9.038337707519531, 6.160125732421875, 0.7821617126464844, 3.5580596923828125, 6.1566619873046875, -3.763216018676758, 0.04340171813964844, 0.06221771240234375, -0.1717987060546875, 41.75604248046875, 5.880775451660156, 35.37852478027344, 8.739734649658203, 7.606864929199219, 4.316734313964844, 13.059478759765625, 11.57177734375, 2.7681121826171875, -0.29882049560546875, 15.088150024414062, -1.1607666015625, -5.1436004638671875, 14.330818176269531, -3.341503143310547, 1.95111083984375, 4.343160629272461, 4.2076568603515625, 3.725109100341797, 0.8040008544921875, -0.339935302734375, -0.6205062866210938, 3.466604232788086, 5.103172302246094, 6.307464599609375, -1.3866500854492188, 0.8843441009521484, 4.569713592529297, -5.914070129394531, -4.460968017578125, 2.6236114501953125, 0.30646514892578125, 14.782896041870117, -2.226917266845703, -0.9603309631347656, 11.173355102539062, -7.811883926391602, -3.496185302734375, 1.2927627563476562, -5.069122314453125, 4.09571647644043, 5.5315704345703125, 1.299102783203125, -0.7517471313476562, 7.1679229736328125, 3.5449447631835938, 2.0728378295898438, 15.043869018554688, -3.107433319091797, -1.4101142883300781, 8.82632827758789, 9.539695739746094, 6.474700927734375, 5.3585662841796875, -3.0344486236572266, 12.229629516601562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000040.npy"}
{"epoch": 0.11764705882352941, "step": 41, "batch_size": 128, "mean": 5.425037384033203, "std": 8.108176231384277, "min": -8.918174743652344, "p10": -3.390685272216796, "median": 3.9859018325805664, "p90": 15.57981491088867, "max": 35.429107666015625, "pos_frac": 0.7890625, "sample": [7.96844482421875, 6.86474609375, 2.51715087890625, 13.42251968383789, 0.5614185333251953, 13.9891357421875, 3.6262893676757812, -7.5970611572265625, 15.209342956542969, 3.9561843872070312, 25.419464111328125, -3.127532958984375, 0.3790092468261719, 2.0097808837890625, 2.348052978515625, 4.803215026855469, 1.863922119140625, 4.516319274902344, 10.219329833984375, 19.02227783203125, 7.444633483886719, 3.7168960571289062, 1.0144882202148438, 22.3289794921875, 7.6468505859375, 0.191864013671875, 4.015619277954102, 0.19686126708984375, 5.518423080444336, -2.853656768798828, 13.199533462524414, 4.196971893310547, 2.2939834594726562, 5.205543518066406, 3.7125701904296875, 2.2205963134765625, -0.5056304931640625, 12.539993286132812, 22.699127197265625, 1.0633258819580078, 6.19219970703125, 2.9777889251708984, 6.69073486328125, 5.593952178955078, 3.3938751220703125, 2.6296234130859375, 2.879669189453125, 6.000579833984375, 10.897171020507812, 9.368515014648438, 6.689811706542969, -8.918174743652344, -1.0551528930664062, -0.342559814453125, -2.9063034057617188, 7.698923110961914, 26.0467529296875, -0.607818603515625, -7.947052001953125, 14.920402526855469, 5.813560485839844, 3.3102798461914062, -5.878715515136719, 8.936546325683594, -0.7667770385742188, 7.559722900390625, -1.0745086669921875, 5.398159027099609, 1.251230239868164, -2.179656982421875, -4.4135894775390625, 0.02336883544921875, 6.454109191894531, -7.204620361328125, -4.004707336425781, 8.212478637695312, 0.4264373779296875, 5.725120544433594, 16.340103149414062, -5.213321685791016, 5.1105804443359375, -0.16846656799316406, 8.37152099609375, 7.125728607177734, 8.913902282714844, 1.5376930236816406, -8.20189094543457, -1.7035598754882812, 6.91473388671875, -4.342071533203125, 0.9775505065917969, 0.4781951904296875, -6.28759765625, 8.705085754394531, 4.9592437744140625, 4.65643310546875, 7.970207214355469, 12.85791015625, 26.859664916992188, 3.881938934326172, -2.792156219482422, 3.0204086303710938, 4.407741546630859, 31.933883666992188, -8.230697631835938, -2.6178741455078125, 2.2775421142578125, 2.9203033447265625, 5.695930480957031, 8.388839721679688, 1.6084823608398438, 14.534378051757812, 3.026897430419922, 17.879600524902344, 11.837020874023438, -5.0063629150390625, 18.726303100585938, 2.3722457885742188, 3.946186065673828, 12.320152282714844, 2.7829513549804688, 15.3544921875, 16.105567932128906, 35.429107666015625, 6.131797790527344, 8.513008117675781, 1.958160400390625, 20.526931762695312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000041.npy"}
{"epoch": 0.12058823529411765, "step": 42, "batch_size": 128, "mean": 4.522137641906738, "std": 8.330163955688477, "min": -31.94463348388672, "p10": -2.61282386779785, "median": 4.033245086669922, "p90": 13.924740600585936, "max": 28.859817504882812, "pos_frac": 0.7734375, "sample": [8.975452423095703, 8.289085388183594, 6.231636047363281, 19.728042602539062, 7.6071929931640625, 3.6331214904785156, -1.304962158203125, 13.905487060546875, -3.5596466064453125, 4.668182373046875, 1.3331451416015625, 6.648719787597656, 7.8405303955078125, 4.000816345214844, 18.941604614257812, -0.768646240234375, 2.177875518798828, 1.3764724731445312, 5.708656311035156, 2.906707763671875, 5.475761413574219, -0.6557998657226562, 8.686134338378906, 3.94378662109375, -31.94463348388672, -7.484840393066406, 3.9437255859375, 13.644500732421875, -9.297645568847656, 6.490814208984375, 5.193485260009766, 9.766948699951172, 20.4686279296875, 10.179851531982422, 2.4317092895507812, 2.5879058837890625, 11.787063598632812, 1.054168701171875, 4.439502716064453, 28.859817504882812, -1.8795852661132812, -4.258148193359375, 19.968963623046875, -7.640357971191406, 8.672357559204102, 18.57794189453125, 6.35516357421875, 6.2848358154296875, 2.0881118774414062, 8.210769653320312, 15.445640563964844, 19.92388916015625, 4.341907501220703, 9.172142028808594, 0.7606277465820312, -9.230224609375, -1.949798583984375, 0.30677032470703125, 2.3807106018066406, 22.040374755859375, 17.153961181640625, 7.500801086425781, 8.748672485351562, 3.5054588317871094, 3.739471435546875, 3.774810791015625, -1.666412353515625, 2.655794143676758, 18.167205810546875, -1.589569091796875, 3.6053924560546875, 1.3561553955078125, 2.5403060913085938, 26.916259765625, 7.212684631347656, 3.6238861083984375, 3.601459503173828, -0.30463409423828125, 9.313438415527344, -0.48649024963378906, -6.087139129638672, 2.9164352416992188, 4.252544403076172, -1.482421875, 5.464111328125, 5.83197021484375, 2.7383956909179688, 2.616363525390625, 13.680503845214844, 6.544891357421875, -13.367446899414062, 6.645530700683594, 10.159675598144531, 6.1988372802734375, 10.6072998046875, 13.96966552734375, 5.830881118774414, -1.6969146728515625, 8.804996490478516, 12.7294921875, -0.42203521728515625, 4.725669860839844, 1.0654525756835938, -11.689125061035156, -0.27605247497558594, 2.255329132080078, 9.000633239746094, 4.065673828125, -18.25994873046875, 0.4442100524902344, 8.497642517089844, 6.4297637939453125, 1.8412857055664062, 4.4522552490234375, 2.041604995727539, -2.207042694091797, 7.59808349609375, 5.968128204345703, 1.4229679107666016, -4.665578842163086, 10.89077377319336, 1.2297554016113281, -0.9141387939453125, -18.23126983642578, 2.528379440307617, 11.98097038269043, 5.997673034667969, -0.146209716796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000042.npy"}
{"epoch": 0.12352941176470589, "step": 43, "batch_size": 128, "mean": 5.104216575622559, "std": 9.539302825927734, "min": -23.489837646484375, "p10": -3.4874317169189446, "median": 3.9308557510375977, "p90": 15.409455871582031, "max": 33.93780517578125, "pos_frac": 0.796875, "sample": [10.994384765625, 3.6105194091796875, 10.2008056640625, 0.2542839050292969, 3.582977294921875, 6.489736557006836, 2.0758285522460938, -2.7989768981933594, -0.095001220703125, -2.5304946899414062, 5.3446044921875, 4.34393310546875, -8.243537902832031, 30.96795654296875, 5.6031646728515625, -5.845436096191406, 0.220458984375, 2.0013675689697266, -16.055419921875, 8.049402236938477, 24.40326690673828, 11.704055786132812, 8.794208526611328, 1.9548015594482422, 3.9024810791015625, 1.6664066314697266, 4.794319152832031, 17.416038513183594, -0.21746826171875, 2.6793212890625, 5.361885070800781, 3.959230422973633, 33.107093811035156, -1.4731388092041016, -1.8474578857421875, 0.89697265625, 15.657119750976562, 9.457962036132812, 2.2559680938720703, 8.245426177978516, 2.543914794921875, 7.401031494140625, 2.9549942016601562, 3.0294189453125, 3.3529891967773438, -8.854454040527344, 14.900886535644531, 8.766143798828125, -21.134262084960938, 9.15677261352539, -8.241256713867188, 10.545501708984375, 12.785537719726562, 13.067554473876953, 14.537952423095703, 6.317718505859375, 2.5062713623046875, 2.138641357421875, 11.645088195800781, 4.451271057128906, -7.3580322265625, 2.237457275390625, -0.4476814270019531, 17.49737548828125, -3.8657073974609375, 12.756446838378906, 14.445018768310547, 4.446590423583984, -5.485774993896484, 1.2529067993164062, 1.6000175476074219, 3.34783935546875, 17.403884887695312, 7.701667785644531, 4.9989471435546875, -23.489837646484375, 2.145538330078125, 1.6504936218261719, 9.76247787475586, 4.150104522705078, 2.1400299072265625, -1.2990264892578125, 9.513664245605469, 15.303314208984375, 2.0849075317382812, -12.059494018554688, 31.249465942382812, -0.11118316650390625, 17.127288818359375, 1.326080322265625, -21.837890625, 8.245502471923828, 11.283266067504883, 5.274494171142578, 10.960342407226562, 7.260158538818359, 26.28851318359375, 4.314491271972656, 0.4465751647949219, 5.16375732421875, 5.953044891357422, 3.5795822143554688, 4.885101318359375, 9.166828155517578, 2.1027374267578125, -3.3253135681152344, -0.1831817626953125, 4.386329650878906, -1.5894622802734375, 0.08553314208984375, 11.850025177001953, 2.7126426696777344, 33.93780517578125, 0.7668514251708984, 5.616302490234375, 4.581539154052734, 3.34283447265625, 24.455718994140625, 3.1043472290039062, 31.37957763671875, 2.719715118408203, 1.164957046508789, 4.9347076416015625, 1.7999019622802734, -0.8362598419189453, 6.744041442871094, -6.635711669921875, 6.454807281494141], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000043.npy"}
{"epoch": 0.1264705882352941, "step": 44, "batch_size": 128, "mean": 5.212080955505371, "std": 10.136468887329102, "min": -24.7210693359375, "p10": -4.725399017333984, "median": 3.8116912841796875, "p90": 17.10994415283203, "max": 42.69598388671875, "pos_frac": 0.734375, "sample": [3.546365737915039, 6.169517517089844, 1.296091079711914, 5.808620452880859, 7.577880859375, 0.585174560546875, 11.317516326904297, -0.1491546630859375, 14.91680908203125, -3.5124740600585938, 21.117645263671875, 19.022308349609375, -2.4954795837402344, 3.2740478515625, 1.2487335205078125, 13.72879409790039, 22.145339965820312, 5.301856994628906, 16.562583923339844, 6.772968292236328, 4.629127502441406, 6.754142761230469, 0.81103515625, 17.509429931640625, 4.773899078369141, 8.289337158203125, -2.807323455810547, -2.513904571533203, 3.7460498809814453, -5.136081695556641, 0.01026153564453125, 25.728256225585938, -0.20220184326171875, 1.9692535400390625, 5.604759216308594, 0.3129730224609375, -4.1606292724609375, 30.086509704589844, 15.600631713867188, 12.222877502441406, 4.101802825927734, 1.0464534759521484, 2.0451698303222656, 35.989776611328125, 0.7241363525390625, -0.8228797912597656, -8.549430847167969, 12.30523681640625, -0.2707099914550781, 9.507598876953125, 5.133159637451172, -4.629119873046875, -24.7210693359375, 0.3287086486816406, 7.3074951171875, -5.894390106201172, 11.649932861328125, 1.6073379516601562, -9.332855224609375, 6.5289764404296875, 3.8060302734375, 2.052560806274414, 9.249893188476562, -0.033172607421875, -6.410083770751953, 6.847023010253906, 22.189464569091797, 35.658203125, 4.2982177734375, 4.409820556640625, 1.2149906158447266, 14.652381896972656, -4.950050354003906, 15.62738037109375, -6.4221038818359375, -15.376739501953125, -0.29735374450683594, 10.051605224609375, 5.340297698974609, -14.520416259765625, 0.38439178466796875, 21.163856506347656, 2.0412559509277344, 40.13140869140625, 3.949859619140625, 21.036727905273438, 1.7662525177001953, 42.69598388671875, 4.838836669921875, 5.5487518310546875, 6.131065368652344, 9.250991821289062, 0.3259010314941406, 2.0319747924804688, -6.220649719238281, 1.8715133666992188, -3.9775390625, 2.314432144165039, 8.590194702148438, 1.4749565124511719, 7.737403869628906, 2.9660263061523438, -6.6443023681640625, 12.892425537109375, 9.262474060058594, -1.2224559783935547, 16.938735961914062, -2.9059295654296875, 7.721506118774414, 12.254219055175781, 10.791213989257812, 7.3368072509765625, 6.613990783691406, 4.3887786865234375, 0.6433677673339844, -0.9314918518066406, 3.817352294921875, 4.845161437988281, -2.9475460052490234, 4.30476188659668, 0.1429595947265625, -2.0732765197753906, -0.5395431518554688, -0.030391693115234375, 3.4485092163085938, -2.2895355224609375, -9.633224487304688, 6.001373291015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000044.npy"}
{"epoch": 0.12941176470588237, "step": 45, "batch_size": 128, "mean": 5.09078311920166, "std": 9.180158615112305, "min": -16.552734375, "p10": -2.6028364181518553, "median": 4.293193817138672, "p90": 15.890392303466795, "max": 52.9517822265625, "pos_frac": 0.7734375, "sample": [6.23016357421875, 2.287046432495117, 0.3361778259277344, 7.702144622802734, -2.6752891540527344, 11.010101318359375, 7.829986572265625, 8.037891387939453, -0.1781158447265625, 3.380279541015625, -2.2192764282226562, 2.676464080810547, 5.03754997253418, 2.221698760986328, 2.4896602630615234, 3.503896713256836, 3.9854278564453125, 16.145645141601562, 3.459747314453125, -0.6713714599609375, 4.432516098022461, 7.8089447021484375, 0.791778564453125, -4.632335662841797, 6.675872802734375, 5.910614013671875, 10.041561126708984, 8.197860717773438, -3.0521392822265625, 3.110179901123047, 10.419418334960938, -16.552734375, -4.208320617675781, 17.325607299804688, 5.087799072265625, 6.290254592895508, 3.8848037719726562, 5.609092712402344, 7.0407257080078125, -6.6839752197265625, 1.5770111083984375, 0.9406776428222656, -1.9513702392578125, 5.161611557006836, 22.340347290039062, 5.1441650390625, 1.6521987915039062, 31.6103515625, 10.500804901123047, 5.386253356933594, 8.647014617919922, 5.6240234375, 0.24688720703125, -0.7556114196777344, -1.310873031616211, -1.7054100036621094, -1.4345550537109375, 5.393241882324219, -1.6482315063476562, 2.4560680389404297, 6.3230743408203125, 5.674110412597656, 24.985763549804688, 4.844348907470703, 13.082855224609375, -8.798019409179688, 15.780998229980469, 20.8375244140625, 6.3999176025390625, 9.92087173461914, -0.7047042846679688, 0.316925048828125, 1.9758110046386719, 43.400726318359375, 5.7683258056640625, 1.4425506591796875, -0.8340187072753906, 4.348655700683594, 7.268402099609375, 0.7953948974609375, -2.55010986328125, 2.016582489013672, -7.689300537109375, -13.289260864257812, 2.5942916870117188, 2.8373336791992188, 6.0016326904296875, -3.5993118286132812, 1.3424072265625, 2.5667800903320312, 6.162174224853516, 2.018697738647461, 4.4523162841796875, 7.641204833984375, 20.4310302734375, 7.788490295410156, 20.61773681640625, -2.5316333770751953, 9.801998138427734, 4.5608062744140625, 1.5176773071289062, 9.380470275878906, 3.019775390625, 11.007698059082031, 52.9517822265625, 8.704950332641602, 6.450408935546875, 18.784942626953125, 0.7703933715820312, -1.7570381164550781, 14.131500244140625, 5.743736267089844, 5.368358612060547, 2.2985992431640625, -15.500717163085938, 0.6771602630615234, -2.62738037109375, 17.397537231445312, 17.347915649414062, -11.814704895019531, 1.2634754180908203, -2.592317581176758, 4.23773193359375, 2.484302520751953, -1.7292633056640625, 5.458547592163086, 11.42767333984375, 7.251705169677734], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000045.npy"}
{"epoch": 0.1323529411764706, "step": 46, "batch_size": 128, "mean": 4.515048027038574, "std": 9.690905570983887, "min": -26.943984985351562, "p10": -5.995756530761717, "median": 2.8196792602539062, "p90": 16.12936477661133, "max": 34.97332763671875, "pos_frac": 0.6953125, "sample": [1.0153045654296875, 13.91522216796875, -8.250640869140625, 3.015106201171875, 18.046646118164062, -0.3734626770019531, -2.4236907958984375, 5.384227752685547, 2.8048858642578125, 0.18265533447265625, 1.9627532958984375, 22.870147705078125, 4.841762542724609, 15.834373474121094, 23.358001708984375, -3.4393234252929688, 16.743896484375, -9.153106689453125, 5.589576721191406, 4.256320953369141, -0.1446990966796875, -13.02374267578125, -0.8328018188476562, 0.10645866394042969, 6.775520324707031, 13.376869201660156, 9.072784423828125, -1.1650772094726562, -1.8141021728515625, 2.6162185668945312, 0.5854377746582031, 11.742603302001953, 10.193061828613281, -1.042144775390625, 14.44488525390625, 6.874622344970703, 11.314437866210938, -1.9021415710449219, -8.677993774414062, 34.97332763671875, 8.7216796875, -6.8115081787109375, 1.1687240600585938, 0.6545276641845703, 0.9172077178955078, -3.8207015991210938, 12.296478271484375, 5.014518737792969, 4.4657745361328125, 7.1993408203125, 14.752845764160156, -4.5420379638671875, -10.400840759277344, -26.943984985351562, -9.080314636230469, 20.84618377685547, -7.648628234863281, 20.281166076660156, 7.425235748291016, 1.1499214172363281, 10.5654296875, -4.202049255371094, 7.890625, 16.125205993652344, -4.618621826171875, 12.13836669921875, 8.87628173828125, 8.541511535644531, -0.8295154571533203, 0.7307662963867188, -5.646148681640625, -0.7056560516357422, 8.492965698242188, 22.693115234375, -14.706085205078125, -2.91497802734375, 15.200061798095703, -4.312446594238281, 9.037826538085938, -2.3429489135742188, -0.12165069580078125, 1.8692741394042969, 6.720516204833984, 1.3282527923583984, 10.081157684326172, 33.21699523925781, 1.1716136932373047, 16.139068603515625, 7.2942657470703125, 4.947736740112305, 7.182210922241211, -12.895919799804688, 12.50967788696289, 1.4442081451416016, 1.0847702026367188, 10.920974731445312, 10.304275512695312, 4.167520523071289, 9.12618637084961, -20.638404846191406, 16.078689575195312, 0.19274330139160156, 2.05267333984375, 18.235496520996094, 1.5537109375, 12.527755737304688, -3.7523040771484375, 15.384918212890625, 17.524742126464844, -5.325687408447266, 12.829689025878906, -0.30574798583984375, 8.988792419433594, -10.319297790527344, -4.893196105957031, 2.3665771484375, 3.2598342895507812, 1.9086551666259766, 1.0892333984375, 12.944221496582031, 16.30914878845215, 0.1834716796875, 15.759193420410156, 2.83447265625, -4.551025390625, 1.0626449584960938, 12.942413330078125, -0.12186431884765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000046.npy"}
{"epoch": 0.13529411764705881, "step": 47, "batch_size": 128, "mean": 4.831465721130371, "std": 8.655977249145508, "min": -26.637115478515625, "p10": -2.786515808105469, "median": 3.0757980346679688, "p90": 14.722028732299805, "max": 46.33436584472656, "pos_frac": 0.7421875, "sample": [2.400054931640625, 0.9485893249511719, 3.3222007751464844, 14.63693618774414, 8.13113021850586, 3.2779998779296875, -2.041473388671875, 3.0767974853515625, 8.087844848632812, -2.9727249145507812, -15.738899230957031, 14.920578002929688, -2.029773712158203, -0.056766510009765625, 3.6224288940429688, -1.8992691040039062, 2.030862808227539, 13.336910247802734, 13.150325775146484, 15.668960571289062, 5.460422515869141, 1.1846542358398438, -0.9583606719970703, 12.707244873046875, 10.220771789550781, -26.637115478515625, -1.789276123046875, 10.339187622070312, 6.526481628417969, 6.469902038574219, -0.13125991821289062, 1.152618408203125, 8.237472534179688, -2.4373855590820312, 27.535598754882812, 1.1286506652832031, 6.090263366699219, 13.689079284667969, 46.33436584472656, 2.054647445678711, 13.680099487304688, 1.9264984130859375, -4.297821044921875, 4.8383941650390625, -1.6084728240966797, 3.7794189453125, 1.4964466094970703, -1.0167007446289062, 2.6335220336914062, -10.0279541015625, 6.652923583984375, 2.4167537689208984, 1.6462326049804688, 0.6720924377441406, 4.779638290405273, 1.5807018280029297, 1.0135116577148438, 10.078605651855469, -2.0302886962890625, 10.016170501708984, 5.672294616699219, 5.438850402832031, -2.445465087890625, 0.29395294189453125, -2.1192550659179688, -0.21544265747070312, 14.524749755859375, -0.5179080963134766, 1.0438385009765625, -2.249969482421875, 2.204376220703125, 17.770599365234375, 8.284690856933594, 2.8354110717773438, 0.26627349853515625, 21.58355712890625, 0.7598190307617188, 4.475742340087891, 13.524391174316406, 10.3482666015625, -0.5417385101318359, 7.32501220703125, 11.455760955810547, 5.418819427490234, 13.929946899414062, 12.273567199707031, 7.698003768920898, -2.801971435546875, 23.087677001953125, -1.487091064453125, -14.992706298828125, 16.28173065185547, 2.9092330932617188, 12.841110229492188, -3.584819793701172, 3.51007080078125, 2.2747840881347656, -4.9251556396484375, 15.037199020385742, 1.8761749267578125, 3.4319515228271484, 4.06744384765625, -0.4014739990234375, -3.745769500732422, 3.074798583984375, 18.515899658203125, 8.136486053466797, 6.968235015869141, 8.096488952636719, 3.0300445556640625, -2.7798919677734375, 6.001682281494141, 1.3464279174804688, 12.198287963867188, 0.644805908203125, 0.2639312744140625, 18.138015747070312, 12.504005432128906, -4.937744140625, 21.6136474609375, -4.1549224853515625, -10.40655517578125, 10.796646118164062, 2.5270004272460938, 7.410833358764648, 9.23895263671875, 17.464080810546875, 3.0394744873046875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000047.npy"}
{"epoch": 0.13823529411764707, "step": 48, "batch_size": 128, "mean": 5.934237480163574, "std": 9.335739135742188, "min": -18.664794921875, "p10": -2.41541748046875, "median": 3.998394012451172, "p90": 15.038058471679681, "max": 60.18505859375, "pos_frac": 0.8046875, "sample": [2.21173095703125, 7.2711181640625, 9.894302368164062, -1.6166496276855469, -1.6296768188476562, 6.607147216796875, 1.8456153869628906, 3.2624263763427734, 7.538486480712891, 3.263935089111328, -0.01251983642578125, 9.696529388427734, 3.4807968139648438, 1.8042182922363281, 3.604541778564453, 2.1487083435058594, 14.316238403320312, 21.744125366210938, 14.168724060058594, 17.06756591796875, 1.8347206115722656, 1.0942974090576172, 2.7034988403320312, -3.560333251953125, 1.56036376953125, -18.664794921875, 1.1292743682861328, 1.5440673828125, 1.627471923828125, 7.433586120605469, 10.810426712036133, 1.4433250427246094, 0.17847442626953125, 17.35694122314453, 3.380950927734375, 4.007266998291016, 10.702617645263672, 23.281982421875, 12.936225891113281, 8.228172302246094, 3.989521026611328, 0.123443603515625, 2.1607742309570312, 14.171363830566406, 4.63616943359375, 3.137065887451172, -0.05707550048828125, 0.044586181640625, 8.989246368408203, 12.330780029296875, 14.118621826171875, 7.9019927978515625, 4.867486953735352, 4.220485687255859, -1.4907646179199219, 7.21238899230957, 8.585479736328125, 6.4287109375, 4.523658752441406, 5.156414031982422, -2.4914512634277344, 2.2519760131835938, 0.8453369140625, -5.2930908203125, 10.609420776367188, 8.461875915527344, 20.273330688476562, 6.38299560546875, 16.722305297851562, 23.498046875, -2.4983367919921875, 1.5020427703857422, 13.696134567260742, 13.250110626220703, -8.0509033203125, 13.7772216796875, 3.78033447265625, 7.306644439697266, 3.600494384765625, 29.002151489257812, 9.477010726928711, -0.5076675415039062, 6.440940856933594, 26.596633911132812, 7.4320831298828125, -0.02564239501953125, 6.038669586181641, 2.068265914916992, 1.2339000701904297, 11.008766174316406, 2.9066314697265625, 5.288623809814453, -2.382831573486328, -2.2762985229492188, 13.715652465820312, 8.487480163574219, -2.7529296875, 9.775764465332031, 60.18505859375, 3.7680206298828125, 5.630279541015625, 5.5828094482421875, 0.5365638732910156, 13.694759368896484, 4.1248626708984375, 0.97454833984375, 0.35193634033203125, 5.454742431640625, -3.8979644775390625, 4.445819854736328, -0.4049034118652344, 29.87408447265625, -13.17156982421875, -4.246517181396484, 4.654529571533203, -0.7296485900878906, 2.4051551818847656, -7.494041442871094, -4.4663543701171875, 10.010795593261719, 1.2811832427978516, 24.655303955078125, 28.577362060546875, 2.8143844604492188, 0.8684272766113281, 6.343544006347656, -6.124259948730469, -2.0144882202148438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000048.npy"}
{"epoch": 0.1411764705882353, "step": 49, "batch_size": 128, "mean": 5.836205959320068, "std": 8.299670219421387, "min": -20.367294311523438, "p10": -2.449131202697753, "median": 4.6503095626831055, "p90": 16.241238021850585, "max": 31.703018188476562, "pos_frac": 0.7890625, "sample": [3.2903671264648438, 17.475662231445312, 15.025833129882812, 8.9962158203125, 0.42041778564453125, 3.2251815795898438, 0.2818183898925781, 2.362730026245117, -7.8403472900390625, 6.426963806152344, 5.274040222167969, 3.3448829650878906, 5.14410400390625, -3.3505859375, 0.5762672424316406, -3.9297027587890625, 4.0456695556640625, -0.9214649200439453, 6.292961120605469, 11.616241455078125, -4.2168121337890625, 17.760921478271484, -1.3889617919921875, -2.3150768280029297, 4.410251617431641, 6.021633148193359, 2.7564125061035156, 13.147529602050781, -0.8022308349609375, 12.817325592041016, 6.184745788574219, 4.82855224609375, -0.9218959808349609, 10.778366088867188, 15.161178588867188, 6.378089904785156, 0.074737548828125, 5.498470306396484, 24.932228088378906, 8.951240539550781, 30.495849609375, -20.367294311523438, 13.81512451171875, 9.864547729492188, 12.461162567138672, 9.726476669311523, -2.7619247436523438, -1.4078483581542969, 15.532485961914062, -6.255615234375, 1.7529678344726562, 2.3427886962890625, 15.861732482910156, 13.599952697753906, -1.2409534454345703, 1.1351985931396484, 0.5962066650390625, 1.3499908447265625, 7.9396820068359375, 16.13003921508789, 1.3434906005859375, 1.6078262329101562, 11.835151672363281, 0.29909515380859375, 2.874195098876953, 2.0311546325683594, 17.997146606445312, 16.98448944091797, 4.766965866088867, 7.209510803222656, -0.3268890380859375, 6.681205749511719, -2.1687545776367188, 5.358154296875, -0.5153274536132812, -9.182342529296875, 13.847068786621094, 4.077491760253906, -9.1612548828125, -4.9000244140625, 24.152664184570312, 31.242202758789062, -10.35479736328125, 22.221221923828125, 14.258148193359375, 3.4535675048828125, 8.747848510742188, 5.413726806640625, 7.7223968505859375, 11.25079345703125, 5.254783630371094, 3.98590087890625, 9.951095581054688, 24.197341918945312, 12.572437286376953, 2.7124671936035156, 3.5461578369140625, 2.9629135131835938, 8.74368667602539, 5.70697021484375, 5.936609268188477, 4.104866027832031, 11.573768615722656, 8.170928955078125, 1.2490959167480469, 5.641811370849609, 17.571136474609375, 7.418087005615234, -0.785919189453125, 4.055887222290039, 4.8184814453125, 3.3811492919921875, -3.9225120544433594, 7.502567291259766, -0.9303436279296875, 8.05517578125, 16.500701904296875, 3.018709182739258, 8.976131439208984, 31.703018188476562, 4.533653259277344, 1.1958465576171875, -3.0218772888183594, -2.1269569396972656, 3.4027557373046875, -1.5829620361328125, 0.45125770568847656, 3.358917236328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000049.npy"}
{"epoch": 0.14411764705882352, "step": 50, "batch_size": 128, "mean": 5.005728721618652, "std": 10.851263999938965, "min": -52.736854553222656, "p10": -3.0982452392578126, "median": 3.43247127532959, "p90": 17.10511932373047, "max": 48.50006103515625, "pos_frac": 0.7890625, "sample": [1.419342041015625, -3.070159912109375, 1.462972640991211, 3.502716064453125, -0.3547325134277344, 3.1537933349609375, 8.159933090209961, 7.012115478515625, 3.045501708984375, 9.287620544433594, 2.841339111328125, -0.7073593139648438, 17.168212890625, -10.134223937988281, 5.072530746459961, 42.19822692871094, 4.80743408203125, 1.4148826599121094, 10.745491027832031, 28.406265258789062, 6.7382965087890625, 6.5224761962890625, 12.769393920898438, 2.02862548828125, -2.0964279174804688, -1.7624969482421875, 14.553237915039062, 17.808387756347656, 3.0005340576171875, -3.4467201232910156, 23.154647827148438, 0.8594131469726562, 26.07427978515625, 3.406309127807617, 4.248756408691406, 15.568145751953125, 12.079090118408203, -3.0886688232421875, 17.078079223632812, 1.49365234375, 1.2610950469970703, 26.274246215820312, 3.7878341674804688, -2.7733001708984375, 2.094146728515625, -9.4002685546875, 0.05518341064453125, 8.260330200195312, -3.553089141845703, 3.1940155029296875, 6.905143737792969, 8.647666931152344, 27.228103637695312, 4.975898742675781, 2.9348392486572266, 24.672683715820312, 8.866155624389648, 2.8978347778320312, 31.806976318359375, 2.6337432861328125, 3.200206756591797, 5.3499755859375, 4.985523223876953, 2.2602691650390625, 8.046812057495117, 4.873218536376953, -1.8587417602539062, 3.8329601287841797, 3.711698532104492, 2.8804244995117188, 12.664997100830078, 3.4586334228515625, 4.630832672119141, -12.291450500488281, 0.454681396484375, -8.704940795898438, -1.5940608978271484, 0.9229774475097656, 5.349132537841797, -9.472816467285156, 3.9572982788085938, -2.3153076171875, 23.659530639648438, 1.7828826904296875, 3.273622512817383, -7.0006256103515625, 11.263916015625, 5.0014190673828125, -1.510223388671875, 6.198383331298828, 0.5054397583007812, 19.149017333984375, -52.736854553222656, -7.7310638427734375, 48.50006103515625, 0.8831138610839844, -2.0669727325439453, 5.9174041748046875, 4.6713409423828125, 0.25476837158203125, 10.705589294433594, 2.0202865600585938, 0.08522605895996094, 16.505523681640625, 13.814140319824219, -3.068084716796875, 2.6004772186279297, 3.664581298828125, 0.4552955627441406, 1.2758064270019531, 14.174251556396484, -2.4304046630859375, 0.9124526977539062, 4.609199523925781, 4.3966827392578125, 8.30848503112793, 9.133243560791016, 10.028970718383789, 0.430999755859375, 1.6870269775390625, -3.1205902099609375, 8.081710815429688, -10.993766784667969, -5.2724609375, 6.2928314208984375, 0.9283885955810547, 4.298095703125, 3.661649703979492], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000050.npy"}
{"epoch": 0.14705882352941177, "step": 51, "batch_size": 128, "mean": 4.31201171875, "std": 8.193510055541992, "min": -15.443733215332031, "p10": -3.664046859741211, "median": 2.9647483825683594, "p90": 13.820158386230467, "max": 38.302978515625, "pos_frac": 0.703125, "sample": [8.867813110351562, 21.7806396484375, 7.096675872802734, 2.027973175048828, -0.43051910400390625, 5.7320556640625, 20.83148193359375, 2.9717063903808594, -0.893798828125, 1.1030387878417969, 15.332084655761719, 5.9458465576171875, -15.443733215332031, 10.202743530273438, 12.59124755859375, 10.246273040771484, -3.6552085876464844, 12.350765228271484, -2.4438304901123047, 2.8039627075195312, 6.5596771240234375, 8.613250732421875, -2.38262939453125, -5.370330810546875, 4.1558990478515625, 8.628702163696289, -1.9725875854492188, -1.7006988525390625, -1.748809814453125, -13.8116455078125, 11.936996459960938, 7.070646286010742, 13.980033874511719, 38.302978515625, 0.76708984375, 4.436309814453125, 0.43341064453125, 6.075939178466797, 1.2866382598876953, 0.8876495361328125, 2.8196182250976562, 10.004098892211914, 0.8724994659423828, 22.338104248046875, 21.661483764648438, 7.633354187011719, 11.265777587890625, -1.397848129272461, -0.21222686767578125, -3.6846694946289062, 7.520381927490234, 2.710693359375, 0.6461181640625, 3.349302291870117, -15.073593139648438, 6.219295501708984, -1.812225341796875, 3.9228668212890625, -1.7819137573242188, -1.0314998626708984, 5.904582977294922, -4.268762588500977, 0.1121063232421875, -3.129180908203125, -1.7021427154541016, 12.258285522460938, 13.751640319824219, 2.1883316040039062, 2.9621734619140625, -0.48030662536621094, -10.739776611328125, -4.211212158203125, -1.2036285400390625, 5.572549819946289, 0.1582183837890625, -1.0653533935546875, -0.7472381591796875, -0.9292926788330078, 4.837551116943359, -1.7079505920410156, 11.966941833496094, 10.205610275268555, 0.8850574493408203, -2.0380325317382812, 1.3603553771972656, 1.875244140625, 1.3926239013671875, -3.6297378540039062, 3.8715362548828125, 8.710441589355469, 16.01644515991211, 3.2845993041992188, 2.6884536743164062, -0.70843505859375, 9.755439758300781, -7.850944519042969, 1.2455730438232422, -8.641937255859375, 9.300079345703125, 22.81585693359375, 1.3411903381347656, 2.9673233032226562, 3.1230621337890625, -3.328521728515625, 10.188346862792969, 5.053565979003906, 7.238189697265625, 8.848411560058594, 1.4781341552734375, 0.1509380340576172, 20.003463745117188, 0.2077465057373047, 2.290525436401367, -6.657958984375, -4.675502777099609, 5.923858642578125, 5.7656097412109375, 21.283233642578125, 8.29278564453125, 28.089202880859375, 8.530452728271484, 7.699039459228516, 15.693048477172852, 5.5794830322265625, -3.8502120971679688, 4.034629821777344, 8.137065887451172, 9.329238891601562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000051.npy"}
{"epoch": 0.15, "step": 52, "batch_size": 128, "mean": 5.88363790512085, "std": 9.373187065124512, "min": -22.2208251953125, "p10": -3.1287773132324217, "median": 4.2858123779296875, "p90": 17.872011566162108, "max": 44.123291015625, "pos_frac": 0.734375, "sample": [1.382345199584961, 0.8201332092285156, -0.4754981994628906, 2.4832191467285156, -0.8347320556640625, -0.34224700927734375, 10.200080871582031, 7.624088287353516, 21.32440185546875, 11.420433044433594, 10.596939086914062, 10.674186706542969, -6.1663970947265625, -8.141265869140625, 6.7093353271484375, -0.012298583984375, 3.1465988159179688, 11.436508178710938, -3.722015380859375, 6.063816070556641, -4.889404296875, -4.899566650390625, 0.8733634948730469, 11.744056701660156, 44.123291015625, 17.889854431152344, 32.385887145996094, -3.0254745483398438, 5.1538543701171875, 5.7185211181640625, 11.407363891601562, -0.5454959869384766, 2.3847618103027344, 3.1258468627929688, 7.863189697265625, 4.799659729003906, 2.2318801879882812, 7.0887451171875, -2.801513671875, 13.617443084716797, -8.08935546875, 9.718177795410156, 8.24444580078125, -0.9927520751953125, -12.721431732177734, 0.18346405029296875, 7.6279449462890625, 17.19024658203125, 3.1024322509765625, 8.488410949707031, 13.544189453125, 6.50335693359375, 0.149871826171875, 22.60991668701172, 10.047164916992188, 0.6840057373046875, 9.655460357666016, -1.9220962524414062, 3.346576690673828, 5.7992095947265625, 18.51177978515625, 8.27468490600586, 6.094085693359375, 11.275321960449219, 5.672828674316406, 0.29534149169921875, 1.7754497528076172, 3.429779052734375, 9.420341491699219, 3.7719650268554688, -5.0794830322265625, 1.3084945678710938, 25.584381103515625, 14.776750564575195, -0.447021484375, 19.395050048828125, 7.5885772705078125, -3.0895767211914062, 3.296794891357422, -0.7810401916503906, 1.9291877746582031, 14.987037658691406, 3.2520885467529297, 3.054380416870117, -3.8201675415039062, -0.9597930908203125, 33.078460693359375, -3.2739639282226562, 10.595535278320312, 17.864364624023438, -0.5751399993896484, 8.290046691894531, -1.6016273498535156, 2.4911727905273438, 17.301895141601562, 0.42977142333984375, 1.8669509887695312, 0.6004714965820312, -1.070180892944336, -2.373279571533203, 15.342422485351562, 6.9297027587890625, 21.394012451171875, 1.1722450256347656, 2.280580520629883, 1.2694206237792969, 36.462799072265625, 10.531692504882812, 6.298271179199219, 8.169334411621094, 20.85747528076172, 13.47833251953125, -2.3409500122070312, -3.220245361328125, 9.690818786621094, -4.757377624511719, 12.678390502929688, 4.9680938720703125, 17.9085693359375, 5.446170806884766, -1.5078010559082031, 9.809093475341797, -0.0482330322265625, -2.3070144653320312, 4.856182098388672, 0.2076873779296875, -22.2208251953125, 5.011909484863281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000052.npy"}
{"epoch": 0.15294117647058825, "step": 53, "batch_size": 128, "mean": 7.308060646057129, "std": 9.22751522064209, "min": -44.424652099609375, "p10": -1.3563404083251953, "median": 6.160566329956055, "p90": 19.187646865844723, "max": 30.288314819335938, "pos_frac": 0.859375, "sample": [17.689666748046875, 11.947792053222656, 13.188785552978516, 7.6262969970703125, 11.653156280517578, 16.680213928222656, 13.20697021484375, 30.288314819335938, -44.424652099609375, -1.2520599365234375, 0.1200408935546875, 17.130577087402344, 4.15289306640625, 24.802764892578125, 17.317169189453125, 6.935382843017578, -0.6534423828125, 6.914360046386719, 3.714588165283203, 7.948051452636719, 2.543548583984375, 20.54779815673828, 4.227088928222656, -4.050350189208984, 1.2549896240234375, 25.50836181640625, 21.25769805908203, 5.605319976806641, -3.3305587768554688, 1.9224987030029297, 6.206165313720703, 6.739830017089844, 12.019216537475586, -1.8737640380859375, 2.9185104370117188, 14.773521423339844, 4.281394958496094, 5.782022476196289, 3.5806961059570312, 8.688774108886719, 2.467498779296875, 7.815681457519531, 6.618320465087891, 14.45260238647461, 3.440814971923828, 21.72650909423828, 6.684852600097656, 16.683761596679688, 12.648448944091797, 14.377838134765625, 7.438396453857422, 2.3235511779785156, 1.731109619140625, 6.246990203857422, 1.6795806884765625, 7.834087371826172, 2.6289005279541016, 18.95721435546875, 5.821495056152344, 22.841842651367188, 2.1981163024902344, 0.8219432830810547, 4.490283966064453, 23.241851806640625, 6.427574157714844, 1.6095428466796875, -0.7790203094482422, -1.4496231079101562, 4.879693984985352, 1.331533432006836, 13.195777893066406, -5.8281097412109375, 0.3198699951171875, 4.4591217041015625, 16.43329620361328, 0.42999267578125, 2.9337692260742188, 10.330429077148438, 1.3324966430664062, 23.588836669921875, 1.1029281616210938, 7.162696838378906, -1.3469047546386719, 1.1691131591796875, 1.3776588439941406, 2.7918624877929688, 16.209213256835938, 15.246902465820312, 3.1269989013671875, 3.5693359375, -0.5725612640380859, 11.359527587890625, 10.695068359375, 6.089073181152344, 21.8343505859375, 11.935317993164062, 4.1385650634765625, 17.793434143066406, 11.907888412475586, 5.4391021728515625, 2.672574996948242, 26.402385711669922, 6.114967346191406, 14.77667236328125, 2.8518295288085938, 7.950996398925781, 4.8302459716796875, -3.8563613891601562, 11.552192687988281, -1.37835693359375, 13.169963836669922, -11.814987182617188, 8.884628295898438, 11.650321960449219, -2.3681411743164062, 0.2433319091796875, 12.819694519042969, 5.944366455078125, 14.915508270263672, 0.472900390625, -4.80194091796875, 7.329795837402344, 7.5814056396484375, 17.036205291748047, 23.492904663085938, -4.276336669921875, -5.4964141845703125, 19.725322723388672], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000053.npy"}
{"epoch": 0.15588235294117647, "step": 54, "batch_size": 128, "mean": 6.612789630889893, "std": 9.140440940856934, "min": -15.27276611328125, "p10": -2.532569122314453, "median": 5.552192687988281, "p90": 19.271963500976558, "max": 44.54931640625, "pos_frac": 0.8046875, "sample": [10.2437744140625, 22.62767791748047, 16.971176147460938, 6.153308868408203, 4.590969085693359, -3.7420501708984375, 17.263809204101562, 3.0278987884521484, 9.428049087524414, 18.226058959960938, 1.6767997741699219, 0.6147613525390625, 11.035621643066406, -0.1587371826171875, 21.504669189453125, 1.9697151184082031, 12.879989624023438, 9.512481689453125, 3.6336593627929688, 12.158824920654297, 1.6985187530517578, 10.014839172363281, 6.42420768737793, 6.451629638671875, -5.7875823974609375, 4.286346435546875, 1.9522705078125, 6.195440292358398, -0.5611419677734375, 5.763580322265625, 6.1488037109375, 4.598419189453125, -2.20770263671875, 1.8588123321533203, 3.2328758239746094, 14.005294799804688, 0.573272705078125, 3.1049880981445312, 0.663360595703125, 6.4207763671875, -2.2877197265625, 6.8428955078125, 8.751266479492188, 1.3807830810546875, 27.997650146484375, 10.106590270996094, -2.395366668701172, 11.045890808105469, 17.858505249023438, 0.692626953125, 12.54986572265625, 22.148193359375, 6.106819152832031, 22.10305404663086, 3.2963638305664062, 7.0762939453125, 3.984567642211914, 6.580310821533203, -3.7768783569335938, 4.080018997192383, 5.9717864990234375, 11.2176513671875, 6.1191558837890625, 5.6414337158203125, 1.0593643188476562, 23.655670166015625, -5.255878448486328, -6.994384765625, -12.44580078125, 18.367462158203125, 7.859365463256836, -3.128570556640625, 0.3116912841796875, 27.517013549804688, 44.54931640625, 14.144481658935547, 25.3343505859375, 3.1140670776367188, 1.589712142944336, -6.423759460449219, 9.659149169921875, 0.40460205078125, 24.735092163085938, 0.353179931640625, 9.212139129638672, -0.5121002197265625, -15.27276611328125, 6.937892913818359, 6.2985382080078125, -7.8522186279296875, -2.07818603515625, 8.98386001586914, 7.6485443115234375, 10.96905517578125, 0.01604461669921875, 1.9044322967529297, 35.93524169921875, -3.7311668395996094, 4.7997283935546875, 20.133270263671875, -1.3133964538574219, -3.27679443359375, 1.8543128967285156, 10.99200439453125, 8.257972717285156, -0.9943275451660156, 1.2909774780273438, 2.6092453002929688, 18.90283203125, 5.852581024169922, 3.6062068939208984, -1.6808509826660156, 2.0225677490234375, 7.772712707519531, 10.359817504882812, 14.70489501953125, -2.616912841796875, -1.4153308868408203, 14.502769470214844, 20.5958251953125, 4.4370269775390625, 4.546653747558594, 10.593490600585938, -2.4964218139648438, 3.6106643676757812, 2.1637001037597656, 5.46295166015625, 6.746274948120117], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000054.npy"}
{"epoch": 0.1588235294117647, "step": 55, "batch_size": 128, "mean": 7.076693534851074, "std": 9.61003589630127, "min": -14.8109130859375, "p10": -3.2783447265624996, "median": 5.243300437927246, "p90": 19.250421333312985, "max": 49.88621520996094, "pos_frac": 0.796875, "sample": [14.40823745727539, 2.562469482421875, 12.772781372070312, 3.4117431640625, 8.558204650878906, 14.966720581054688, 4.5661468505859375, 5.058223724365234, 3.7336597442626953, 12.654239654541016, -2.0362205505371094, -1.2288761138916016, 1.8823432922363281, 30.5107421875, -1.0016536712646484, -4.5232696533203125, 1.0666923522949219, 9.664016723632812, 9.155593872070312, 8.443506240844727, 16.266326904296875, 20.51673126220703, 7.593841552734375, 22.345733642578125, 3.823627471923828, 14.017074584960938, -3.101593017578125, 5.0579681396484375, 9.90707015991211, 18.544570922851562, 17.02288818359375, -1.8222274780273438, -9.274726867675781, 6.12091064453125, 20.216552734375, 4.770534515380859, 10.810317993164062, 3.2375946044921875, -4.438074111938477, 3.4117794036865234, 5.624935150146484, 3.2782630920410156, 17.727989196777344, 1.52001953125, 9.98760986328125, -3.6372528076171875, 0.686065673828125, 49.88621520996094, 9.743438720703125, 3.16473388671875, -7.44891357421875, -11.512081146240234, 2.47039794921875, 21.165119171142578, 10.811813354492188, 19.034109115600586, 13.464218139648438, 12.482772827148438, 10.98651123046875, 11.6904296875, -13.690902709960938, -3.1245269775390625, 2.4769287109375, 11.867218017578125, 10.997344970703125, 4.437110900878906, 3.6068115234375, 25.566879272460938, 12.121719360351562, 19.755149841308594, 11.903945922851562, 5.748785018920898, 5.328535079956055, 21.619598388671875, 18.810699462890625, 3.5502586364746094, 2.12957763671875, -0.001277923583984375, 29.193145751953125, 16.854217529296875, 1.9883708953857422, -1.3894309997558594, 3.175527572631836, 35.3526611328125, 5.4264984130859375, 12.322948455810547, 5.37823486328125, 2.1268672943115234, 6.3472900390625, -0.721282958984375, 12.623550415039062, 0.21282386779785156, 6.638740539550781, 2.8045730590820312, 7.3645477294921875, 8.459493637084961, -2.5438232421875, 3.9643478393554688, 10.007415771484375, 3.58245849609375, -8.023162841796875, 2.340608596801758, 13.883819580078125, -1.5849018096923828, 11.841865539550781, 18.292617797851562, 4.3138885498046875, 3.4783782958984375, 3.7269248962402344, 8.453361511230469, -1.5699539184570312, -0.7891578674316406, 6.646999359130859, -4.382724761962891, 6.129142761230469, 6.057586669921875, 1.1204299926757812, -5.475578308105469, 24.119796752929688, -14.8109130859375, 3.8703384399414062, -4.223716735839844, 3.036439895629883, 5.1580657958984375, 13.317245483398438, 0.5809116363525391, -3.8119773864746094, 21.068832397460938], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000055.npy"}
{"epoch": 0.16176470588235295, "step": 56, "batch_size": 128, "mean": 5.864007949829102, "std": 8.95423412322998, "min": -24.036895751953125, "p10": -3.4121826171875, "median": 4.950433731079102, "p90": 16.27896194458008, "max": 47.14707946777344, "pos_frac": 0.7578125, "sample": [47.14707946777344, -7.7204437255859375, -24.036895751953125, 15.281509399414062, -8.265144348144531, 9.702743530273438, -3.3635635375976562, 13.948272705078125, 17.398422241210938, 12.331939697265625, 11.81683349609375, 2.887584686279297, 7.249492645263672, -8.211601257324219, 3.5124168395996094, 0.6907958984375, 32.1142578125, 4.32844352722168, 16.32257843017578, 10.411575317382812, 5.218471527099609, 6.3610382080078125, 10.784011840820312, 11.740646362304688, 15.194244384765625, 2.0577850341796875, 11.452613830566406, 8.324151992797852, 8.570960998535156, 23.132678985595703, 13.559371948242188, -2.642578125, 2.7304763793945312, 20.641525268554688, 7.676807403564453, 6.989372253417969, 7.1591644287109375, 3.5025100708007812, 14.297088623046875, 15.956771850585938, 16.378097534179688, 10.958023071289062, -3.001129150390625, 10.027290344238281, -3.728546142578125, 4.779483795166016, 9.763870239257812, 4.21699333190918, 4.267894744873047, 0.455657958984375, 3.8314590454101562, -0.8172454833984375, 13.388397216796875, 1.9322967529296875, 5.241729736328125, 5.014854431152344, -7.375099182128906, 5.7989959716796875, 5.991987228393555, 10.619691848754883, -1.4774742126464844, 21.698806762695312, -4.4620361328125, -3.1104812622070312, 0.33905792236328125, 18.409530639648438, 4.3563079833984375, 1.0181198120117188, -5.086822509765625, 1.7131786346435547, -0.5791854858398438, 7.214385986328125, -2.5586910247802734, 18.31253433227539, 8.21451187133789, 12.799736022949219, 0.306396484375, 10.773189544677734, 10.020256042480469, -4.1563262939453125, 2.5301971435546875, 17.107528686523438, 16.260269165039062, -7.129005432128906, 7.084403991699219, 7.146099090576172, 18.674407958984375, 2.4689483642578125, 13.69356918334961, 5.5972137451171875, 1.7995986938476562, -0.42071533203125, 8.328521728515625, -0.0290374755859375, 2.634918212890625, -2.9923858642578125, -2.6666641235351562, -8.116012573242188, 29.65118408203125, 7.680278778076172, 0.8718185424804688, -2.6009597778320312, 11.446159362792969, 8.941219329833984, 2.7304840087890625, 8.392852783203125, 2.589935302734375, 3.7328529357910156, -0.4416961669921875, -0.451019287109375, 4.488956451416016, 5.6122283935546875, 0.547607421875, 15.867103576660156, 4.886013031005859, 4.206333160400391, -10.228271484375, -0.4532432556152344, 9.347930908203125, -0.4706230163574219, 2.0687103271484375, -2.5374717712402344, 5.243019104003906, 0.2707805633544922, 2.1538848876953125, 9.99854850769043, -3.5256271362304688, 10.858993530273438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000056.npy"}
{"epoch": 0.16470588235294117, "step": 57, "batch_size": 128, "mean": 5.7373504638671875, "std": 9.455587387084961, "min": -24.446014404296875, "p10": -3.9030151367187487, "median": 4.9882707595825195, "p90": 16.791925811767577, "max": 40.2056884765625, "pos_frac": 0.7265625, "sample": [-1.6519813537597656, 11.151321411132812, 1.7660369873046875, 16.72900390625, 4.448417663574219, 1.64166259765625, 4.857870101928711, 16.938743591308594, 3.9393081665039062, 3.5981788635253906, 3.7871055603027344, 20.74236297607422, -4.71307373046875, 3.0082969665527344, 14.535579681396484, -2.865814208984375, 4.4372100830078125, -0.5234222412109375, 15.65362548828125, -10.864959716796875, 2.2136917114257812, 12.719741821289062, 5.004425048828125, -2.3512954711914062, 1.015472412109375, -0.4238109588623047, 22.428871154785156, -3.3661842346191406, 5.932153701782227, 4.972116470336914, 6.626640319824219, 9.121471405029297, -0.8011627197265625, -0.6328201293945312, -4.893043518066406, 8.709075927734375, 8.452919006347656, -24.446014404296875, 6.514625549316406, -0.49584197998046875, 8.495254516601562, -3.4079132080078125, -0.4759960174560547, 18.497238159179688, 15.522926330566406, -3.1993637084960938, 4.2011566162109375, 9.246719360351562, -5.480278015136719, 13.703636169433594, -5.22271728515625, 2.0966968536376953, 6.484527587890625, 10.159934997558594, 6.668727874755859, 7.973854064941406, 7.7638702392578125, 20.913421630859375, 2.354595184326172, 5.6691741943359375, 5.371429443359375, 4.153018951416016, 4.358163833618164, -1.15948486328125, 13.06414794921875, 12.250625610351562, 8.928085327148438, 7.453857421875, 6.611553192138672, 30.214019775390625, -3.415924072265625, 11.945873260498047, 2.7163429260253906, -3.55584716796875, 3.2660980224609375, -0.498443603515625, 6.928611755371094, 2.6945438385009766, -5.6066741943359375, 17.945030212402344, -8.653396606445312, 9.216453552246094, -15.059982299804688, 0.10848426818847656, 8.616146087646484, 4.143013000488281, 8.341331481933594, 8.726799011230469, 7.487438201904297, 40.12879943847656, 2.4524402618408203, 8.11541748046875, 3.0471954345703125, -2.3134002685546875, 40.2056884765625, 18.515121459960938, 2.0738372802734375, 6.53912353515625, -8.162338256835938, 7.431488037109375, 23.542007446289062, 1.4965591430664062, -7.008697509765625, 6.648681640625, -7.2699737548828125, 23.472618103027344, 5.055427551269531, 1.1459922790527344, -1.8585433959960938, 14.708908081054688, 13.295940399169922, -0.6994285583496094, 16.228614807128906, 9.613418579101562, -1.107828140258789, 8.644485473632812, 7.631801605224609, 9.422088623046875, 31.90233612060547, -0.027286529541015625, 0.6474170684814453, 6.2235870361328125, 6.82391357421875, -5.7882232666015625, -1.2105789184570312, 16.007583618164062, 0.4484405517578125, 6.8848876953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000057.npy"}
{"epoch": 0.1676470588235294, "step": 58, "batch_size": 128, "mean": 5.391901016235352, "std": 8.96910572052002, "min": -40.346099853515625, "p10": -1.872967529296875, "median": 4.53059196472168, "p90": 17.47440948486328, "max": 38.52398681640625, "pos_frac": 0.7734375, "sample": [0.054443359375, -2.2490901947021484, 8.33492660522461, 6.2035369873046875, -10.92059326171875, 5.386940002441406, 15.9056396484375, 5.7126617431640625, 4.79619026184082, -1.8720016479492188, 38.52398681640625, -2.1307220458984375, 19.95903778076172, 5.349266052246094, 9.907066345214844, -1.2889022827148438, 11.099166870117188, -16.99877166748047, 1.594442367553711, 4.837734222412109, 5.1450958251953125, 2.8433399200439453, 5.6324462890625, 10.994422912597656, -3.2610092163085938, 9.402618408203125, 6.578052520751953, 7.444358825683594, 2.271209716796875, 17.586166381835938, -1.6495380401611328, 12.041576385498047, -0.8866596221923828, 12.581230163574219, 5.631572723388672, 1.2719154357910156, 11.268928527832031, 17.058883666992188, 9.130905151367188, 26.95366668701172, 21.663543701171875, 11.098564147949219, 5.93328857421875, 7.547122955322266, -0.9365386962890625, 17.713211059570312, 19.603561401367188, 0.0342559814453125, 13.755645751953125, 7.193168640136719, -0.64837646484375, -4.109821319580078, 24.060455322265625, 3.08447265625, 4.274200439453125, 0.06610870361328125, 4.540557861328125, -1.1587066650390625, 5.910369873046875, -40.346099853515625, 17.89306640625, -1.2145538330078125, 1.4381999969482422, -1.830596923828125, 3.251626968383789, 4.720586776733398, -5.389545440673828, 1.318939208984375, 0.9175891876220703, 10.439956665039062, -5.556983947753906, 4.000053405761719, 7.665550231933594, -1.5086822509765625, 3.802379608154297, 10.64312744140625, 1.673980712890625, 7.4513397216796875, -3.9855499267578125, -7.232513427734375, 4.996435165405273, 2.9738006591796875, 0.6766719818115234, 1.3968505859375, -1.3027877807617188, 7.914745330810547, 2.9932823181152344, 17.426513671875, 1.7403945922851562, 15.320137023925781, 4.678228378295898, -0.6900463104248047, 24.459617614746094, 2.409698486328125, 8.34170913696289, 2.9359588623046875, 6.739809036254883, -1.8752212524414062, 4.585781097412109, -1.8104877471923828, 9.58438491821289, 0.5955009460449219, 0.17569732666015625, 2.7504501342773438, 1.3622360229492188, 3.1801528930664062, 1.395965576171875, 13.051544189453125, 2.25225830078125, -3.315093994140625, 22.944183349609375, 10.434085845947266, 1.4049968719482422, 18.103775024414062, -1.3105087280273438, 11.959884643554688, 11.040565490722656, 4.520626068115234, 21.24280548095703, 7.249908447265625, 2.5819129943847656, 10.955841064453125, -1.1686553955078125, 0.9269561767578125, -1.3721656799316406, 12.354400634765625, 3.07183837890625, 12.257591247558594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000058.npy"}
{"epoch": 0.17058823529411765, "step": 59, "batch_size": 128, "mean": 5.070147514343262, "std": 7.598785400390625, "min": -16.66476058959961, "p10": -2.6730316162109373, "median": 4.0782470703125, "p90": 15.287488555908203, "max": 26.75787353515625, "pos_frac": 0.7890625, "sample": [5.359870910644531, 2.82330322265625, 8.441299438476562, 16.786731719970703, 2.8090076446533203, 2.293245315551758, 6.210103988647461, 3.321441650390625, -11.692489624023438, 15.064170837402344, 2.29736328125, -14.062957763671875, -2.721160888671875, 3.6103172302246094, 6.142757415771484, 7.729215621948242, 7.952323913574219, -10.02777099609375, 21.045623779296875, 4.551156997680664, 18.337677001953125, 7.0583648681640625, 1.2722702026367188, 4.962085723876953, 14.257152557373047, 5.800298690795898, -7.9636383056640625, 4.563201904296875, 3.1780166625976562, 24.272998809814453, 18.674175262451172, 6.931083679199219, 3.4426956176757812, 7.051441192626953, 12.80721664428711, 15.376777648925781, 7.78131103515625, 25.818241119384766, 6.3871917724609375, 2.7274093627929688, 5.875732421875, 3.5477218627929688, -2.65240478515625, 1.0920124053955078, 5.5499114990234375, 3.667724609375, 6.969249725341797, 18.202171325683594, 12.498504638671875, 1.7071113586425781, 3.6988773345947266, -5.219032287597656, -4.6184539794921875, 8.004478454589844, 9.611093521118164, 14.175064086914062, 3.509031295776367, 3.2392959594726562, 4.404815673828125, -2.484619140625, 6.667484283447266, -1.0896530151367188, -5.2961883544921875, 5.66375732421875, -0.2082366943359375, 5.194614410400391, -1.8650741577148438, -9.285797119140625, 7.277345657348633, 1.4871292114257812, -16.66476058959961, -2.1227474212646484, 3.9887619018554688, 4.2828369140625, 4.768619537353516, 10.478408813476562, 4.167732238769531, -0.2609977722167969, 3.806396484375, -0.5351943969726562, -1.1133499145507812, 1.304117202758789, 2.088348388671875, 3.625640869140625, 0.8903408050537109, 8.464279174804688, 18.77386474609375, -1.2914886474609375, 15.249221801757812, 3.5808372497558594, -3.7260284423828125, 6.813789367675781, 2.811004638671875, 7.874126434326172, 7.708595275878906, 15.577690124511719, 5.959449768066406, 3.294994354248047, 7.9174041748046875, 8.720489501953125, 26.75787353515625, 3.117053985595703, 0.6553192138671875, 6.542396545410156, -4.686614990234375, 4.849084854125977, 0.5814342498779297, 2.581216812133789, 1.0008316040039062, -3.39544677734375, -1.07916259765625, 7.634086608886719, -0.7354660034179688, -1.5787925720214844, -0.5812225341796875, 6.155096054077148, 25.111373901367188, 9.841285705566406, 15.049003601074219, 13.645957946777344, 1.4192485809326172, 3.6444015502929688, 1.5496692657470703, 12.446060180664062, 2.8752975463867188, 5.429527282714844, 22.061569213867188, 1.662200927734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000059.npy"}
{"epoch": 0.17352941176470588, "step": 60, "batch_size": 128, "mean": 6.046588897705078, "std": 8.706686973571777, "min": -18.11822509765625, "p10": -4.1757263183593745, "median": 5.019824981689453, "p90": 18.300782775878904, "max": 37.696807861328125, "pos_frac": 0.7734375, "sample": [6.758394241333008, 9.19439697265625, 4.837745666503906, 37.696807861328125, 16.64483642578125, 0.04217529296875, -6.542015075683594, 5.703769683837891, 0.7504501342773438, 4.755056381225586, 21.14324951171875, 8.949356079101562, 8.576141357421875, 15.928131103515625, 12.009414672851562, -5.579902648925781, 18.406173706054688, 4.934928894042969, 18.990493774414062, -2.5323867797851562, 14.525032043457031, 13.218841552734375, -1.2378883361816406, 1.9714984893798828, 4.315467834472656, 11.820426940917969, -7.3519134521484375, -1.037841796875, 14.440040588378906, 5.2681732177734375, 9.570953369140625, 0.8727207183837891, -1.409423828125, 4.153923034667969, 2.662872314453125, -2.2772598266601562, 20.10369873046875, 17.478599548339844, 2.1886253356933594, 14.729904174804688, 6.892223358154297, 1.7472801208496094, -8.557498931884766, 3.1836090087890625, 8.909576416015625, 9.565887451171875, -4.941123962402344, -4.137237548828125, 5.1047210693359375, 1.3574066162109375, -3.85906982421875, 9.212158203125, 7.7431640625, 2.9133167266845703, -4.56280517578125, -6.468658447265625, 7.907341003417969, 4.002246856689453, 5.8700714111328125, 0.44692230224609375, -7.7567901611328125, 0.7696533203125, 10.139923095703125, 6.757316589355469, 23.111572265625, 13.79144287109375, 13.861679077148438, 11.370803833007812, 3.850482940673828, 8.220834732055664, -4.265533447265625, 22.000152587890625, 8.07830810546875, 6.425952911376953, 9.53509521484375, 0.3893890380859375, -16.138565063476562, -6.99041748046875, 2.315032958984375, -1.7402725219726562, 12.993659973144531, 1.2705326080322266, 22.534912109375, 3.962993621826172, 22.936553955078125, 6.779638290405273, 8.962898254394531, -2.5630321502685547, 14.417190551757812, 6.497108459472656, -0.46009063720703125, 9.043529510498047, 1.1229705810546875, 8.248172760009766, 4.011970520019531, 9.779403686523438, -18.11822509765625, 4.7363433837890625, 19.304645538330078, 22.076400756835938, 14.622798919677734, 1.5849609375, 2.2271785736083984, 18.96759033203125, 0.6699104309082031, -6.293571472167969, 7.97955322265625, 3.7602996826171875, 10.920906066894531, -3.6017074584960938, -2.7533798217773438, 23.901657104492188, -0.3169403076171875, 4.4040374755859375, 4.393333435058594, 18.255615234375, 6.6305694580078125, -0.407196044921875, 8.049896240234375, 14.784469604492188, 1.1789970397949219, 10.365058898925781, 0.1972503662109375, 2.2750930786132812, 13.533279418945312, -1.9217681884765625, 14.530975341796875, -2.242340087890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000060.npy"}
{"epoch": 0.17647058823529413, "step": 61, "batch_size": 128, "mean": 8.436383247375488, "std": 8.750635147094727, "min": -11.939613342285156, "p10": -1.1175430297851563, "median": 7.2992401123046875, "p90": 20.41429443359375, "max": 35.45928955078125, "pos_frac": 0.8359375, "sample": [-1.1768131256103516, 1.6409912109375, 2.0025634765625, 14.913955688476562, -2.081724166870117, 2.9579238891601562, 15.89337158203125, 1.46209716796875, 3.909881591796875, 10.27761459350586, 10.2860107421875, 2.085451126098633, -1.1215057373046875, 6.120594024658203, 0.6476669311523438, 11.961349487304688, 5.101188659667969, 9.607650756835938, -1.1158447265625, 15.11962890625, -0.45009613037109375, 33.24067687988281, 2.080944061279297, 23.663108825683594, 4.64056396484375, 12.241134643554688, 4.592294692993164, -2.0818824768066406, -2.8838043212890625, 20.398895263671875, 19.651153564453125, 11.084003448486328, 9.46540641784668, 13.464141845703125, -0.7735977172851562, 12.134254455566406, 12.33572006225586, 23.951854705810547, 6.602073669433594, 0.14051055908203125, 5.5886077880859375, 8.059814453125, 4.2972564697265625, 4.559436798095703, 14.931915283203125, 2.756490707397461, 6.918731689453125, 1.7884140014648438, -0.27077484130859375, 9.90704345703125, 1.13519287109375, -0.381500244140625, 3.2098388671875, 8.113540649414062, 29.738115310668945, 5.8328094482421875, 0.583221435546875, 10.948272705078125, 2.5419158935546875, 6.502418518066406, 11.302818298339844, 3.406221389770508, -3.743000030517578, -3.020721435546875, 17.982925415039062, -0.6185874938964844, 4.332317352294922, 13.477790832519531, 1.787689208984375, 4.439262390136719, 35.45928955078125, 21.993446350097656, 4.832832336425781, 10.332687377929688, 18.55744171142578, 4.696041107177734, 1.5933589935302734, 13.018199920654297, 8.522392272949219, 6.658172607421875, 8.6201171875, -0.1487903594970703, 7.074211120605469, 21.389892578125, 10.149234771728516, 20.64124298095703, 22.501815795898438, 4.627403259277344, -8.116031646728516, -5.1029815673828125, -1.2964630126953125, 5.2196044921875, 10.952640533447266, 7.963706970214844, 30.906578063964844, -0.41358184814453125, 8.040079116821289, 9.327651977539062, 10.037715911865234, 15.679367065429688, 14.139762878417969, 15.473846435546875, 17.135154724121094, 7.524269104003906, 5.8045806884765625, 8.400543212890625, 10.056024551391602, 19.218429565429688, 20.450225830078125, 14.47711181640625, 13.148382186889648, -7.604286193847656, -6.175668716430664, 13.01165771484375, -11.939613342285156, 19.200302124023438, 3.724029541015625, 7.967445373535156, 10.595947265625, 4.716789245605469, 18.231475830078125, 30.849075317382812, 5.337158203125, 3.719482421875, 14.005523681640625, 22.76953125, 1.3670501708984375, 12.438690185546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000061.npy"}
{"epoch": 0.17941176470588235, "step": 62, "batch_size": 128, "mean": 6.710572719573975, "std": 9.283842086791992, "min": -14.601242065429688, "p10": -4.37735424041748, "median": 6.403141021728516, "p90": 17.05856990814209, "max": 36.83038330078125, "pos_frac": 0.7578125, "sample": [12.250778198242188, 8.971672058105469, 15.680625915527344, 3.996185302734375, 6.8207550048828125, 6.6037445068359375, 23.452041625976562, 10.89886474609375, 4.8816986083984375, -0.25656890869140625, -1.4471588134765625, 15.434494018554688, 3.52154541015625, 16.316925048828125, 10.085189819335938, -6.337005615234375, 11.238807678222656, -3.0289878845214844, 20.81927490234375, 2.3188209533691406, 9.855667114257812, 8.815046310424805, 25.49658203125, -2.6183700561523438, -7.3071441650390625, 17.492111206054688, -9.404083251953125, 10.011398315429688, 0.9623031616210938, 14.222930908203125, -1.1422576904296875, 5.341411590576172, 14.3182373046875, 6.296211242675781, -0.02780914306640625, 4.760551452636719, 2.2971878051757812, 3.3669509887695312, 15.801109313964844, 6.784828186035156, -4.329322814941406, 8.801605224609375, 21.37297821044922, 1.6782264709472656, 32.18177795410156, 15.8768310546875, 0.7324981689453125, 11.214454650878906, 5.398590087890625, 10.625202178955078, -4.48942756652832, -0.5053024291992188, 10.019783020019531, -8.08258056640625, -13.94851303100586, -2.251585006713867, 10.507591247558594, 11.191829681396484, -14.442840576171875, -8.000068664550781, 9.696208953857422, -0.5614204406738281, 0.440704345703125, 1.4375076293945312, 5.871002197265625, -3.6656036376953125, 17.254735946655273, 7.0516204833984375, -1.299062728881836, 11.280166625976562, -4.7191009521484375, 0.011404037475585938, 6.701637268066406, 5.015119552612305, 4.118080139160156, 13.631576538085938, -14.601242065429688, -7.505565643310547, 16.558128356933594, 3.4163131713867188, 2.9488677978515625, 5.2524566650390625, 6.8652191162109375, 31.350387573242188, 16.974498748779297, 9.940887451171875, 7.859519958496094, 14.208259582519531, 3.0989131927490234, 6.51007080078125, 8.36102294921875, 12.376022338867188, -1.27203369140625, 6.744415283203125, 7.4338836669921875, 36.83038330078125, 16.848968505859375, 13.622833251953125, 1.2078170776367188, -5.204166412353516, -0.1090850830078125, 4.455596923828125, 5.422935485839844, 24.727584838867188, 24.960731506347656, 27.83972930908203, 18.93677520751953, 8.571273803710938, 11.45233154296875, 8.821853637695312, -2.321971893310547, -0.8946437835693359, 14.9471435546875, 5.4536895751953125, 5.562828063964844, -6.4006195068359375, 3.2279434204101562, 10.480979919433594, 1.81585693359375, -0.6259918212890625, 4.2341156005859375, 16.83843994140625, -0.8349094390869141, 7.260463714599609, 1.6522674560546875, 9.632682800292969, 6.929718017578125, 3.728849411010742], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000062.npy"}
{"epoch": 0.18235294117647058, "step": 63, "batch_size": 128, "mean": 7.020439147949219, "std": 8.270289421081543, "min": -13.5499267578125, "p10": -1.272707366943359, "median": 5.478788375854492, "p90": 19.724241638183592, "max": 38.231170654296875, "pos_frac": 0.8671875, "sample": [0.7278289794921875, 11.248199462890625, 9.712615966796875, 3.5130462646484375, 1.3750801086425781, -0.9935531616210938, 16.270706176757812, 0.4501190185546875, 6.542366027832031, 4.35943603515625, 1.2561302185058594, -4.4754180908203125, 17.62200927734375, 4.52598762512207, 6.7567901611328125, 9.580093383789062, 1.4783401489257812, 1.3356571197509766, 14.961212158203125, 22.542144775390625, -3.327808380126953, 21.816452026367188, 6.765472412109375, 5.017478942871094, 23.81707000732422, 7.876951217651367, -1.444091796875, 6.098995208740234, 8.121631622314453, 1.4039020538330078, 7.1981353759765625, 2.6010055541992188, 8.557281494140625, -2.6863021850585938, 3.7744808197021484, -4.657794952392578, -2.601428985595703, 0.7218551635742188, 11.738929748535156, 10.574226379394531, 19.7723388671875, 38.231170654296875, -4.0963134765625, 2.3370532989501953, 8.542961120605469, 7.655097961425781, -3.856670379638672, -8.979820251464844, 2.6677627563476562, 5.7428741455078125, 14.549484252929688, 22.544414520263672, 24.260528564453125, 8.31817626953125, 11.105453491210938, 8.343547821044922, 9.327011108398438, 5.7194671630859375, 4.139947891235352, 7.135162353515625, 19.703628540039062, 17.214096069335938, 7.109901428222656, -1.1992568969726562, 17.1951904296875, 4.2610321044921875, 1.206634521484375, 14.612060546875, 9.037612915039062, 3.395336151123047, 0.8480110168457031, 1.3141002655029297, 6.0233612060546875, 7.717926025390625, 12.516494750976562, 8.487144470214844, 4.790971755981445, 5.505939483642578, 0.9195232391357422, 5.033195495605469, 1.8858108520507812, 6.231315612792969, 9.721603393554688, 1.1555099487304688, 1.6307449340820312, -4.245140075683594, 5.429351806640625, 5.451637268066406, 3.1084518432617188, 11.393569946289062, -13.5499267578125, 27.932640075683594, 11.297225952148438, 11.180740356445312, 4.6498870849609375, 11.200180053710938, 14.124202728271484, 20.791168212890625, -2.3872222900390625, 9.930477142333984, 16.94854736328125, 5.1455535888671875, 4.300422668457031, 2.3681793212890625, -0.78350830078125, 24.718856811523438, 2.710674285888672, 22.766708374023438, 6.468971252441406, -0.6717090606689453, 0.49920082092285156, 2.7528934478759766, 3.9388580322265625, 1.63592529296875, 29.72515869140625, 0.6655120849609375, 3.5648765563964844, 1.2350120544433594, 4.430309295654297, 0.7716484069824219, 3.9523696899414062, 6.49462890625, 6.649576187133789, -5.7841949462890625, 7.714576721191406, 16.077346801757812, 1.55340576171875, 22.52642059326172], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000063.npy"}
{"epoch": 0.18529411764705883, "step": 64, "batch_size": 128, "mean": 5.583160400390625, "std": 7.043980121612549, "min": -10.56146240234375, "p10": -2.042976188659668, "median": 4.3181304931640625, "p90": 14.533536529541013, "max": 27.592281341552734, "pos_frac": 0.7890625, "sample": [2.6568870544433594, 4.2752227783203125, 1.1494884490966797, 11.445732116699219, 0.0893096923828125, -10.56146240234375, 16.92644500732422, -2.069854736328125, 5.382083892822266, 1.5793018341064453, -0.6195068359375, 8.874309539794922, -3.26849365234375, 27.592281341552734, 4.180351257324219, 12.788955688476562, 10.753395080566406, 2.255228042602539, -1.1058807373046875, 2.6959762573242188, -0.5513477325439453, 0.07037353515625, 0.3519248962402344, -2.036548614501953, 21.88109588623047, 5.016410827636719, -0.0623626708984375, 9.71261215209961, 1.8025741577148438, 23.82061767578125, 0.20218276977539062, 5.8341217041015625, 17.631332397460938, 5.110107421875, -0.9135761260986328, 1.8683547973632812, 13.60491943359375, -1.934427261352539, 1.3924198150634766, 5.781436920166016, 4.741657257080078, 6.312591552734375, 10.42230224609375, 17.44342041015625, -5.7786865234375, 2.0692214965820312, 11.594776153564453, 13.062393188476562, 1.707977294921875, 14.932151794433594, 0.9070892333984375, 6.704673767089844, 7.057525634765625, 13.139684677124023, 5.847217559814453, 12.972320556640625, 9.020042419433594, 12.835678100585938, 5.951133728027344, 3.566099166870117, 2.4005775451660156, 11.818771362304688, -3.084003448486328, 4.029808044433594, 4.13560676574707, 0.3417835235595703, 11.342979431152344, -1.14385986328125, 9.837913513183594, -7.486167907714844, 15.205940246582031, 15.19189453125, 6.622222900390625, 2.624713897705078, 1.3581676483154297, 8.134956359863281, 1.4943695068359375, 0.9932022094726562, 24.901336669921875, -2.2816162109375, 9.170953750610352, -0.8742523193359375, -1.3777236938476562, 1.6753120422363281, -2.247802734375, -3.4909515380859375, 5.040538787841797, 4.0216522216796875, 6.140739440917969, 2.7645301818847656, 3.6573524475097656, 7.258571624755859, 14.362701416015625, -1.3481769561767578, 4.775606155395508, 13.51348876953125, 1.658782958984375, 6.8289947509765625, 17.70867919921875, 0.9177436828613281, 12.339897155761719, 4.897609710693359, 0.21370697021484375, 9.920124053955078, 20.752777099609375, 1.6463165283203125, 14.284271240234375, 7.181266784667969, 9.379638671875, 4.3610382080078125, -5.1453399658203125, -4.845611572265625, 10.670326232910156, -1.3665180206298828, 16.71685028076172, -4.455114364624023, 1.4366340637207031, 13.180419921875, -2.057973861694336, 7.7806396484375, 1.1134109497070312, 9.668968200683594, 6.860084533691406, -1.385782241821289, -0.536163330078125, 13.198663711547852, 13.931755065917969, 0.19608688354492188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000064.npy"}
{"epoch": 0.18823529411764706, "step": 65, "batch_size": 128, "mean": 6.967798233032227, "std": 7.961244106292725, "min": -16.464813232421875, "p10": -1.7271995544433592, "median": 6.625146865844727, "p90": 17.79688148498535, "max": 28.09129524230957, "pos_frac": 0.78125, "sample": [4.856378555297852, 6.523162841796875, -1.65435791015625, 27.91992950439453, 1.0372238159179688, 11.715805053710938, -3.9956932067871094, -0.6994781494140625, 7.66033935546875, -0.6376571655273438, -4.40869140625, 10.242959976196289, 14.607177734375, 11.434768676757812, 14.668502807617188, -2.9932403564453125, 5.20086669921875, 7.317314147949219, -2.4373092651367188, 1.439260482788086, -0.77227783203125, 8.533363342285156, 5.729682922363281, -0.030582427978515625, -0.3608360290527344, -2.478384017944336, 13.58935546875, 8.493392944335938, 4.282632827758789, 8.560882568359375, 11.340633392333984, -0.6103973388671875, 9.218460083007812, 1.40264892578125, 9.746688842773438, 20.449874877929688, 3.9933624267578125, 12.548622131347656, 5.531745910644531, 4.385602951049805, 3.766204833984375, 0.381988525390625, -5.6494293212890625, 5.730550765991211, 19.1158447265625, 13.022933959960938, 5.479572296142578, 13.056365966796875, 4.63812255859375, 4.491363525390625, 6.831809997558594, 11.464569091796875, 23.901317596435547, -16.464813232421875, 8.801328659057617, 1.8204345703125, 6.727130889892578, 3.6598663330078125, -1.0392303466796875, 8.402214050292969, 19.53638458251953, -2.2986984252929688, 10.880661010742188, -1.1396331787109375, 7.898349761962891, 0.3360557556152344, -1.0705718994140625, 19.76708984375, -1.83099365234375, 0.37646484375, 13.454330444335938, 15.255304336547852, 26.780426025390625, 0.2690277099609375, 5.867832183837891, 13.540328979492188, 8.928993225097656, 6.1612091064453125, 0.8798751831054688, 18.91252899169922, -0.040866851806640625, 8.193313598632812, 10.204231262207031, -8.291168212890625, -0.494903564453125, 5.761896133422852, 18.070049285888672, -2.6166610717773438, -0.3827781677246094, 3.5452232360839844, 0.3881378173828125, 4.879547119140625, 8.206474304199219, 9.196197509765625, 8.128612518310547, -9.22637939453125, 1.3748931884765625, 0.36711883544921875, -0.16812896728515625, 17.45935821533203, 8.483219146728516, 16.816558837890625, 9.91046142578125, 6.454700469970703, 28.09129524230957, -1.6827163696289062, 7.0374603271484375, 11.102537155151367, 10.358150482177734, 17.6798095703125, 7.836997985839844, 16.454803466796875, 3.9029006958007812, 7.692802429199219, 19.739322662353516, 19.279273986816406, 8.7828369140625, 0.2917671203613281, 16.604537963867188, -3.208282470703125, 11.589141845703125, 2.1389198303222656, 12.462677001953125, 13.703872680664062, 15.493545532226562, 11.298271179199219, 0.964019775390625, 26.05030059814453], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000065.npy"}
{"epoch": 0.19117647058823528, "step": 66, "batch_size": 128, "mean": 7.3798508644104, "std": 7.988501071929932, "min": -13.779327392578125, "p10": -1.031245613098144, "median": 5.989717483520508, "p90": 16.92130126953125, "max": 32.4072265625, "pos_frac": 0.8671875, "sample": [6.963846206665039, 3.009408950805664, 9.254892349243164, 4.005096435546875, 22.092178344726562, -8.812210083007812, 2.606201171875, 6.901397705078125, 2.034282684326172, -2.105133056640625, 20.121246337890625, 3.226337432861328, 5.962917327880859, 18.809776306152344, 6.292255401611328, 1.6966266632080078, 11.246772766113281, 2.334850311279297, 29.924484252929688, 0.6928386688232422, 11.068199157714844, 5.446380615234375, 32.35926818847656, -1.8058395385742188, 4.356945037841797, 4.967021942138672, 2.4177474975585938, 7.5610504150390625, 2.5977783203125, 16.11504364013672, 9.556053161621094, 14.638389587402344, 3.0989761352539062, 16.87786865234375, 20.732711791992188, -3.9597549438476562, 7.9608001708984375, 1.837636947631836, 6.9671783447265625, 1.9057083129882812, 10.697052001953125, -0.0336456298828125, 12.788063049316406, 4.209638595581055, 16.416061401367188, 4.630352020263672, 0.004302978515625, -5.5239715576171875, 6.0782318115234375, 1.181142807006836, 24.338516235351562, 3.047060012817383, -2.12213134765625, 11.550048828125, 6.927192687988281, 10.652206420898438, 16.379989624023438, 8.644458770751953, 0.6986904144287109, 13.275344848632812, 9.208877563476562, 14.216178894042969, 25.20063018798828, 13.262611389160156, 4.846895217895508, -0.3496551513671875, 12.105621337890625, -3.7604293823242188, 6.5614776611328125, 14.759078979492188, 6.368865966796875, 32.4072265625, 17.02264404296875, -0.8786392211914062, 10.504234313964844, 2.686248779296875, 2.3538389205932617, 3.2379989624023438, 12.885581970214844, 14.055137634277344, 1.5821571350097656, 13.046894073486328, 8.985397338867188, 16.70342254638672, -4.502773284912109, 0.875946044921875, 7.067207336425781, 8.766864776611328, 3.461118698120117, 8.542243957519531, -1.3873271942138672, 15.691757202148438, 13.332801818847656, 2.3948707580566406, 2.33843994140625, 3.082977294921875, 19.114425659179688, 2.469137191772461, 16.804244995117188, 7.905002593994141, 3.088724136352539, 6.563453674316406, 13.00543212890625, 6.016517639160156, 4.989490509033203, -0.7495498657226562, 3.0535831451416016, 14.961715698242188, 4.431114196777344, 5.885581970214844, 5.9206085205078125, -13.779327392578125, -6.347751617431641, 2.7452239990234375, 6.662723541259766, 18.954627990722656, -4.231544494628906, 5.4095458984375, 2.6408309936523438, -2.700164794921875, 7.248737335205078, 8.879913330078125, 4.84881591796875, 4.979824066162109, 12.983001708984375, 2.755950927734375, 3.4610252380371094, 25.177764892578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000066.npy"}
{"epoch": 0.19411764705882353, "step": 67, "batch_size": 128, "mean": 6.333862781524658, "std": 7.390278339385986, "min": -13.642288208007812, "p10": -0.6654438018798825, "median": 5.0955657958984375, "p90": 15.225182342529294, "max": 35.439247131347656, "pos_frac": 0.84375, "sample": [7.9696502685546875, 11.956085205078125, 4.42169189453125, 10.480270385742188, 4.814371109008789, -10.112190246582031, 2.2547454833984375, -0.27989959716796875, 9.891471862792969, 0.4214324951171875, 35.439247131347656, 0.20848655700683594, 8.403572082519531, 9.439254760742188, 3.576507568359375, 10.9959716796875, 6.830373764038086, 13.250679016113281, -0.511993408203125, 6.371147155761719, 8.760910034179688, -0.8548126220703125, 18.64202880859375, 4.310813903808594, -1.690338134765625, 5.688449859619141, 3.3288917541503906, 6.879978179931641, -5.3409423828125, 18.83612060546875, 5.609535217285156, 5.827667236328125, -6.914085388183594, 23.44464111328125, 3.6944961547851562, 9.43283462524414, 0.8828849792480469, 26.990650177001953, -13.642288208007812, 1.3185958862304688, 2.6952743530273438, 14.963211059570312, 3.4847984313964844, 8.325889587402344, 9.46624755859375, 4.793182373046875, -1.0085372924804688, -0.5842857360839844, 12.193962097167969, 4.470691680908203, 2.528423309326172, 5.704132080078125, -10.372932434082031, -2.328369140625, 6.437652587890625, 5.5416717529296875, 15.836448669433594, 5.113624572753906, 9.100677490234375, 1.952341079711914, 1.9915084838867188, 0.21458816528320312, 1.3913497924804688, -2.3442764282226562, 6.084869384765625, 8.281471252441406, -0.33514404296875, 11.907951354980469, 14.21380615234375, 2.806957244873047, 2.13018798828125, -3.69317626953125, 9.869342803955078, 13.752304077148438, 7.068168640136719, 3.172119140625, 0.5365009307861328, 0.09569168090820312, 5.077507019042969, 3.38079833984375, 3.2568588256835938, 3.4172439575195312, 9.821296691894531, 20.482666015625, 14.324398040771484, 4.76019287109375, 4.00921630859375, 4.032358169555664, 22.924102783203125, 2.753429412841797, -0.09235382080078125, 1.9379119873046875, 2.3585281372070312, 10.522621154785156, 10.354133605957031, 9.875743865966797, -4.3907928466796875, 12.895950317382812, 9.298171997070312, 4.8992919921875, 11.926145553588867, 2.6212692260742188, 3.960857391357422, 4.7213592529296875, 19.692657470703125, 7.932136535644531, 16.095947265625, 14.81024169921875, 12.699783325195312, 3.0254135131835938, 3.5844879150390625, 6.1786041259765625, 19.485321044921875, 3.420166015625, 6.312419891357422, -0.2590217590332031, 23.697021484375, 6.603290557861328, 5.1465301513671875, -0.12477874755859375, 9.678939819335938, 3.791278839111328, 8.265830993652344, 6.0099334716796875, -2.3263778686523438, 8.478290557861328, 1.8112258911132812, 21.110916137695312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000067.npy"}
{"epoch": 0.19705882352941176, "step": 68, "batch_size": 128, "mean": 6.520656108856201, "std": 9.101415634155273, "min": -12.291549682617188, "p10": -1.5605695724487303, "median": 4.397348403930664, "p90": 18.00747604370117, "max": 53.8946533203125, "pos_frac": 0.8359375, "sample": [1.1478958129882812, 18.373931884765625, 0.3742179870605469, 18.5101318359375, 12.478057861328125, 9.170455932617188, 30.61766815185547, 5.365234375, 6.105628967285156, 17.890670776367188, 2.650920867919922, -5.354225158691406, 0.9568462371826172, 8.145271301269531, 22.433731079101562, 3.576751708984375, 18.28002166748047, 1.7367134094238281, 4.398921966552734, 7.153221130371094, 5.299016952514648, 7.566623687744141, 4.5457763671875, -1.069580078125, 4.937263488769531, 6.313413619995117, 0.9706954956054688, -1.7569503784179688, 11.008903503417969, 3.2158870697021484, 2.5279388427734375, 18.343215942382812, 24.293319702148438, 22.85345458984375, 11.521177291870117, 1.1134719848632812, 14.8880615234375, 12.452537536621094, 13.356353759765625, -0.3727607727050781, 2.8413753509521484, 0.1774749755859375, 5.386543273925781, 20.06048583984375, -4.713920593261719, 14.092636108398438, 0.119415283203125, 14.923370361328125, 0.9354400634765625, 4.905696868896484, 9.654752731323242, 3.5741653442382812, 2.0279922485351562, 9.364398956298828, 0.9874534606933594, 1.5935745239257812, 3.743511199951172, 23.275787353515625, 0.5280990600585938, -6.2487335205078125, 0.556640625, 2.5492401123046875, 0.8982391357421875, 4.411346435546875, 9.752277374267578, 11.81591796875, 33.368438720703125, 14.04730224609375, 1.1612586975097656, -4.750419616699219, 10.685012817382812, 13.65608024597168, 9.640205383300781, 5.08473014831543, 14.073402404785156, 12.405975341796875, 1.2289276123046875, 9.491722106933594, 53.8946533203125, 1.1826667785644531, 8.149072647094727, 1.7140350341796875, 7.879600524902344, 6.6474456787109375, 0.0288238525390625, 2.0417022705078125, -3.0732040405273438, 3.346221923828125, 10.543197631835938, 4.7169342041015625, -1.2847824096679688, 7.709156036376953, 15.240867614746094, 5.3795013427734375, -1.668853759765625, -7.809806823730469, 3.048919677734375, 4.385671615600586, 8.880435943603516, 12.615074157714844, 3.5601425170898438, 1.6570625305175781, 9.534011840820312, 4.63592529296875, 9.502029418945312, -12.291549682617188, 16.622787475585938, 5.998117446899414, 37.60626220703125, 1.9171066284179688, 4.322624206542969, 4.395774841308594, 3.1153602600097656, -0.7277431488037109, -4.098503112792969, -3.1286354064941406, -1.265024185180664, -0.29032135009765625, -4.1033172607421875, 3.545705795288086, -3.7989044189453125, 1.8811683654785156, -0.30747222900390625, -1.5141620635986328, 5.115013122558594, 1.7180328369140625, 3.1508941650390625, 0.9985809326171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000068.npy"}
{"epoch": 0.2, "step": 69, "batch_size": 128, "mean": 8.265156745910645, "std": 9.185525894165039, "min": -18.622230529785156, "p10": -1.6631795883178704, "median": 7.013944625854492, "p90": 21.456153106689452, "max": 35.480804443359375, "pos_frac": 0.84375, "sample": [-2.79071044921875, 5.662265777587891, 8.672447204589844, 12.450538635253906, 25.377655029296875, 10.118789672851562, 5.554073333740234, 8.219039916992188, 11.005584716796875, -0.5793685913085938, 7.018699645996094, 13.701927185058594, 18.3189697265625, 12.040504455566406, 1.3023662567138672, 28.812408447265625, 12.151878356933594, 4.0286865234375, 32.483055114746094, 1.5149612426757812, 10.760448455810547, 10.881616592407227, 0.2455902099609375, 7.442394256591797, 6.615322113037109, 26.53857421875, 9.577762603759766, 10.610000610351562, 3.42767333984375, 5.989307403564453, 0.049510955810546875, 1.7177009582519531, 2.563568115234375, 11.39999008178711, -0.15967369079589844, 7.720268249511719, 35.480804443359375, 2.8717212677001953, 7.583194732666016, -3.5093040466308594, 13.890960693359375, 10.813995361328125, 21.47838592529297, 22.120346069335938, -2.836221694946289, 17.928775787353516, 1.9098281860351562, 5.043212890625, 6.868978500366211, 9.58935546875, 14.195293426513672, -18.622230529785156, -2.8890762329101562, -2.6273956298828125, 20.2841796875, 1.3462715148925781, 2.508350372314453, -0.3530235290527344, 1.3041648864746094, 8.221145629882812, 19.658920288085938, -1.394561767578125, 14.905616760253906, 21.216262817382812, 9.597373962402344, 0.29256439208984375, 7.009189605712891, 17.39630126953125, 9.745498657226562, 6.4499969482421875, -3.1848297119140625, -0.47023773193359375, 8.085235595703125, 1.6920852661132812, -2.048604965209961, 2.9320831298828125, 22.02093505859375, 24.959197998046875, 4.86639404296875, -1.4979972839355469, 0.721771240234375, 1.06365966796875, 30.19676971435547, 12.300430297851562, 16.300704956054688, 7.222255706787109, -5.5771942138671875, 5.259429931640625, 0.8292465209960938, 6.627756118774414, 14.77490234375, 3.239389419555664, 11.437416076660156, -7.3828125, -3.503765106201172, 14.347259521484375, 4.417724609375, 16.921165466308594, 1.747507095336914, 18.428009033203125, 12.2249755859375, 0.2569103240966797, 5.540517807006836, -5.722930908203125, 19.010147094726562, 4.063652038574219, 4.4621734619140625, 2.135761260986328, 10.836051940917969, 27.493408203125, 3.9097137451171875, 2.4490737915039062, 12.981922149658203, 8.612342834472656, 5.4378204345703125, -0.1826190948486328, 4.387794494628906, -4.777435302734375, 21.446624755859375, 22.80268096923828, 1.1083049774169922, 13.506904602050781, 5.221672058105469, 30.368865966796875, 7.463226318359375, 11.698366165161133, 8.489974975585938, 8.065528869628906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000069.npy"}
{"epoch": 0.20294117647058824, "step": 70, "batch_size": 128, "mean": 7.270971775054932, "std": 8.209802627563477, "min": -8.740737915039062, "p10": -1.7680311203002925, "median": 5.636969566345215, "p90": 17.705246353149413, "max": 34.16038513183594, "pos_frac": 0.828125, "sample": [1.0253524780273438, 10.817466735839844, 12.600288391113281, 11.278030395507812, -1.14422607421875, 10.528984069824219, 14.248714447021484, 0.4392814636230469, 16.582504272460938, 13.022308349609375, 1.0235176086425781, 2.6275711059570312, 20.465423583984375, 6.275154113769531, 0.6994400024414062, 13.043636322021484, 4.5858612060546875, 3.8153724670410156, 1.4833908081054688, 6.252388000488281, 5.383155822753906, 2.5971221923828125, 0.7980289459228516, 8.932205200195312, 3.316631317138672, -3.9577102661132812, 12.745552062988281, -3.976573944091797, 7.459144592285156, 0.5564727783203125, 28.15948486328125, 4.523738861083984, 7.326118469238281, 10.257087707519531, -8.740737915039062, 5.035491943359375, 4.2590789794921875, 34.16038513183594, 14.447647094726562, 17.828285217285156, 12.135917663574219, 2.298929214477539, 13.135364532470703, 13.535888671875, 5.448335647583008, 15.199180603027344, 17.652515411376953, -2.1333065032958984, 3.053752899169922, 4.5149993896484375, 5.54713249206543, 15.292098999023438, 7.686437606811523, 22.723068237304688, 19.299530029296875, 1.060699462890625, 19.13531494140625, 10.357330322265625, 12.805320739746094, 0.214324951171875, 16.55260467529297, 3.4943466186523438, 1.5603523254394531, 5.043327331542969, 0.3677101135253906, 15.577152252197266, 31.900497436523438, -0.5940933227539062, -0.212310791015625, 2.3130340576171875, 10.064323425292969, 25.241165161132812, 11.771102905273438, -0.10178756713867188, 5.726806640625, 16.957984924316406, -1.039825439453125, 22.971046447753906, 6.960735321044922, 22.090476989746094, -5.525728225708008, 12.083282470703125, 11.330169677734375, 1.3857574462890625, 8.135002136230469, 3.2428131103515625, 6.169647216796875, 2.602325439453125, 15.004112243652344, -0.616424560546875, -3.2334136962890625, 8.662025451660156, 0.8904533386230469, 15.396663665771484, 0.9188766479492188, -3.931243896484375, 8.89413070678711, 14.252639770507812, 18.36376190185547, -1.4157180786132812, 7.83172607421875, 1.9154338836669922, -2.4268627166748047, 6.191179275512695, 1.5277233123779297, -2.840301513671875, 15.067977905273438, -5.627410888671875, 1.5805511474609375, 4.96856689453125, 2.397541046142578, 4.8593902587890625, 7.3735198974609375, -1.6114845275878906, 11.475830078125, -6.4049072265625, -3.500396728515625, 16.95773696899414, 13.035316467285156, 11.244903564453125, -3.1314620971679688, -1.349700927734375, 0.30326080322265625, 20.562545776367188, 6.0282745361328125, 0.7184410095214844, 0.47521209716796875, 14.064079284667969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000070.npy"}
{"epoch": 0.20588235294117646, "step": 71, "batch_size": 128, "mean": 7.575624942779541, "std": 8.667098999023438, "min": -9.89971923828125, "p10": -2.120412445068359, "median": 6.912086486816406, "p90": 19.173412322998047, "max": 30.616809844970703, "pos_frac": 0.8125, "sample": [19.626678466796875, 7.885967254638672, -0.11032867431640625, 14.763320922851562, 19.243667602539062, 1.8093376159667969, 16.525054931640625, 3.5950164794921875, 0.8838214874267578, -5.94671630859375, 27.692062377929688, -9.22186279296875, 9.669784545898438, 12.116165161132812, 14.882844924926758, 16.325477600097656, 2.4098358154296875, 8.683563232421875, 16.21533203125, 4.170040130615234, 6.448524475097656, 5.800241470336914, -3.1017093658447266, 3.1656112670898438, 2.8849143981933594, -2.074615478515625, 2.628063201904297, 7.643486022949219, 8.204208374023438, 0.5008010864257812, 9.747039794921875, 11.112030029296875, 6.023338317871094, 3.9090499877929688, 0.8391799926757812, 11.8006591796875, 0.994903564453125, 10.608173370361328, -2.542205810546875, 3.9792633056640625, 18.824974060058594, 4.121051788330078, 13.802299499511719, 11.988662719726562, -9.89971923828125, 8.457792282104492, 9.717164993286133, 5.056781768798828, -1.8064193725585938, 5.374969482421875, 1.060821533203125, 25.121902465820312, 1.8293476104736328, 6.890045166015625, 20.283493041992188, 23.8558349609375, 17.780303955078125, 13.305976867675781, 4.162439346313477, 12.320396423339844, 13.111953735351562, 9.499786376953125, -2.2272720336914062, 13.402938842773438, 0.7258892059326172, 28.444366455078125, 0.8644809722900391, 8.29037857055664, -0.3937530517578125, 6.9341278076171875, -9.677055358886719, 7.374477386474609, -1.8875484466552734, -9.506057739257812, 0.86798095703125, 7.334358215332031, 9.534183502197266, 12.611640930175781, 3.0865859985351562, 8.351547241210938, 20.73046875, 17.630075454711914, -0.5975685119628906, 16.077194213867188, 26.891006469726562, 3.3157882690429688, -0.9459571838378906, 3.2216835021972656, 9.385574340820312, -8.516525268554688, -0.2664299011230469, -2.0520706176757812, -4.146812438964844, -4.402587890625, 14.183860778808594, 16.818885803222656, -3.683441162109375, 1.9807167053222656, 19.14330291748047, 4.94683837890625, 30.616809844970703, 5.956829071044922, 9.01773452758789, 22.1873779296875, 28.828567504882812, 8.40449333190918, 6.275367736816406, 5.513940811157227, 7.8744049072265625, -2.5114212036132812, 14.773345947265625, 17.411209106445312, 6.56903076171875, 9.796089172363281, -0.5217170715332031, 7.593658447265625, 9.212654113769531, 24.845054626464844, 1.0913829803466797, -1.1233291625976562, 0.2989349365234375, 11.329345703125, 4.765022277832031, 3.080080032348633, 10.631187438964844, 18.190704345703125, 3.9541893005371094, 7.123870849609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000071.npy"}
{"epoch": 0.2088235294117647, "step": 72, "batch_size": 128, "mean": 6.930836200714111, "std": 10.051806449890137, "min": -22.67877197265625, "p10": -3.1297313690185544, "median": 5.876714706420898, "p90": 17.87252197265625, "max": 47.84001159667969, "pos_frac": 0.8125, "sample": [4.758342742919922, -1.9004669189453125, 2.749542236328125, -3.674896240234375, 20.29302978515625, 8.301010131835938, -1.6588973999023438, 17.85308074951172, -7.577190399169922, -0.09995079040527344, 8.913780212402344, -1.228271484375, 2.519491195678711, 14.395156860351562, -1.9731616973876953, 5.66534423828125, 2.374420166015625, 3.671846389770508, 12.772697448730469, 6.150688171386719, 5.1572418212890625, 5.2880401611328125, 15.722221374511719, 23.080467224121094, 2.3447227478027344, 9.129974365234375, 0.794464111328125, 18.4083251953125, 1.1052474975585938, 17.917884826660156, 8.353309631347656, 6.063179016113281, 11.450302124023438, 3.643533706665039, 20.7159423828125, -22.67877197265625, -1.9943313598632812, 9.438858032226562, 7.735679626464844, -8.129203796386719, -0.7773094177246094, -3.1725120544433594, 7.927730560302734, 16.548236846923828, -6.334827423095703, 10.898029327392578, 1.1683616638183594, 1.6719608306884766, 14.770675659179688, -1.4006004333496094, 1.3176040649414062, 9.95303726196289, 0.1030120849609375, 14.697093963623047, 19.00792694091797, 7.086250305175781, 17.92760467529297, 5.6125030517578125, 10.723777770996094, -21.851287841796875, 0.1894855499267578, 7.734256744384766, 1.2467880249023438, 2.1473770141601562, 4.7092437744140625, 6.056922912597656, 2.5564002990722656, 14.20770263671875, -10.958999633789062, 5.26019287109375, 10.005451202392578, 21.113662719726562, 2.5008773803710938, 13.408744812011719, 15.950218200683594, 34.626739501953125, 35.123199462890625, 29.373687744140625, 3.146747589111328, -18.670501708984375, 12.053916931152344, 16.225204467773438, 1.729766845703125, 13.326278686523438, 2.187397003173828, 5.6927032470703125, -4.3332366943359375, 17.619449615478516, -3.1113967895507812, 13.345542907714844, 8.570228576660156, 12.76837158203125, 0.958251953125, -3.50830078125, -0.5724105834960938, 1.0394477844238281, 5.31427001953125, 12.014766693115234, 47.84001159667969, 2.4398727416992188, 17.050857543945312, 16.022216796875, 8.041473388671875, 11.505516052246094, 9.873161315917969, -10.392333984375, 7.669675827026367, 2.3020248413085938, 9.57461166381836, 2.300323486328125, -3.000762939453125, 10.211463928222656, 4.505374908447266, 12.625778198242188, 16.6085205078125, 14.683490753173828, 0.7252349853515625, 13.765945434570312, -5.597278594970703, 4.144905090332031, 27.29229736328125, 5.806835174560547, 12.641841888427734, 5.94659423828125, 7.567344665527344, 4.231292724609375, 7.419399261474609, 0.5629158020019531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000072.npy"}
{"epoch": 0.21176470588235294, "step": 73, "batch_size": 128, "mean": 6.295859336853027, "std": 10.227781295776367, "min": -25.375045776367188, "p10": -4.162129592895508, "median": 4.875911712646484, "p90": 18.68146514892578, "max": 47.67613983154297, "pos_frac": 0.7578125, "sample": [5.7787322998046875, 4.821144104003906, 12.029800415039062, 25.221580505371094, 2.2026596069335938, 15.039077758789062, 20.3076171875, 2.6673660278320312, 17.675086975097656, 5.628379821777344, 1.7555084228515625, -4.150615692138672, -25.375045776367188, -3.695037841796875, -0.14273452758789062, 33.34522247314453, 11.906463623046875, -5.635734558105469, 0.46945953369140625, 18.5068359375, 0.0476531982421875, 2.7293624877929688, 11.066360473632812, -1.129852294921875, 11.484237670898438, 2.6516265869140625, 4.335384368896484, 8.981220245361328, 10.188766479492188, 16.239913940429688, 4.19195556640625, 6.263645172119141, -2.1838912963867188, 2.5165252685546875, -2.9476470947265625, 0.2517242431640625, 12.809944152832031, 6.6763916015625, 3.5062637329101562, 15.366050720214844, 6.424263000488281, -6.22796630859375, 9.345405578613281, 1.445159912109375, 2.331817626953125, 7.0576171875, 7.340248107910156, -6.727142333984375, 0.22844696044921875, 1.4762954711914062, 18.760238647460938, 0.9731788635253906, 16.416221618652344, 27.81158447265625, 22.960647583007812, -4.150142669677734, 8.472442626953125, 0.47962188720703125, -1.3003768920898438, 16.06252670288086, 35.81608581542969, 5.792942047119141, -0.6153335571289062, 9.450927734375, 6.326887130737305, -4.76092529296875, 47.67613983154297, -2.321504592895508, 1.7709541320800781, -4.6668243408203125, -0.063690185546875, 1.1507186889648438, 4.815315246582031, 6.705421447753906, 1.790557861328125, -3.4809532165527344, -2.0302562713623047, 8.140274047851562, 11.697376251220703, 17.66307830810547, 3.8097286224365234, 10.6337890625, 29.510498046875, -8.198654174804688, 1.9635009765625, 3.9916553497314453, 1.9142074584960938, 18.894493103027344, 7.993927001953125, -8.651145935058594, 13.50698471069336, 22.843955993652344, 3.7545318603515625, 7.67852783203125, -2.6171646118164062, -3.377105712890625, 6.379261016845703, 5.170829772949219, 27.981216430664062, 10.58001708984375, 3.0793228149414062, 9.79812240600586, 7.194328308105469, 9.911117553710938, -12.046234130859375, 6.915409088134766, 4.9306793212890625, -3.5374755859375, 4.033599853515625, 21.735702514648438, 1.486419677734375, 1.420236587524414, -6.853858947753906, 6.6831817626953125, 18.647705078125, 9.850067138671875, 11.452560424804688, 1.42578125, 14.56494140625, -4.188995361328125, 5.741334915161133, -5.8663177490234375, 10.80615234375, -12.641937255859375, 12.454818725585938, -4.005546569824219, 18.550453186035156, -0.8733367919921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000073.npy"}
{"epoch": 0.21470588235294116, "step": 74, "batch_size": 128, "mean": 7.427906036376953, "std": 9.706584930419922, "min": -13.47735595703125, "p10": -2.550238037109375, "median": 5.441679000854492, "p90": 20.510619354248046, "max": 42.20147705078125, "pos_frac": 0.796875, "sample": [23.729461669921875, 0.4921302795410156, 9.976799011230469, 23.223983764648438, 13.9425048828125, 7.682731628417969, 11.252017974853516, 17.730289459228516, 21.049819946289062, 10.285171508789062, 10.667381286621094, 9.633064270019531, 6.489139556884766, 2.4676666259765625, 7.343711853027344, 2.5074195861816406, -1.1241207122802734, 31.514060974121094, 12.676620483398438, -12.813446044921875, 1.5346202850341797, 23.5152587890625, 6.182130813598633, 3.1241531372070312, 20.218971252441406, -5.763988494873047, -0.2022857666015625, 2.5213794708251953, 5.192865371704102, 14.756912231445312, -0.12499618530273438, -0.04273223876953125, 3.3222122192382812, -1.7674026489257812, 1.998708724975586, 2.4808120727539062, 11.348657608032227, 9.489219665527344, 4.616203308105469, 17.07486343383789, 27.757598876953125, 4.8079071044921875, 39.720794677734375, 17.57583236694336, 7.635028839111328, 6.138816833496094, 4.473117828369141, -2.6286697387695312, 14.320655822753906, 0.1467132568359375, -2.8579025268554688, 7.861976623535156, 4.002616882324219, 18.734643936157227, 0.9685211181640625, 9.449230194091797, 15.634506225585938, -2.5166244506835938, 4.825447082519531, -13.47735595703125, 19.011642456054688, -4.5453033447265625, 5.4935302734375, 0.07077407836914062, -1.8647842407226562, 9.766517639160156, 2.91668701171875, 10.78045654296875, 10.164894104003906, -2.7918624877929688, 4.2753753662109375, 9.065261840820312, 5.389827728271484, 1.5015716552734375, -1.0040359497070312, -2.9309539794921875, 23.18768310546875, 22.087806701660156, 0.2923011779785156, 14.05865478515625, 1.7041606903076172, 9.189216613769531, -13.111007690429688, 4.0336761474609375, -2.9261703491210938, 5.0432281494140625, 7.474102020263672, -2.1316890716552734, 3.175140380859375, 0.2245941162109375, -1.3441390991210938, 12.592369079589844, -0.06294441223144531, 3.7153072357177734, 8.219745635986328, 2.6764163970947266, -9.979789733886719, 32.783905029296875, 1.0582275390625, 20.30585479736328, 7.262733459472656, 9.776260375976562, 13.679466247558594, 18.595840454101562, 17.202266693115234, 5.241615295410156, 9.744583129882812, 0.8842353820800781, 16.96192169189453, -0.9947452545166016, 20.9884033203125, 7.364501953125, 10.3636474609375, 9.014175415039062, -9.365623474121094, 5.16131591796875, 6.54722785949707, 3.751708984375, 5.7827911376953125, -2.80377197265625, 10.479896545410156, 42.20147705078125, 23.4654541015625, -1.0137825012207031, 4.010530471801758, 4.314117431640625, 9.420539855957031, 4.394121170043945], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000074.npy"}
{"epoch": 0.21764705882352942, "step": 75, "batch_size": 128, "mean": 7.740569114685059, "std": 9.701170921325684, "min": -14.973098754882812, "p10": -2.1666994094848633, "median": 6.1941328048706055, "p90": 20.547462463378906, "max": 46.00091552734375, "pos_frac": 0.828125, "sample": [8.287696838378906, 13.577522277832031, 7.3957061767578125, 1.6391792297363281, 5.3335418701171875, 10.526741027832031, 1.5410232543945312, 0.6627693176269531, 15.10418701171875, 14.7076416015625, 5.2856292724609375, 5.679267883300781, 19.048736572265625, 19.161705017089844, 16.513774871826172, 0.7258510589599609, 20.69432830810547, 4.303258895874023, 9.607025146484375, 14.850730895996094, 40.913238525390625, 9.560661315917969, 1.4074554443359375, 25.053016662597656, -1.07855224609375, 0.130889892578125, 14.45733642578125, 2.1921615600585938, 0.23123931884765625, 21.581802368164062, -3.9904003143310547, 8.170455932617188, 5.649646759033203, 0.28448486328125, 4.173332214355469, 5.182502746582031, 9.396305084228516, 11.29262924194336, 2.8197174072265625, -3.6201171875, 6.140958786010742, 23.36414337158203, 5.438667297363281, 20.08858299255371, 9.918617248535156, 28.473114013671875, -2.993438720703125, 0.21541213989257812, 7.55816650390625, 11.042228698730469, 5.003047943115234, 7.853841781616211, -3.5410919189453125, 9.015251159667969, 6.890560150146484, 24.1539306640625, 2.1012001037597656, 5.90898323059082, 4.212837219238281, 8.03271484375, -2.274038314819336, 6.512758255004883, 6.847412109375, -8.846870422363281, 22.753143310546875, 16.872528076171875, -12.863555908203125, 6.247306823730469, 9.805316925048828, -2.0293045043945312, 7.808952331542969, 8.75251579284668, 1.2069854736328125, 5.78294563293457, 12.595321655273438, 17.787567138671875, 0.713775634765625, 3.8317413330078125, 12.112709045410156, 8.770837783813477, 4.436969757080078, 30.166534423828125, -1.293365478515625, 2.699556350708008, 13.454307556152344, 6.755605697631836, 27.981643676757812, -1.26708984375, 3.1866226196289062, 5.082935333251953, -4.6873931884765625, 6.413719177246094, 10.399356842041016, 1.4124126434326172, 8.602649688720703, 26.41583251953125, -1.4941787719726562, -2.0688323974609375, 46.00091552734375, -4.814653396606445, 11.533035278320312, 10.124473571777344, 2.7058563232421875, 20.21173095703125, 14.9520263671875, 8.602470397949219, 19.666351318359375, 20.484519958496094, 5.535064697265625, 24.552841186523438, 2.515155792236328, -5.8902740478515625, 8.648788452148438, 0.236968994140625, 3.5950698852539062, -0.15618133544921875, 4.710441589355469, -10.286121368408203, 6.4091796875, 5.684196472167969, -14.973098754882812, 4.613063812255859, -3.9115982055664062, 5.401313781738281, 11.7503662109375, -0.0023345947265625, -2.120697021484375, 9.096822738647461], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000075.npy"}
{"epoch": 0.22058823529411764, "step": 76, "batch_size": 128, "mean": 9.13976001739502, "std": 9.1727933883667, "min": -6.294872283935547, "p10": -1.0034049987792968, "median": 7.877347946166992, "p90": 21.30298690795898, "max": 46.228515625, "pos_frac": 0.8671875, "sample": [7.42315673828125, 17.83875274658203, 11.603775024414062, 5.226280212402344, 0.9702606201171875, 4.4290313720703125, 7.97454833984375, 4.834926605224609, 0.9430923461914062, 5.589427947998047, 4.423614501953125, 25.877899169921875, 28.490692138671875, 9.625782012939453, 4.630531311035156, 11.766193389892578, 1.4417457580566406, 6.238609313964844, 2.539398193359375, 14.932136535644531, -0.7749176025390625, 16.85543441772461, 6.424125671386719, 17.125267028808594, 11.801986694335938, 19.656570434570312, 19.542236328125, 4.336418151855469, 7.908039093017578, 12.900070190429688, 0.9567108154296875, 1.8843402862548828, 4.1999053955078125, 3.5540313720703125, 23.75823974609375, -3.0579071044921875, -6.294872283935547, 10.606800079345703, 9.583213806152344, -4.451873779296875, -5.7996063232421875, 26.461963653564453, 0.9547348022460938, -0.959228515625, -5.125297546386719, -2.2393798828125, 11.210699081420898, 6.566139221191406, 8.03616714477539, 13.6619873046875, 13.724174499511719, 17.285446166992188, -0.5163230895996094, 21.575927734375, 16.359130859375, -3.7761268615722656, 14.855522155761719, 10.250465393066406, 10.49908447265625, 1.156463623046875, 1.4427413940429688, 10.437187194824219, 1.4209976196289062, 3.7280120849609375, 12.342697143554688, 6.813007354736328, 18.887218475341797, 13.451663970947266, 4.361957550048828, 21.59819793701172, 4.282474517822266, 12.679229736328125, 4.2537994384765625, 10.253864288330078, 6.9347686767578125, 5.2783355712890625, 10.320438385009766, 4.968257904052734, -4.342182159423828, 7.846656799316406, 19.401634216308594, -1.1064834594726562, 2.2291088104248047, 7.273801803588867, 10.78338623046875, 8.627464294433594, 6.3957672119140625, 4.013265609741211, 2.2716140747070312, 15.767181396484375, -3.5372886657714844, 8.993896484375, 8.311111450195312, 2.7150421142578125, 25.16039276123047, -0.19264984130859375, 46.228515625, 7.921607971191406, 20.479568481445312, 12.58526611328125, 9.509990692138672, 0.5359878540039062, 25.943450927734375, 28.4627685546875, 2.5217819213867188, 18.240013122558594, 16.169052124023438, 38.280113220214844, 16.62713623046875, 2.2496795654296875, 8.561714172363281, -5.9308319091796875, 23.237884521484375, 7.2760162353515625, 11.853111267089844, 18.171035766601562, 2.734588623046875, -1.3852081298828125, 24.278182983398438, 7.103305816650391, 16.267906188964844, -5.6488189697265625, 6.273918151855469, 21.186012268066406, 8.262126922607422, 0.9077835083007812, 6.8822784423828125, 12.541130065917969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000076.npy"}
{"epoch": 0.2235294117647059, "step": 77, "batch_size": 128, "mean": 8.523674964904785, "std": 9.529865264892578, "min": -15.379348754882812, "p10": -2.78253231048584, "median": 7.466538429260254, "p90": 21.297893142700193, "max": 31.884674072265625, "pos_frac": 0.796875, "sample": [15.145675659179688, 3.3179473876953125, 1.776641845703125, 3.578765869140625, -8.191192626953125, 14.102653503417969, 1.9634876251220703, 4.2890625, 10.554290771484375, 2.1370105743408203, 4.077386856079102, 9.330249786376953, 15.457717895507812, 31.845840454101562, 3.0694427490234375, 5.099830627441406, 21.181686401367188, 0.9888858795166016, -0.2536582946777344, -0.5992813110351562, 9.409774780273438, 21.569042205810547, -3.7461776733398438, -2.7752017974853516, 12.375457763671875, -2.2657623291015625, 10.988754272460938, 12.370895385742188, 26.08039093017578, 0.6204319000244141, 20.671804428100586, 1.2102432250976562, 6.8895416259765625, 19.493698120117188, 9.929588317871094, 2.8572616577148438, -2.0709457397460938, 11.138025283813477, 20.69921875, -1.155019760131836, 5.7546234130859375, 21.8629150390625, 6.95733642578125, 3.738025665283203, 30.513671875, 7.366302490234375, 12.948883056640625, 7.235420227050781, 8.902549743652344, 4.5860137939453125, 1.4753265380859375, 19.252593994140625, 7.566774368286133, 1.7265052795410156, -2.8029251098632812, 8.50335693359375, 14.969429016113281, 9.285430908203125, 9.76031494140625, 6.612762451171875, -4.31787109375, -2.7996368408203125, 3.2767715454101562, 21.777984619140625, 0.246734619140625, 3.2735023498535156, -6.405517578125, -0.7085628509521484, 31.884674072265625, 8.000175476074219, 3.0454654693603516, 20.163436889648438, -0.9182910919189453, -0.488067626953125, 3.6663665771484375, -0.4446086883544922, 7.8198699951171875, 31.05218505859375, -6.2745819091796875, 4.414665222167969, -6.727508544921875, 14.518882751464844, -2.9664440155029297, 8.930301666259766, 27.13983154296875, 19.6788330078125, 2.150604248046875, 14.135513305664062, -3.5911598205566406, 10.877803802490234, 20.066543579101562, 5.1458587646484375, -2.0535507202148438, 13.50469970703125, 23.035995483398438, -3.8640823364257812, 12.368209838867188, -5.2164764404296875, 10.062385559082031, 20.808238983154297, -2.7246551513671875, 14.903289794921875, 10.165395736694336, 14.852165222167969, 12.097244262695312, 16.820297241210938, 14.565574645996094, 10.9453125, 7.192138671875, -15.379348754882812, 0.7618904113769531, 23.838897705078125, 5.302398681640625, 5.737159729003906, -1.5088844299316406, 9.629241943359375, 18.42327880859375, 0.68536376953125, 12.273555755615234, 10.478248596191406, 30.43781280517578, 14.116119384765625, 23.77886199951172, 9.198192596435547, 6.746551513671875, 5.344757080078125, 17.84654998779297, 20.855010986328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000077.npy"}
{"epoch": 0.22647058823529412, "step": 78, "batch_size": 128, "mean": 7.296419143676758, "std": 10.517468452453613, "min": -21.647048950195312, "p10": -4.08471450805664, "median": 6.389669418334961, "p90": 20.308260726928708, "max": 38.34423828125, "pos_frac": 0.796875, "sample": [8.330642700195312, 7.3536224365234375, 0.03240013122558594, -6.34844970703125, 12.085968017578125, -0.47930145263671875, 4.907066345214844, -1.9176788330078125, 6.476593017578125, 10.991748809814453, 11.191627502441406, -3.8997802734375, 22.391944885253906, 11.339908599853516, 6.3341217041015625, 23.972183227539062, -0.18364715576171875, 4.647979736328125, 18.08942413330078, -5.37493896484375, 25.303054809570312, 6.229209899902344, 2.370098114013672, 5.612907409667969, -2.51513671875, 13.466278076171875, 5.862159729003906, 16.109840393066406, 7.786842346191406, 5.600040435791016, 12.331686019897461, -9.183815002441406, 9.971017837524414, -1.030813217163086, 8.924476623535156, 0.2451305389404297, 10.456954956054688, -11.456787109375, 8.575019836425781, 12.732284545898438, 11.741081237792969, 2.8956375122070312, 18.600196838378906, 18.125823974609375, 3.2766799926757812, 7.136810302734375, 2.9484615325927734, 0.2933807373046875, 4.585866928100586, 11.091299057006836, 28.419586181640625, 7.543922424316406, 18.62664031982422, 13.99036979675293, 38.34423828125, 19.10143280029297, 7.1530303955078125, 7.794406890869141, 1.1846847534179688, 3.201505661010742, 9.595939636230469, 4.184736251831055, -1.770263671875, 6.618370056152344, 7.172264099121094, 12.885086059570312, 25.563026428222656, 9.531936645507812, 1.3921585083007812, 16.620010375976562, -10.881256103515625, -8.241214752197266, 7.095121383666992, 13.871475219726562, 31.989852905273438, 14.3551025390625, 6.311798095703125, 32.243865966796875, 6.548553466796875, 7.878271102905273, 26.149311065673828, 2.825580596923828, 0.015295028686523438, 0.9312515258789062, 8.101730346679688, -8.398849487304688, -21.647048950195312, -4.518135070800781, 14.523468017578125, 2.3296852111816406, 0.8115749359130859, 10.298368453979492, 10.281814575195312, 4.507829666137695, -20.329559326171875, -2.523151397705078, 3.3569488525390625, 6.445217132568359, -8.54510498046875, 20.872623443603516, 0.1537322998046875, -2.1130638122558594, -9.825702667236328, -3.81048583984375, 3.5205116271972656, 0.8783493041992188, 2.4583778381347656, 16.423606872558594, 31.39179229736328, 7.134521484375, 34.178070068359375, 27.703033447265625, 6.2881317138671875, -3.663330078125, 0.9671859741210938, 18.95619010925293, 15.649421691894531, 5.4909515380859375, 20.066390991210938, 19.405054092407227, -0.4941253662109375, 2.1753463745117188, 5.255287170410156, -1.6884841918945312, 5.30708122253418, 0.30249786376953125, -4.516227722167969, 14.506893157958984], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000078.npy"}
{"epoch": 0.22941176470588234, "step": 79, "batch_size": 128, "mean": 9.416430473327637, "std": 8.897610664367676, "min": -8.275358200073242, "p10": -0.5105556488037102, "median": 8.438515663146973, "p90": 21.783370971679688, "max": 38.5592041015625, "pos_frac": 0.8671875, "sample": [10.663589477539062, 15.866119384765625, 13.1533203125, 15.743148803710938, 10.760345458984375, 2.230958938598633, 7.329643249511719, -0.0739593505859375, 11.411026000976562, 13.26689338684082, 6.759452819824219, 3.4091644287109375, 4.163204193115234, -1.4376487731933594, 9.694747924804688, 3.019582748413086, 10.991378784179688, 0.35076904296875, 6.15423583984375, 8.418031692504883, 13.26580810546875, 13.978630065917969, 11.270315170288086, 5.6831512451171875, 12.021652221679688, 9.563907623291016, 6.5770416259765625, 2.4061203002929688, -0.07568740844726562, 10.587600708007812, 6.723289489746094, 15.077072143554688, 1.2967910766601562, 12.378570556640625, 22.14471435546875, 29.857666015625, 5.493467330932617, 8.783592224121094, 24.923858642578125, 11.390579223632812, 11.970596313476562, 29.902374267578125, -1.0138702392578125, 5.969709396362305, 10.87298583984375, 16.8150634765625, 2.05035400390625, -3.0275802612304688, 0.11739540100097656, 23.417861938476562, -1.6629352569580078, 3.7796974182128906, 1.4073734283447266, 11.87905502319336, 12.83056640625, -1.41632080078125, 33.166961669921875, 4.577949523925781, 4.528629302978516, -7.4633331298828125, 17.753143310546875, 2.1591720581054688, -0.2948493957519531, 0.20400238037109375, 2.583314895629883, 0.6890144348144531, 8.458999633789062, 7.942359924316406, 7.254768371582031, 0.5389938354492188, 5.947299957275391, 5.807769775390625, -8.275358200073242, 20.98412322998047, -1.014495849609375, 6.765357971191406, 23.811431884765625, -3.128843307495117, 18.43853759765625, 1.4242095947265625, 6.355628967285156, 11.3734130859375, -3.0569076538085938, -3.1413421630859375, 23.748001098632812, 20.481447219848633, 15.453018188476562, 29.412124633789062, 19.199264526367188, 5.883544921875, 26.3419189453125, 15.433219909667969, 0.2940826416015625, 12.813774108886719, 3.216379165649414, 12.323562622070312, 5.55830192565918, 10.415420532226562, 12.185455322265625, 7.773040771484375, 15.7198486328125, 12.565765380859375, -0.1193389892578125, -2.1160316467285156, 21.628509521484375, 10.211273193359375, 10.86334228515625, 11.902053833007812, 5.251106262207031, 0.9152908325195312, 9.581336975097656, 13.176223754882812, 17.590850830078125, 5.7655029296875, 3.3274612426757812, 28.199615478515625, 2.7257308959960938, 23.638355255126953, 0.36663818359375, 18.08648681640625, 13.913742065429688, -4.339714050292969, 17.140907287597656, 19.896095275878906, 38.5592041015625, 7.789455413818359, 16.50286865234375, 2.5235118865966797], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000079.npy"}
{"epoch": 0.2323529411764706, "step": 80, "batch_size": 128, "mean": 7.937648773193359, "std": 12.129645347595215, "min": -12.950019836425781, "p10": -5.0847677230834964, "median": 5.755588531494141, "p90": 26.914477920532224, "max": 58.13887023925781, "pos_frac": 0.7265625, "sample": [4.812049865722656, -1.0144729614257812, 14.682937622070312, -6.10191535949707, 14.528488159179688, 21.002471923828125, -1.0699577331542969, 0.9852981567382812, -0.107025146484375, 6.689342498779297, 34.68806457519531, 16.417091369628906, 22.299541473388672, 2.7218666076660156, -6.8028717041015625, 4.475791931152344, -9.987319946289062, 10.129371643066406, 7.9010467529296875, -0.19640159606933594, 8.06411361694336, -6.4698486328125, 2.689544677734375, 29.445175170898438, -0.16016006469726562, 5.4858551025390625, 14.345216751098633, 6.0698394775390625, 5.245231628417969, -8.326278686523438, 0.4598426818847656, 14.997611999511719, 11.955352783203125, 10.4935302734375, 3.8797149658203125, 1.5826835632324219, -3.835582733154297, -8.3714599609375, 6.025321960449219, -1.872528076171875, 2.6020545959472656, 3.011077880859375, 7.557147979736328, -10.32080078125, 7.1290435791015625, 10.120803833007812, 4.336553573608398, 8.702178955078125, 8.133544921875, 26.562362670898438, 1.380319595336914, 6.428119659423828, -1.414306640625, 38.52867889404297, 4.026004791259766, 0.7024574279785156, 11.148216247558594, 11.495277404785156, 0.2764015197753906, 36.36381530761719, 11.388328552246094, 7.471771240234375, 4.639411926269531, 6.964988708496094, 11.406539916992188, -3.659332275390625, 19.562210083007812, 13.484413146972656, -2.186368942260742, 1.8615188598632812, 15.09967041015625, 12.586563110351562, 1.535614013671875, -0.34604644775390625, -6.762916564941406, 30.994110107421875, 28.756431579589844, -10.066276550292969, 19.759323120117188, -4.4867706298828125, 6.9959259033203125, 29.216384887695312, -6.774778366088867, -4.658576965332031, 0.07706451416015625, 11.305923461914062, 28.160369873046875, 31.747634887695312, -1.1550331115722656, 22.915729522705078, 2.598224639892578, 6.654838562011719, 7.1427154541015625, -1.34765625, 8.88177490234375, 21.946269989013672, 9.373970031738281, -2.6775150299072266, -5.115327835083008, -5.0716705322265625, 12.35064697265625, 27.736080169677734, 30.782337188720703, -4.333221435546875, 12.333831787109375, -12.950019836425781, 2.26373291015625, 13.437271118164062, 58.13887023925781, 17.412586212158203, 3.9690399169921875, 14.829442977905273, -0.39940643310546875, 13.49951171875, -11.164840698242188, 22.40912628173828, 11.298015594482422, 1.4666595458984375, 0.9048728942871094, 4.752805709838867, 3.2347793579101562, -0.2815532684326172, -3.3317794799804688, 24.287155151367188, -1.1364936828613281, 36.69670104980469, 3.8817138671875, 11.186233520507812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000080.npy"}
{"epoch": 0.23529411764705882, "step": 81, "batch_size": 128, "mean": 7.379763603210449, "std": 9.909814834594727, "min": -11.473464965820312, "p10": -2.900371742248535, "median": 4.951995849609375, "p90": 23.002763366699217, "max": 34.62257385253906, "pos_frac": 0.796875, "sample": [0.8389530181884766, 21.023895263671875, -2.537261962890625, 3.6849288940429688, 32.03492736816406, 1.218109130859375, -10.6220703125, -7.803314208984375, 18.451980590820312, -6.595115661621094, -4.6708831787109375, 30.798843383789062, 2.644533157348633, -5.856494903564453, 3.554107666015625, -2.995349884033203, 2.5134544372558594, -2.8596668243408203, 25.14410400390625, 1.4597091674804688, 0.4117240905761719, -4.3549346923828125, -0.0335845947265625, 5.9905853271484375, -0.7223358154296875, 4.7704315185546875, 7.576812744140625, -1.6028938293457031, 0.2845458984375, 20.658851623535156, 3.035076141357422, 6.2712554931640625, 20.104759216308594, -5.349512100219727, 7.845733642578125, 28.498916625976562, 12.592628479003906, 4.347293853759766, 10.530763626098633, 1.6382675170898438, 9.895072937011719, 0.8314380645751953, 7.844581604003906, 4.1690673828125, 23.617965698242188, 14.168350219726562, 16.39910888671875, 0.06822967529296875, -11.473464965820312, 10.283279418945312, -5.8598175048828125, 7.2610321044921875, 7.143585205078125, 2.8895645141601562, 17.95441436767578, 16.140174865722656, 3.5889549255371094, 16.229957580566406, 11.114364624023438, 0.5443477630615234, 5.473926544189453, 11.468608856201172, 7.984321594238281, -6.2077484130859375, 0.2394256591796875, 4.9233551025390625, 21.889373779296875, 22.739105224609375, 8.946975708007812, 31.92437744140625, 3.400615692138672, 19.114669799804688, 9.698150634765625, 3.7706451416015625, 10.401565551757812, 10.470245361328125, 12.883872985839844, 5.739402770996094, 5.776153564453125, 2.8348388671875, -0.3990955352783203, 1.568328857421875, 17.817337036132812, 8.967973709106445, 28.11395263671875, 26.4736328125, 4.9806365966796875, -1.4564743041992188, 4.501766204833984, 0.5121231079101562, 2.7260284423828125, 34.62257385253906, 5.436027526855469, 2.4750518798828125, 31.989120483398438, 8.904220581054688, 8.815563201904297, -0.02449798583984375, 0.03981781005859375, 15.919258117675781, 6.1427001953125, -4.452125549316406, -1.4887542724609375, -0.08424949645996094, 2.3061599731445312, 15.776809692382812, 5.13275146484375, 24.093307495117188, -2.6837005615234375, 14.750167846679688, 9.628068923950195, 3.16107177734375, 4.27490234375, 6.768428802490234, 4.242340087890625, 26.201942443847656, 6.622493743896484, -2.4814453125, -3.2798919677734375, 0.5978317260742188, 1.3309059143066406, 1.7518844604492188, 9.371034622192383, 10.809600830078125, 2.5081119537353516, -1.182220458984375, 7.499980926513672, 27.104400634765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000081.npy"}
{"epoch": 0.23823529411764705, "step": 82, "batch_size": 128, "mean": 8.96574592590332, "std": 9.982397079467773, "min": -14.422904968261719, "p10": -2.154816436767578, "median": 8.491235733032227, "p90": 21.745304107666012, "max": 44.9632568359375, "pos_frac": 0.828125, "sample": [2.2255287170410156, 3.6255111694335938, 18.38674545288086, 13.408294677734375, -1.5564918518066406, -2.4239730834960938, 8.570865631103516, -0.16913604736328125, 6.207986831665039, 19.657958984375, -2.426158905029297, 10.42352294921875, 7.782558441162109, 11.047088623046875, 4.9736480712890625, 8.411605834960938, 9.162864685058594, 4.761528015136719, 3.273469924926758, -3.909292221069336, -1.292388916015625, 25.200164794921875, 11.309288024902344, 2.3453826904296875, 31.151611328125, 0.20104598999023438, 11.088550567626953, 10.0345458984375, 13.710220336914062, 28.059951782226562, 15.844444274902344, 1.2138442993164062, 2.9894332885742188, 12.269859313964844, 10.277366638183594, 2.8236160278320312, 4.279178619384766, 21.529708862304688, 14.637596130371094, 7.532238006591797, 3.3498764038085938, 1.9652557373046875, 12.449398040771484, -5.691802978515625, 0.12021636962890625, 10.790176391601562, 28.992401123046875, -1.0613861083984375, 8.837913513183594, -2.1291656494140625, 23.86394500732422, 13.62213134765625, -0.7069511413574219, 10.235786437988281, 4.187774658203125, 44.9632568359375, 2.7207794189453125, 8.0257568359375, -11.981582641601562, -7.079639434814453, 9.984443664550781, 2.50726318359375, 27.61121368408203, -4.111942291259766, -2.2146682739257812, 20.85968017578125, 16.487091064453125, 19.189163208007812, 11.175483703613281, 17.839317321777344, 15.872795104980469, 37.01679992675781, 13.312591552734375, 15.612884521484375, -1.178192138671875, 2.028209686279297, -5.047096252441406, 9.67584228515625, 5.668754577636719, 34.689239501953125, 10.696269989013672, 1.5641555786132812, -5.045707702636719, 18.04340362548828, 15.426841735839844, 6.255962371826172, 22.551498413085938, 11.299339294433594, 2.28448486328125, 14.921356201171875, 15.644121170043945, 5.9374847412109375, 27.708053588867188, 11.772537231445312, 19.563308715820312, 20.45958709716797, -0.6430625915527344, 14.828563690185547, 11.16595458984375, 1.7319183349609375, 0.9832744598388672, 11.047096252441406, 5.200225830078125, 9.18035888671875, 10.25218391418457, 8.395271301269531, 1.1130447387695312, 0.1679840087890625, 8.385894775390625, 1.690643310546875, 9.940452575683594, 0.2793540954589844, 14.834716796875, 3.9908676147460938, -2.4582443237304688, 12.906166076660156, 7.257535934448242, -1.263031005859375, 0.4937744140625, 2.4345664978027344, -14.422904968261719, 15.918182373046875, 22.24835968017578, -3.8710098266601562, 13.761835098266602, 24.63079833984375, 16.273406982421875, 2.98583984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000082.npy"}
{"epoch": 0.2411764705882353, "step": 83, "batch_size": 128, "mean": 8.253133773803711, "std": 11.033876419067383, "min": -10.358383178710938, "p10": -3.147670364379883, "median": 6.036167144775391, "p90": 24.566522216796873, "max": 58.9794921875, "pos_frac": 0.7734375, "sample": [2.7947006225585938, 7.655496597290039, -1.64678955078125, 4.715742111206055, -7.2510986328125, 0.4325885772705078, 6.349395751953125, 2.1483917236328125, -0.9007034301757812, 9.05975341796875, 16.858474731445312, 20.705535888671875, 2.541524887084961, 11.579097747802734, 5.534097671508789, 28.01177978515625, 28.994613647460938, -1.0819549560546875, 5.78411865234375, -5.170448303222656, 5.289424896240234, -0.17458343505859375, 3.661956787109375, 0.2929115295410156, -0.8430023193359375, 2.3925628662109375, 6.163963317871094, 10.66143798828125, -1.3456268310546875, 8.971389770507812, -0.5679512023925781, 21.122314453125, -1.2970733642578125, -0.5816726684570312, 0.3208503723144531, -4.864959716796875, -1.163421630859375, 12.692943572998047, 9.239158630371094, -3.1493072509765625, 0.6811447143554688, 9.227630615234375, -1.8497390747070312, 0.2534923553466797, 9.453895568847656, 11.257755279541016, 24.30792236328125, -3.1469688415527344, 2.6037750244140625, -4.222806930541992, 7.9402923583984375, 2.7932891845703125, 28.31292724609375, 5.169429779052734, 7.5982208251953125, 7.115394592285156, 10.319168090820312, 14.733665466308594, 26.938697814941406, 27.75971221923828, -0.7647247314453125, 6.943950653076172, 7.505226135253906, 11.732597351074219, 29.150985717773438, 27.217132568359375, -3.3187637329101562, 25.169921875, 5.9083709716796875, 18.871078491210938, -2.984344482421875, -6.071281433105469, 22.80450439453125, 5.0025177001953125, 1.7008209228515625, 9.243911743164062, 9.659210205078125, 58.9794921875, 3.7035465240478516, 2.7618637084960938, 7.617229461669922, 15.884429931640625, 11.855247497558594, 18.308311462402344, 7.383813858032227, 16.152870178222656, 14.92242431640625, 11.275123596191406, -0.6922454833984375, 18.1412353515625, 8.846908569335938, 3.815776824951172, -4.571483612060547, 0.12128829956054688, 3.2866744995117188, -5.306190490722656, 18.0028076171875, 0.2356414794921875, 9.306732177734375, 3.516490936279297, 2.9486255645751953, 21.295059204101562, -4.277130126953125, 3.328540802001953, -5.0962982177734375, -7.413503646850586, 2.5041961669921875, 7.5727081298828125, -10.358383178710938, 32.51165771484375, 13.904647827148438, 44.82086181640625, 4.584526062011719, 7.956066131591797, 0.43060302734375, 3.656574249267578, 19.9517822265625, 1.6669235229492188, 28.546295166015625, 6.186126708984375, 21.678340911865234, -2.115386962890625, 2.313426971435547, 17.188674926757812, 12.563674926757812, 13.972614288330078, 25.29925537109375, 8.279006958007812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000083.npy"}
{"epoch": 0.24411764705882352, "step": 84, "batch_size": 128, "mean": 9.513760566711426, "std": 10.558860778808594, "min": -21.25970458984375, "p10": -1.9086418151855467, "median": 9.218975067138672, "p90": 19.3317455291748, "max": 59.519775390625, "pos_frac": 0.8671875, "sample": [13.782909393310547, 5.234254837036133, 12.965545654296875, 15.443885803222656, 29.76141357421875, 18.94623374938965, 9.76483154296875, 9.769905090332031, 6.312953948974609, 0.5585708618164062, 12.601734161376953, 14.654685974121094, -2.4940872192382812, 14.027425765991211, 13.415657043457031, 13.647247314453125, 20.161235809326172, 13.9156494140625, 14.221817016601562, 12.591854095458984, 18.06922149658203, 3.709379196166992, 18.713607788085938, -8.83111572265625, 15.858673095703125, 1.3427810668945312, 10.367683410644531, 2.4709625244140625, 1.1482391357421875, 6.720659255981445, 3.0691452026367188, 15.796844482421875, 17.631702423095703, 8.749858856201172, 59.519775390625, 25.277931213378906, 11.660037994384766, 3.141510009765625, 7.2337799072265625, 9.918235778808594, 18.97624969482422, -21.25970458984375, 26.161293029785156, 8.105155944824219, -12.527557373046875, 11.419578552246094, -0.5389938354492188, 2.6782302856445312, 8.174385070800781, 13.916671752929688, 25.991287231445312, 9.59926986694336, 42.59828186035156, -9.879974365234375, 6.926368713378906, 13.704238891601562, 11.599357604980469, 1.265737533569336, 2.54754638671875, -1.8606109619140625, 8.356971740722656, 5.0293731689453125, -2.0207138061523438, -8.289993286132812, 4.096015930175781, 5.43402099609375, 2.4386634826660156, 2.722665786743164, 0.6728515625, 10.522550582885742, 24.34014129638672, 3.444293975830078, 13.853694915771484, 41.03596496582031, 15.5079345703125, 17.064117431640625, 23.037368774414062, 10.039024353027344, 15.166069030761719, 14.127899169921875, -5.394207000732422, 11.789356231689453, 14.574787139892578, 11.670003890991211, -2.7055511474609375, 1.5293598175048828, 2.7618942260742188, 4.6308441162109375, 11.721435546875, 9.210403442382812, 25.140640258789062, 3.322704315185547, 8.750747680664062, 4.777313232421875, 18.331314086914062, 5.281684875488281, -12.582969665527344, -3.1750717163085938, -8.516098022460938, 3.914764404296875, 14.268478393554688, 13.05377197265625, 8.895133972167969, 25.31982421875, 13.396430969238281, 3.5386199951171875, -0.7672576904296875, 16.473175048828125, 15.129463195800781, 6.109077453613281, 6.307823181152344, 8.476726531982422, 6.878789901733398, 15.325567245483398, 3.5932540893554688, 5.022056579589844, 13.89947509765625, 8.730951309204102, 4.793529510498047, 10.312355041503906, 17.104351043701172, -3.1181373596191406, 2.75775146484375, -0.4262542724609375, 23.76617431640625, 7.910030364990234, 11.72091293334961, 9.227546691894531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000084.npy"}
{"epoch": 0.24705882352941178, "step": 85, "batch_size": 128, "mean": 8.280359268188477, "std": 11.943631172180176, "min": -18.4754638671875, "p10": -2.3367443084716797, "median": 5.517549514770508, "p90": 25.90051574707031, "max": 53.91773986816406, "pos_frac": 0.7734375, "sample": [8.984764099121094, 12.749273300170898, 2.9414634704589844, -1.0467109680175781, 5.4345245361328125, 12.756690979003906, -1.8959732055664062, 22.807586669921875, 6.968959808349609, -2.3089752197265625, 3.173828125, 8.686332702636719, 9.98025894165039, 3.8958396911621094, 9.206573486328125, -2.8920059204101562, 3.2497024536132812, 6.676843643188477, 11.563652038574219, 6.3994598388671875, 16.604202270507812, 2.9223175048828125, 2.248199462890625, 35.70259094238281, 36.16868591308594, 15.640901565551758, 31.565231323242188, -1.309661865234375, 8.76224136352539, 3.736907958984375, 29.680953979492188, 8.878654479980469, 10.367233276367188, -1.0764274597167969, -1.141815185546875, 14.950336456298828, -13.293182373046875, -0.11561012268066406, 7.942718505859375, -2.8119354248046875, 9.048507690429688, 7.1151123046875, 16.335800170898438, -5.369926452636719, 8.71063232421875, 1.8385047912597656, 36.60794448852539, -0.789306640625, 0.3698081970214844, 35.209228515625, -0.2787590026855469, -0.8450393676757812, 3.3233261108398438, 0.6016769409179688, 7.231727600097656, -3.0787124633789062, 14.863245010375977, -10.1063232421875, 6.698417663574219, 3.918048858642578, 4.347053527832031, 19.199817657470703, 26.78570556640625, -1.5159835815429688, 12.240032196044922, -0.19281768798828125, 18.648696899414062, 7.314325332641602, 11.609050750732422, 5.030941009521484, -7.246337890625, -11.817604064941406, 19.30530548095703, 34.81996154785156, 9.102302551269531, 7.957283020019531, 4.452251434326172, 5.600574493408203, 4.167205810546875, 13.188068389892578, 4.879997253417969, 1.415313720703125, 44.03019714355469, -0.44829559326171875, 20.742469787597656, 3.1992111206054688, 6.010440826416016, 25.521148681640625, 3.7193756103515625, 5.326602935791016, 6.039884567260742, 3.3181381225585938, 10.652618408203125, 7.01837158203125, 31.548171997070312, 3.805023193359375, 13.044670104980469, 4.0913543701171875, 12.395851135253906, -2.0189132690429688, 4.606325149536133, -2.401538848876953, 2.4332637786865234, -15.535179138183594, 0.239105224609375, -0.5856170654296875, -18.4754638671875, 2.6665878295898438, -5.53118896484375, 19.448898315429688, 22.05809783935547, 53.91773986816406, 10.388938903808594, 9.254913330078125, -9.019500732421875, 17.444725036621094, 7.253852844238281, -1.869049072265625, 3.5239028930664062, 4.959007263183594, 4.2322235107421875, 19.465011596679688, 0.9815769195556641, 10.711402893066406, 27.91168212890625, 3.2395362854003906, 33.1287841796875, 3.9919261932373047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000085.npy"}
{"epoch": 0.25, "step": 86, "batch_size": 128, "mean": 8.388190269470215, "std": 11.13306999206543, "min": -14.542587280273438, "p10": -3.4112884521484372, "median": 6.841682434082031, "p90": 21.462001037597652, "max": 65.06790161132812, "pos_frac": 0.78125, "sample": [0.5029525756835938, 29.351669311523438, 18.77751922607422, 3.3931655883789062, 6.7281341552734375, -2.9780349731445312, 1.448324203491211, -2.11456298828125, 3.710784912109375, 4.2352447509765625, 9.654399871826172, -4.703521728515625, 23.80144500732422, 20.731666564941406, 12.738639831542969, 6.186342239379883, 29.33465576171875, 10.73062515258789, 9.876091003417969, -4.39752197265625, 10.321952819824219, 20.304214477539062, -0.7195053100585938, 30.35552978515625, -1.3308944702148438, -2.9436874389648438, 2.9920425415039062, -3.3860702514648438, 25.47467803955078, 12.63241195678711, 9.038497924804688, 11.852569580078125, -8.831161499023438, 5.337066650390625, 3.86151123046875, 26.576717376708984, 4.317344665527344, 6.4411468505859375, 6.987300872802734, 9.173343658447266, 26.32421875, 12.986766815185547, 2.0178756713867188, 65.06790161132812, -7.307834625244141, 9.815853118896484, 4.658512115478516, -14.542587280273438, 11.240440368652344, 6.276023864746094, 17.370559692382812, 3.289196014404297, 3.4191360473632812, 12.5982666015625, 8.30259895324707, 19.35675048828125, 31.33984375, 9.564613342285156, 0.5534477233886719, -4.650321960449219, 5.361572265625, 0.9617691040039062, 22.815505981445312, 7.0461578369140625, -1.1210174560546875, 10.069183349609375, -0.205780029296875, 8.40131950378418, 8.756576538085938, -0.647857666015625, 9.30380630493164, 4.5247344970703125, 0.08757781982421875, 9.927139282226562, -4.645353317260742, 12.603927612304688, -0.11836624145507812, 6.055046081542969, 2.25213623046875, 13.542259216308594, 16.988842010498047, 4.7878875732421875, 30.84978485107422, 17.927284240722656, 35.471160888671875, 9.486228942871094, 6.221757888793945, 15.906974792480469, 2.34222412109375, 14.532424926757812, 16.987548828125, 8.169929504394531, -2.691436767578125, 6.569240570068359, 14.785724639892578, -4.4569091796875, 4.0816802978515625, -8.510673522949219, -0.44976806640625, 2.530242919921875, -3.4701309204101562, 8.711280822753906, 13.656211853027344, 11.885726928710938, 4.292030334472656, 14.004934310913086, 15.546119689941406, 14.533576965332031, -2.148681640625, 42.082122802734375, -4.7419586181640625, 9.997243881225586, -2.608051300048828, 5.8708038330078125, 10.319122314453125, 1.0941925048828125, 6.955230712890625, -9.98468017578125, 2.1071014404296875, 13.13020133972168, 7.940185546875, 2.9089813232421875, 20.881927490234375, 7.928504943847656, -0.6834011077880859, -4.835243225097656, 15.992462158203125, 2.6777000427246094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000086.npy"}
{"epoch": 0.2529411764705882, "step": 87, "batch_size": 128, "mean": 8.416332244873047, "std": 10.423148155212402, "min": -17.954544067382812, "p10": -2.3751274108886715, "median": 7.370556831359863, "p90": 19.80671463012695, "max": 41.2015380859375, "pos_frac": 0.8125, "sample": [3.7620697021484375, 8.111618041992188, 8.587064743041992, 5.33758544921875, 0.03679847717285156, 9.505325317382812, -0.16391754150390625, 8.64007568359375, 6.327323913574219, 20.378204345703125, 6.887811660766602, 14.849273681640625, 7.432882308959961, -1.8777847290039062, 11.40731430053711, 0.7634201049804688, -6.4926300048828125, 6.210626602172852, 12.5072021484375, -1.1921844482421875, 25.4488525390625, 18.72802734375, -1.3523311614990234, 20.395965576171875, -4.848567962646484, 15.856925964355469, 8.533390045166016, 7.465511322021484, 11.006359100341797, -8.745681762695312, -5.3936309814453125, 24.33758544921875, 18.125686645507812, 2.404569625854492, 2.5318450927734375, -1.5877227783203125, 1.5673942565917969, -2.84014892578125, 13.226398468017578, 17.136566162109375, 9.109169006347656, 7.207447052001953, 16.589508056640625, 5.481891632080078, 24.54220962524414, 7.444911956787109, 5.714954376220703, 5.164644241333008, 23.295318603515625, 19.18219757080078, 0.47260093688964844, -2.036163330078125, 4.799713134765625, 1.6146488189697266, 10.739738464355469, 13.335601806640625, 2.481353759765625, 2.3709049224853516, 5.6908111572265625, 9.614145278930664, 7.7996826171875, 4.378257751464844, 16.51656723022461, -2.5890655517578125, 7.210170745849609, 2.515625, 0.6694908142089844, -2.2834396362304688, -3.86065673828125, 8.717262268066406, -2.0546112060546875, 5.007940292358398, 3.52679443359375, 8.726455688476562, 6.900474548339844, 4.851593017578125, 8.159847259521484, 3.5795364379882812, 11.787689208984375, 1.5120010375976562, 9.996063232421875, -3.937835693359375, 16.436920166015625, 6.383876800537109, 19.418243408203125, 14.591629028320312, -10.038742065429688, 40.559837341308594, 15.485031127929688, 15.196640014648438, 5.526054382324219, 14.036989212036133, 12.761260986328125, 22.11817169189453, 9.871917724609375, 17.93035888671875, 13.433633804321289, 33.71736145019531, 35.516700744628906, -7.009635925292969, 0.8596038818359375, -0.5763702392578125, 16.820104598999023, 2.6104736328125, 17.38751220703125, -14.185401916503906, 19.561790466308594, 9.394607543945312, 9.842536926269531, 9.711999893188477, 9.272918701171875, 3.3083972930908203, 37.788360595703125, 35.79839324951172, 4.191652297973633, 15.306991577148438, -10.155935287475586, -17.954544067382812, -1.716888427734375, 14.325469970703125, 11.673999786376953, -0.147857666015625, 3.1189346313476562, 1.3421497344970703, 41.2015380859375, 7.308231353759766, 18.439498901367188, 3.8656063079833984], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000087.npy"}
{"epoch": 0.25588235294117645, "step": 88, "batch_size": 128, "mean": 9.63380241394043, "std": 10.530073165893555, "min": -14.586807250976562, "p10": -1.9108848571777342, "median": 8.196111679077148, "p90": 21.64133071899414, "max": 39.07225036621094, "pos_frac": 0.84375, "sample": [3.0338268280029297, 8.061281204223633, 12.250703811645508, -6.913909912109375, 15.991363525390625, 16.17022705078125, 16.44548797607422, 6.2806243896484375, 10.002531051635742, 6.2887725830078125, 11.215660095214844, 29.763763427734375, 25.643516540527344, 3.3836402893066406, 14.756389617919922, 14.575515747070312, 39.07225036621094, 8.330942153930664, 2.973785400390625, 9.084096908569336, -1.4317626953125, 20.46355438232422, 0.15758323669433594, 4.933801651000977, 5.263420104980469, 12.037605285644531, 10.69717025756836, 3.8941192626953125, 12.554340362548828, 7.316307067871094, 6.795942306518555, 4.6961822509765625, 2.283052444458008, 32.64507293701172, 36.149017333984375, -8.552200317382812, -3.9467620849609375, 23.078170776367188, 8.562515258789062, 1.7093505859375, 9.23227310180664, 6.090236663818359, 26.782943725585938, 4.532814025878906, 20.274925231933594, 9.321784973144531, -11.031803131103516, 4.906929016113281, 13.296960830688477, 4.707374572753906, 16.96831512451172, 11.5361328125, 1.1707687377929688, 20.074623107910156, 20.070877075195312, 13.424530029296875, 18.33519744873047, 7.365228652954102, 2.6812896728515625, 32.42052459716797, 3.4846420288085938, 5.480232238769531, -5.897613525390625, -5.179649353027344, 3.4095687866210938, 0.0947113037109375, 7.888038635253906, 21.367599487304688, 18.162399291992188, 11.713460922241211, 2.08489990234375, 35.724609375, 10.37030029296875, 21.371448516845703, 0.4387054443359375, -0.6805267333984375, 4.0833282470703125, 0.13327789306640625, 17.875877380371094, 0.23169326782226562, 9.476669311523438, -14.586807250976562, 0.08761978149414062, 15.247184753417969, 3.9005088806152344, 4.701057434082031, 11.208297729492188, 14.120716094970703, 5.345602035522461, 21.536376953125, 17.87002182006836, -1.2476730346679688, -4.995079040527344, -2.0535049438476562, 13.345630645751953, 16.732589721679688, -2.4197731018066406, 2.061920166015625, -7.951686859130859, 33.5064697265625, 6.202632904052734, -3.6312522888183594, 29.625518798828125, 9.62872314453125, 14.7420654296875, -1.2723255157470703, -2.370563507080078, 18.73896026611328, 20.203460693359375, -1.3050308227539062, 17.983535766601562, 13.259262084960938, 5.1786041259765625, 3.7380332946777344, -1.849761962890625, 4.463249206542969, 11.34817123413086, 18.7684326171875, 17.594039916992188, 21.88622283935547, -1.4010791778564453, 3.3737335205078125, 9.000387191772461, 2.5997772216796875, 20.737985610961914, 34.138641357421875, 3.783946990966797, 12.039314270019531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000088.npy"}
{"epoch": 0.25882352941176473, "step": 89, "batch_size": 128, "mean": 10.792949676513672, "std": 12.60999584197998, "min": -10.80398178100586, "p10": -2.3676429748535153, "median": 7.477016448974609, "p90": 25.284900665283203, "max": 45.89594650268555, "pos_frac": 0.8203125, "sample": [3.763824462890625, -2.255401611328125, 10.49798583984375, 6.032096862792969, 14.604667663574219, 19.886547088623047, -5.7943267822265625, 10.944053649902344, 2.9022674560546875, 38.266082763671875, -0.5832595825195312, 29.619354248046875, 5.183452606201172, 7.452945709228516, -7.3792266845703125, 17.332847595214844, 15.521926879882812, -2.9563217163085938, 9.225685119628906, 6.3130340576171875, -1.81195068359375, 17.08162498474121, 2.472135543823242, 0.7997016906738281, 14.98556900024414, -1.9414291381835938, -0.17194366455078125, 12.600496292114258, 7.189538955688477, -3.1886653900146484, 19.484649658203125, 4.396574020385742, 20.83802032470703, 21.14574432373047, 38.66349792480469, 0.8121166229248047, 6.739147186279297, 7.399566650390625, 7.665000915527344, 6.306209564208984, -0.36608314514160156, 1.6664257049560547, 45.89594650268555, -10.80398178100586, 2.5268630981445312, 10.889408111572266, 9.013683319091797, 12.3489990234375, -1.8702850341796875, 12.384124755859375, 4.329410552978516, 6.116376876831055, 5.0827484130859375, 15.272872924804688, 17.520553588867188, 1.8708572387695312, 1.9278392791748047, 11.73872184753418, 19.259490966796875, 0.2792205810546875, 38.97221374511719, 24.366180419921875, 9.500411987304688, -0.21274566650390625, 7.501087188720703, 1.4659957885742188, 13.852104187011719, -10.795669555664062, 22.613571166992188, 6.95367431640625, 21.086082458496094, 0.24548912048339844, 41.063201904296875, 5.702293395996094, 41.95771789550781, -7.1315765380859375, 17.711700439453125, 8.072532653808594, 4.1789703369140625, 11.595726013183594, 6.292306900024414, 1.467092514038086, 18.98211669921875, 38.38639831542969, 11.753265380859375, -5.930900573730469, 4.969341278076172, 13.274429321289062, 16.343059539794922, 45.35151672363281, 4.054435729980469, 44.232574462890625, 25.200363159179688, 1.3465900421142578, -5.5705108642578125, 11.760383605957031, 7.333595275878906, 16.433067321777344, 5.587440490722656, 16.59978485107422, 21.07593536376953, 3.3182296752929688, 16.604820251464844, 11.751968383789062, 2.0219345092773438, 41.78268051147461, 20.16876220703125, -4.00555419921875, 23.271133422851562, 4.749544143676758, -1.8754959106445312, 20.14697265625, -1.4046058654785156, 11.504539489746094, -3.4142532348632812, 25.482154846191406, 1.1424503326416016, 16.772083282470703, 16.668790817260742, 7.1841278076171875, 14.26247787475586, 6.2443695068359375, -2.6295394897460938, 13.485527038574219, 42.12428283691406, -7.946136474609375, 14.6246337890625, 6.693386077880859], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000089.npy"}
{"epoch": 0.26176470588235295, "step": 90, "batch_size": 128, "mean": 9.177949905395508, "std": 11.33342170715332, "min": -17.746719360351562, "p10": -4.478691864013672, "median": 9.375956535339355, "p90": 22.313771438598632, "max": 37.606597900390625, "pos_frac": 0.75, "sample": [11.956676483154297, 22.179603576660156, 6.379878997802734, 21.396095275878906, 4.650327682495117, -0.13834381103515625, 19.65216064453125, 17.71640396118164, -3.256683349609375, 22.438262939453125, 9.708641052246094, 1.6851806640625, 10.397781372070312, -2.6279067993164062, 31.519500732421875, 1.0560226440429688, 21.662818908691406, 6.452545166015625, -4.962677001953125, 17.519454956054688, -1.894418716430664, 20.093238830566406, -0.8122329711914062, 12.18954086303711, 21.150928497314453, -7.669921875, -2.7397079467773438, 17.032699584960938, 9.027801513671875, 32.9403076171875, 14.905937194824219, 19.46155548095703, -5.105207443237305, -4.1093902587890625, 0.7317123413085938, 10.586227416992188, 2.357542037963867, 9.278671264648438, -1.6879196166992188, 22.260417938232422, 23.782306671142578, -0.16633224487304688, 15.936195373535156, 36.70616149902344, 9.630653381347656, 20.17249298095703, 22.628429412841797, 37.016143798828125, 9.018150329589844, -1.9891319274902344, 4.679683685302734, -0.495941162109375, 29.438579559326172, 10.701904296875, 2.3237152099609375, 9.644027709960938, -12.297439575195312, 16.183242797851562, -11.771736145019531, 1.6631278991699219, -4.3392333984375, 17.297470092773438, 2.4695472717285156, -5.048484802246094, 11.051210403442383, 24.25079345703125, 10.769683837890625, -4.461212158203125, -0.857696533203125, 29.348175048828125, 3.32861328125, 7.1800994873046875, 1.769775390625, -11.410385131835938, 9.293157577514648, 11.018890380859375, 21.3583984375, -9.875885009765625, 9.458755493164062, 13.312728881835938, 25.8055419921875, -0.5545425415039062, 20.926834106445312, 0.11869430541992188, 1.219970703125, 13.144294738769531, -0.6889801025390625, 12.417724609375, 21.12368392944336, 8.256866455078125, 9.766765594482422, 18.271217346191406, 3.3019981384277344, 33.46747589111328, 8.432228088378906, -4.519477844238281, 16.001060485839844, 9.917724609375, 0.7755889892578125, 7.037528991699219, -17.746719360351562, 18.710891723632812, 10.782012939453125, 5.677032470703125, 10.35552978515625, 20.641372680664062, -5.149955749511719, 6.250816345214844, 7.209087371826172, -0.4390411376953125, 15.871734619140625, 3.45379638671875, 16.78973960876465, 4.038078308105469, 16.465621948242188, 10.82834243774414, -8.17205810546875, -1.0487556457519531, 7.6642913818359375, 13.648490905761719, 14.986740112304688, 17.659622192382812, -0.9306411743164062, -13.4937744140625, 37.606597900390625, 17.205001831054688, 8.078922271728516, 11.51043701171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000090.npy"}
{"epoch": 0.2647058823529412, "step": 91, "batch_size": 128, "mean": 9.552933692932129, "std": 11.818706512451172, "min": -25.900108337402344, "p10": -4.163093566894531, "median": 8.163262367248535, "p90": 24.234769439697263, "max": 46.510772705078125, "pos_frac": 0.8125, "sample": [10.328201293945312, 3.5997314453125, 11.556268692016602, 19.892375946044922, 31.581649780273438, -0.9607772827148438, -6.3265533447265625, 3.7443981170654297, 5.3239593505859375, 2.3837814331054688, 4.599437713623047, 8.495738983154297, 9.640405654907227, -5.377685546875, -4.62274169921875, 23.89263916015625, 0.140228271484375, 9.09166145324707, 5.314231872558594, 4.709150314331055, 7.613636016845703, 6.205230712890625, 12.05843734741211, -8.780326843261719, 9.039619445800781, 14.289207458496094, 18.303234100341797, 46.510772705078125, 11.689010620117188, 9.470985412597656, 5.767635345458984, -5.629865646362305, 0.09515380859375, 4.54266357421875, 11.179901123046875, 4.753391265869141, 1.2296142578125, -6.937103271484375, -8.2808837890625, 19.105606079101562, -4.2937774658203125, 9.0074462890625, -4.107086181640625, 45.038185119628906, 8.1644287109375, 5.114465713500977, 6.303226470947266, 7.214988708496094, 8.64900016784668, -13.481712341308594, 24.120811462402344, 8.994361877441406, 23.569366455078125, 13.10340690612793, 1.0287017822265625, 7.04876708984375, 0.7249126434326172, 7.494987487792969, 22.897903442382812, 13.072868347167969, 11.912887573242188, 5.71044921875, 10.32861328125, 10.640239715576172, 19.918930053710938, 14.258987426757812, 24.50067138671875, 18.765533447265625, 20.728595733642578, 12.2965087890625, 30.174392700195312, 10.808975219726562, 26.564970016479492, 6.118019104003906, 2.7606773376464844, 0.8309249877929688, 19.913589477539062, 0.4615478515625, 37.1858024597168, 8.16209602355957, -1.05743408203125, 0.49675750732421875, 26.744888305664062, 23.616928100585938, 2.9712295532226562, -2.0351028442382812, 20.196128845214844, 15.540435791015625, -13.147132873535156, -1.3380279541015625, 34.9888916015625, 12.607481002807617, 12.01333999633789, -1.7428703308105469, -6.29266357421875, 4.841693878173828, 25.328628540039062, 10.025766372680664, 11.888534545898438, 14.27618408203125, -1.0070953369140625, -0.7477302551269531, 20.203033447265625, -25.900108337402344, 6.915367126464844, -4.371795654296875, 26.56385040283203, 1.6230316162109375, 6.144704818725586, 4.8257598876953125, 20.351974487304688, 0.6654739379882812, -0.63800048828125, 9.461114883422852, 3.4486961364746094, 11.115974426269531, 17.060653686523438, 21.48894500732422, 15.935810089111328, 34.852630615234375, -2.3401641845703125, 4.064949035644531, 34.853057861328125, 6.733074188232422, 22.030441284179688, 7.561580657958984, -1.5615158081054688, 22.578521728515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000091.npy"}
{"epoch": 0.2676470588235294, "step": 92, "batch_size": 128, "mean": 7.6110639572143555, "std": 9.969470024108887, "min": -11.852523803710938, "p10": -2.9016071319580075, "median": 6.545036315917969, "p90": 19.812689971923827, "max": 57.07429504394531, "pos_frac": 0.7734375, "sample": [11.133705139160156, 18.595958709716797, 1.0522327423095703, -3.8847732543945312, 1.2834720611572266, 9.998527526855469, 23.955703735351562, 15.8525390625, 34.54988098144531, 14.76947021484375, 3.8370361328125, -1.0279655456542969, 16.3287353515625, 7.763965606689453, 26.65386199951172, 9.129096984863281, 2.8564281463623047, 0.18436431884765625, 17.20538330078125, 2.692596435546875, 12.611564636230469, 19.61518096923828, 3.8702354431152344, 25.117198944091797, -1.096405029296875, -2.8082351684570312, 12.711833953857422, 8.652214050292969, 14.179946899414062, 25.251861572265625, -0.16129302978515625, 14.870464324951172, 12.831298828125, -0.635711669921875, -3.087310791015625, 2.1387786865234375, -2.5870628356933594, 2.841663360595703, -3.5605621337890625, 13.436012268066406, -0.7224884033203125, 0.9191570281982422, 15.450241088867188, 5.596591949462891, 2.6720046997070312, -2.9956817626953125, 8.374679565429688, 10.195026397705078, 3.9082489013671875, -2.2008819580078125, 43.321044921875, 4.544158935546875, 7.355918884277344, 13.157119750976562, 6.431173324584961, -4.0681610107421875, 6.7622833251953125, 19.290130615234375, 8.543525695800781, 7.904458999633789, 9.2196044921875, -2.987865447998047, 6.6152496337890625, 8.005973815917969, 6.755859375, -1.4152679443359375, 3.8145294189453125, 9.788799285888672, 2.7097549438476562, 11.149375915527344, -11.852523803710938, 4.326656341552734, 10.591300964355469, 6.906761169433594, 11.362361907958984, 20.675216674804688, 0.122650146484375, 13.984207153320312, -3.0685882568359375, 3.9567947387695312, 20.273544311523438, 20.671592712402344, -0.9378280639648438, 14.866485595703125, 5.338348388671875, -9.123886108398438, 6.474822998046875, 6.7195892333984375, -0.079681396484375, -7.123008728027344, -0.6578960418701172, 5.476081848144531, -4.293430328369141, 6.659568786621094, -2.8646392822265625, 5.150459289550781, 24.090736389160156, 1.300750732421875, 57.07429504394531, 10.679901123046875, 2.223125457763672, 10.864051818847656, 2.9811553955078125, 1.7672576904296875, 8.897579193115234, 6.200315475463867, -2.350627899169922, -7.773456573486328, 10.843994140625, 6.811668395996094, 20.792465209960938, 0.8995361328125, 13.028709411621094, 7.3066558837890625, -0.058990478515625, 1.7242622375488281, 2.539031982421875, 6.179401397705078, -3.8955917358398438, 9.879575729370117, 8.031478881835938, 17.94635009765625, 1.9727821350097656, 6.265907287597656, 11.2052001953125, -2.3086204528808594, 12.659881591796875, 27.66602325439453], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000092.npy"}
{"epoch": 0.27058823529411763, "step": 93, "batch_size": 128, "mean": 8.605449676513672, "std": 10.333915710449219, "min": -18.306549072265625, "p10": -2.3723705291748045, "median": 8.00149154663086, "p90": 21.2970645904541, "max": 41.41914367675781, "pos_frac": 0.796875, "sample": [-12.455577850341797, -0.580078125, 1.2130470275878906, 14.873397827148438, 13.07187271118164, 2.3194217681884766, -4.789377212524414, 7.975120544433594, 2.438138961791992, 6.3655548095703125, 14.552879333496094, -7.5779266357421875, -0.8042583465576172, 11.355350494384766, 6.276462554931641, 6.3293609619140625, 17.483661651611328, 14.464035034179688, -11.406944274902344, 1.1153182983398438, -7.096900939941406, 11.932350158691406, 29.97930908203125, 2.5465354919433594, 21.07166290283203, 8.964883804321289, 31.316314697265625, -1.4033050537109375, 3.6454620361328125, 9.992620468139648, -1.5328750610351562, 25.70761489868164, 10.560333251953125, 12.728713989257812, 7.482429504394531, 18.522850036621094, 11.211807250976562, -8.751407623291016, 24.151138305664062, 16.73333740234375, 12.970211029052734, 2.0751113891601562, 1.2306060791015625, 3.8139419555664062, 5.152290344238281, 8.027862548828125, 20.751785278320312, -2.5987548828125, 0.4712677001953125, 1.9923439025878906, -3.6822280883789062, 2.65972900390625, -3.1069564819335938, 2.7421531677246094, 26.599822998046875, 18.234664916992188, 7.746856689453125, 1.9964523315429688, -9.175369262695312, 30.190139770507812, -0.42303466796875, 1.6787567138671875, 3.7062301635742188, -2.275348663330078, 17.723419189453125, 6.33625602722168, 14.208877563476562, 5.256980895996094, 8.63907241821289, 10.548126220703125, 13.963104248046875, 7.582489013671875, 3.951305389404297, -8.006988525390625, 5.188213348388672, -0.06413459777832031, 15.356430053710938, 8.72354507446289, 14.9832763671875, 6.790565490722656, 18.570240020751953, -0.6499385833740234, 0.5436744689941406, 21.823001861572266, 35.37379455566406, 24.319625854492188, 15.659111022949219, 0.6480178833007812, -2.2524185180664062, 2.934295654296875, 15.706771850585938, 25.939334869384766, 12.54425048828125, 17.627708435058594, 6.443817138671875, 12.157222747802734, 17.590499877929688, 9.58620834350586, 8.329269409179688, 13.950424194335938, -1.0149612426757812, -0.17525863647460938, 11.300201416015625, 4.559135437011719, 8.464111328125, 14.488243103027344, 10.21236801147461, 28.959693908691406, 29.75897216796875, 13.74700927734375, 15.847305297851562, -6.760047912597656, 20.274871826171875, -0.15603256225585938, 9.362693786621094, 17.707427978515625, 2.26898193359375, 41.41914367675781, -18.306549072265625, 10.475929260253906, 0.3987579345703125, -0.33431243896484375, 5.466516494750977, 17.804885864257812, 8.813644409179688, 12.319473266601562, 3.81500244140625, 11.9940185546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000093.npy"}
{"epoch": 0.2735294117647059, "step": 94, "batch_size": 128, "mean": 8.562750816345215, "std": 11.728584289550781, "min": -20.751449584960938, "p10": -4.18363800048828, "median": 6.735401153564453, "p90": 22.94702529907226, "max": 49.51446533203125, "pos_frac": 0.828125, "sample": [-20.703895568847656, 29.790325164794922, -0.3102130889892578, 6.755363464355469, -7.318515777587891, 17.898780822753906, 17.696178436279297, 30.500946044921875, 17.384536743164062, 4.79522705078125, 1.7116775512695312, -11.020479202270508, 9.322525024414062, -5.699462890625, 42.025848388671875, -9.696731567382812, 4.455413818359375, -5.873638153076172, -3.8830795288085938, 33.086936950683594, 14.460037231445312, 11.209270477294922, 0.8978729248046875, 10.382209777832031, -20.751449584960938, 6.7154388427734375, 3.0006561279296875, 2.1231155395507812, 3.29425048828125, 14.722747802734375, 13.61079216003418, 20.60028076171875, 12.960014343261719, 11.461952209472656, 6.330635070800781, 1.0463333129882812, 40.02754211425781, 3.8972396850585938, 20.239532470703125, 16.77722930908203, 1.6906051635742188, 14.079231262207031, 30.880706787109375, -6.3361968994140625, 11.983589172363281, -2.807720184326172, -7.0611724853515625, 5.137176513671875, 9.347095489501953, 22.277542114257812, 13.349037170410156, 6.276817321777344, -2.0224609375, 5.308908462524414, 6.887596130371094, -1.8714618682861328, 6.911092758178711, 2.0739669799804688, 1.8414459228515625, 32.38225555419922, 25.429458618164062, 2.0129756927490234, -7.787445068359375, 0.4122943878173828, 17.590789794921875, 12.759040832519531, 18.676170349121094, 2.6237564086914062, 5.7198638916015625, 3.4603271484375, 4.656944274902344, -7.9808349609375, 17.736854553222656, 10.805038452148438, 1.5895214080810547, 0.5666351318359375, -4.884941101074219, 2.5271148681640625, 5.367866516113281, 17.298583984375, 11.561050415039062, 18.926227569580078, 22.293418884277344, 12.615882873535156, 3.5305404663085938, 20.312480926513672, 2.9786853790283203, 9.784294128417969, 14.868244171142578, 24.47210693359375, 1.9431991577148438, 3.8339157104492188, 4.460865020751953, -1.7805233001708984, 16.036800384521484, 13.019302368164062, 19.682281494140625, 9.867074966430664, 0.9345722198486328, 1.8897171020507812, 0.8352813720703125, 13.248359680175781, 14.31256103515625, 10.082176208496094, 11.998367309570312, 7.1192169189453125, 7.667695999145508, 4.211544036865234, 28.185836791992188, 1.8765106201171875, 49.51446533203125, -19.774459838867188, 11.82562255859375, -0.09401702880859375, -2.8762359619140625, 7.919231414794922, 10.547431945800781, 3.5190792083740234, 5.1655731201171875, 1.2065811157226562, 10.114517211914062, 24.488174438476562, 25.60590362548828, 9.272689819335938, 21.469268798828125, -0.06755447387695312, 2.9287261962890625, 1.6679344177246094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000094.npy"}
{"epoch": 0.27647058823529413, "step": 95, "batch_size": 128, "mean": 11.06259822845459, "std": 13.125391960144043, "min": -22.154693603515625, "p10": -1.65630760192871, "median": 7.927227020263672, "p90": 30.349013137817376, "max": 54.77159118652344, "pos_frac": 0.8671875, "sample": [5.625984191894531, 10.620803833007812, -5.8010101318359375, 31.83740234375, 1.2681884765625, -3.5300426483154297, 8.098388671875, -2.220958709716797, 1.9814910888671875, 1.7418384552001953, -1.1505584716796875, -5.206428527832031, 1.1274261474609375, 11.852340698242188, 5.074577331542969, 36.0667610168457, 8.97793197631836, 54.231781005859375, 1.50726318359375, 7.846549987792969, 1.4122638702392578, 43.293609619140625, -5.268291473388672, 8.007904052734375, 4.416839599609375, 25.806655883789062, 15.227636337280273, 1.665506362915039, 14.788728713989258, 26.443923950195312, 6.123384475708008, -0.9389114379882812, 0.5806846618652344, 3.278270721435547, 6.358020782470703, 33.511314392089844, 17.786712646484375, 11.107421875, 22.329328536987305, 13.216995239257812, 17.316329956054688, 6.190452575683594, -3.8365707397460938, 15.08519172668457, 5.43589973449707, 19.553787231445312, 3.5757827758789062, 13.385570526123047, 1.1655807495117188, 15.395130157470703, 18.199813842773438, 19.196762084960938, 6.077571868896484, 29.711132049560547, 23.091064453125, 39.257904052734375, 9.016998291015625, 15.090961456298828, 0.557769775390625, -3.913616180419922, -5.2015533447265625, 2.845794677734375, 0.7050323486328125, 10.062049865722656, -1.4143142700195312, 8.486970901489258, 12.7451171875, 5.446372985839844, 9.897193908691406, 5.084735870361328, 19.81427001953125, 4.141010284423828, 0.8457565307617188, 13.250396728515625, 15.173236846923828, 5.2653961181640625, 0.2200145721435547, 28.163719177246094, -22.154693603515625, 11.491523742675781, 1.4748191833496094, 3.4808483123779297, 20.808502197265625, 5.507667541503906, 12.2742919921875, 0.2179107666015625, -2.258148193359375, 13.4049072265625, -2.6624755859375, 13.240306854248047, -6.2463531494140625, 28.156234741210938, 5.776817321777344, 17.9306583404541, 8.733566284179688, 10.811134338378906, 2.1107521057128906, 37.70452880859375, 36.27372741699219, 1.054727554321289, 21.945844650268555, 6.4051055908203125, 54.77159118652344, 9.928085327148438, 22.474403381347656, 3.15985107421875, 6.949371337890625, 40.97160339355469, 32.278533935546875, 12.558937072753906, 2.9802322387695312, 13.161048889160156, 8.734169006347656, -0.16337203979492188, 6.7782135009765625, 12.805572509765625, -4.188192367553711, 6.043060302734375, 11.306751251220703, 24.361724853515625, 1.014181137084961, 43.523468017578125, 1.4024734497070312, 43.45429992675781, 7.396324157714844, 11.851844787597656, 7.559329986572266, 5.2344207763671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000095.npy"}
{"epoch": 0.27941176470588236, "step": 96, "batch_size": 128, "mean": 11.778440475463867, "std": 11.555212020874023, "min": -16.68304443359375, "p10": -1.552162170410156, "median": 11.206551551818848, "p90": 26.420101928710938, "max": 49.364837646484375, "pos_frac": 0.8515625, "sample": [24.9058837890625, 25.673995971679688, 30.55780029296875, 5.891819000244141, 13.716445922851562, -5.4838409423828125, 18.905826568603516, 21.84661865234375, 2.976734161376953, 5.4353790283203125, 18.469390869140625, 21.297937393188477, 8.791976928710938, 16.177490234375, 9.797637939453125, 17.99578857421875, 6.617774963378906, 5.05072021484375, 11.455680847167969, 8.24407958984375, 4.539268493652344, 12.23727798461914, 30.526084899902344, 1.223245620727539, 6.645088195800781, 28.642074584960938, 4.50677490234375, 15.750648498535156, 9.441093444824219, -1.5103912353515625, 26.636085510253906, 11.147148132324219, 1.3677043914794922, 23.59149169921875, 16.442298889160156, 22.147903442382812, -5.303802490234375, 23.687355041503906, 27.554306030273438, -0.76837158203125, 29.85393524169922, 11.331527709960938, -1.2575626373291016, 46.12867736816406, 10.86798095703125, 8.745986938476562, 26.327537536621094, -7.301700592041016, 3.660104751586914, 22.541610717773438, 16.191818237304688, -1.649627685546875, 9.743682861328125, 41.63337707519531, 3.772378921508789, 0.029682159423828125, 9.52752685546875, 14.585655212402344, 0.7263946533203125, 8.544960021972656, 33.72895812988281, 6.597721099853516, 8.364238739013672, 21.47041130065918, 12.781639099121094, 0.003459930419921875, -3.384979248046875, 4.355167388916016, 25.853893280029297, 29.579376220703125, -1.2179412841796875, -0.5149574279785156, -1.087860107421875, 14.757034301757812, -16.68304443359375, 9.78238296508789, 2.006824493408203, 24.929519653320312, -5.555353164672852, 0.7107143402099609, 30.000274658203125, 16.983978271484375, 13.706195831298828, 4.50996208190918, 14.435302734375, 24.765350341796875, 13.78057861328125, 5.938922882080078, 13.083694458007812, 11.808929443359375, 9.501401901245117, 0.02862548828125, 12.519927978515625, -4.616127014160156, 1.8044548034667969, 17.737468719482422, 3.998291015625, 15.705810546875, 22.421875, 13.59893798828125, -5.926399230957031, 4.27044677734375, 5.3777923583984375, 1.912078857421875, 1.7425994873046875, 17.926002502441406, 16.021224975585938, 49.364837646484375, 5.617496490478516, 31.3050537109375, 13.918647766113281, 11.212884902954102, 8.29461669921875, 16.092041015625, 10.367145538330078, -1.774383544921875, 11.200218200683594, 18.959060668945312, 17.6231689453125, 23.89837646484375, 11.744209289550781, -4.0602874755859375, -3.4563140869140625, 19.573646545410156, 15.775806427001953, 14.681476593017578, -12.197128295898438, 19.152618408203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000096.npy"}
{"epoch": 0.2823529411764706, "step": 97, "batch_size": 128, "mean": 8.682527542114258, "std": 11.11905288696289, "min": -27.6798095703125, "p10": -2.5045625686645505, "median": 7.1399736404418945, "p90": 22.442074584960938, "max": 41.65266418457031, "pos_frac": 0.8203125, "sample": [9.07647705078125, -0.9026203155517578, 8.129745483398438, 9.649856567382812, -27.6798095703125, 0.36315155029296875, 2.013233184814453, 7.17711067199707, 8.351259231567383, 10.320892333984375, 0.169769287109375, 32.88665771484375, -0.9255294799804688, 1.6331100463867188, 2.662656784057617, 2.732696533203125, 6.756988525390625, 4.282722473144531, 11.636955261230469, -14.3465576171875, 7.102836608886719, -10.969635009765625, 0.9747791290283203, 4.739274978637695, 15.47081184387207, 4.498081207275391, 2.870616912841797, 16.040977478027344, 6.847934722900391, 1.3048248291015625, 21.544925689697266, 9.762435913085938, -19.43206787109375, 40.26963806152344, 14.313007354736328, 1.0816173553466797, 12.454414367675781, 6.1321258544921875, 28.75206756591797, 1.3357467651367188, -2.7918148040771484, 14.008781433105469, -0.5875778198242188, -0.6364898681640625, 8.477348327636719, -12.697677612304688, -3.344766616821289, 25.531478881835938, 11.474075317382812, 11.898056030273438, 11.596763610839844, -0.03406524658203125, 14.383857727050781, -10.508081436157227, 15.856513977050781, 14.473987579345703, 13.76611328125, 30.304893493652344, -0.7031059265136719, 8.458898544311523, 22.644737243652344, 3.5293731689453125, 13.675125122070312, 17.79937744140625, 18.074554443359375, -1.0235977172851562, 5.6533203125, 20.33368682861328, 6.091239929199219, 10.313621520996094, 0.10425186157226562, 22.92874526977539, 26.7681884765625, 5.849998474121094, 10.866792678833008, 16.89085578918457, 1.0833740234375, -1.5002918243408203, 2.9375534057617188, 6.986217498779297, 29.805644989013672, 6.040668487548828, 8.501504898071289, 22.366424560546875, 18.652725219726562, 1.5382003784179688, 13.32186508178711, 3.199493408203125, 4.0467681884765625, -3.39971923828125, 12.28128433227539, -2.3814544677734375, 7.192752838134766, 21.297897338867188, 1.077066421508789, 16.896320343017578, -4.091743469238281, 0.45635223388671875, 20.79710578918457, 21.632843017578125, -3.88092041015625, 17.076683044433594, 4.1816558837890625, 1.8193321228027344, 14.677207946777344, 13.058502197265625, 3.1604137420654297, 27.972747802734375, 7.741422653198242, 4.222217559814453, 35.13481140136719, 41.65266418457031, 19.393104553222656, -4.0658721923828125, -6.208915710449219, 10.813102722167969, 5.845094680786133, 22.61859130859375, 5.957916259765625, 4.876007080078125, 15.175525665283203, 18.14143180847168, 17.110748291015625, 6.910697937011719, 10.161396026611328, -0.48012542724609375, 19.512367248535156, 1.5362739562988281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000097.npy"}
{"epoch": 0.2852941176470588, "step": 98, "batch_size": 128, "mean": 10.761238098144531, "std": 12.678398132324219, "min": -18.825347900390625, "p10": -2.7349254608154294, "median": 8.770088195800781, "p90": 28.824417877197266, "max": 58.86968231201172, "pos_frac": 0.8359375, "sample": [12.567571640014648, 7.3378753662109375, 4.241220474243164, 10.28862190246582, 7.546237945556641, 31.736656188964844, 5.25408935546875, 6.8047332763671875, 14.8760986328125, -2.3456802368164062, -2.987110137939453, 39.059234619140625, -3.3377761840820312, 17.463333129882812, 3.6466903686523438, 3.2148284912109375, 14.482000350952148, 23.935623168945312, -12.381301879882812, 13.317863464355469, 0.7618942260742188, 6.301288604736328, 14.173965454101562, 5.0872955322265625, 3.6538467407226562, 7.6582794189453125, 16.150596618652344, 1.21246337890625, 20.491546630859375, 25.238361358642578, 13.121208190917969, 0.9282379150390625, 17.660125732421875, 0.3324718475341797, -4.342948913574219, 7.15673828125, -7.833709716796875, -12.509185791015625, 13.728446960449219, 12.826679229736328, 9.02216911315918, -0.241973876953125, 15.064735412597656, 29.818939208984375, 5.764562606811523, 12.649980545043945, 8.737907409667969, -2.6268463134765625, -1.369598388671875, 22.212509155273438, 10.307823181152344, 17.30577850341797, 36.891746520996094, 23.511016845703125, 3.5491180419921875, 9.179641723632812, 5.095283508300781, 6.459573745727539, 9.424015045166016, 8.438383102416992, 3.678995132446289, 2.9604644775390625, 20.288543701171875, 2.7286338806152344, 37.053741455078125, 28.798538208007812, 22.770416259765625, 28.43453598022461, -2.2094573974609375, 22.28435516357422, 30.832550048828125, -16.8125, 5.843070983886719, 10.21563720703125, -3.25006103515625, 8.372751235961914, 16.27393341064453, -3.9485645294189453, -1.2801513671875, 11.341373443603516, 33.31353759765625, 25.26097869873047, -1.0386810302734375, 9.234039306640625, 7.470359802246094, 5.318305969238281, 3.9878005981445312, 28.884803771972656, 4.129856109619141, -18.825347900390625, -15.256263732910156, 8.067626953125, 46.25623321533203, 10.619071960449219, 58.86968231201172, -5.573387145996094, 29.93108367919922, 11.275711059570312, 4.052253723144531, 11.493419647216797, 25.473068237304688, 31.723182678222656, 10.836349487304688, 3.5517501831054688, 5.784450531005859, 11.983757019042969, 12.979595184326172, 10.507488250732422, 8.802268981933594, 3.845611572265625, 22.467178344726562, 2.717792510986328, 9.90030288696289, 13.306808471679688, 39.361324310302734, -0.4842033386230469, 16.758346557617188, 3.95184326171875, 8.023445129394531, 6.9167022705078125, 0.9991836547851562, 8.283374786376953, -3.2158660888671875, 15.740730285644531, 25.56359100341797, 16.58984375, 16.804443359375, 0.7050266265869141], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000098.npy"}
{"epoch": 0.28823529411764703, "step": 99, "batch_size": 128, "mean": 8.895986557006836, "std": 10.190813064575195, "min": -16.935928344726562, "p10": -2.6945188522338865, "median": 8.02086067199707, "p90": 21.212249183654784, "max": 38.945831298828125, "pos_frac": 0.828125, "sample": [-0.3006401062011719, 20.876426696777344, 26.277416229248047, 0.01786041259765625, 8.545585632324219, 1.8267078399658203, 4.040048599243164, 21.143592834472656, 3.1215667724609375, 29.55157470703125, -10.5394287109375, 0.36520957946777344, 5.3290863037109375, 20.4329833984375, 21.205183029174805, 3.8665008544921875, -2.805339813232422, 12.052993774414062, -4.253448486328125, 16.251312255859375, 14.672325134277344, -1.574432373046875, 4.6823883056640625, -16.935928344726562, 10.140769958496094, 24.595474243164062, -4.022207260131836, 3.1052322387695312, 9.775177001953125, 0.3437538146972656, 18.681617736816406, 8.38840103149414, -0.199737548828125, 7.404243469238281, 32.09041976928711, 12.781906127929688, 2.549518585205078, 18.97857666015625, -7.959815979003906, 8.64251708984375, 12.59499740600586, 2.808042526245117, 1.2319869995117188, 13.405426025390625, 30.046916961669922, 12.306434631347656, 24.03582000732422, 1.7765884399414062, 3.7971668243408203, -13.6500244140625, 0.4346637725830078, 9.568092346191406, 12.11154556274414, 11.266738891601562, 18.509918212890625, 10.711650848388672, 14.361663818359375, 19.800243377685547, 14.190559387207031, 6.546283721923828, -7.829559326171875, -2.5260467529296875, 12.075592041015625, 12.286697387695312, 0.12926483154296875, -3.647735595703125, 6.393001556396484, 33.96855163574219, 6.438301086425781, 11.631828308105469, 5.9300994873046875, 2.7666358947753906, 24.366920471191406, 17.84368133544922, 14.071533203125, -0.4535655975341797, 2.2631187438964844, 2.6874542236328125, 3.5060386657714844, 10.791885375976562, 8.185737609863281, -0.6828079223632812, 19.4849853515625, -0.8067779541015625, -5.836111068725586, 3.4645462036132812, -3.869598388671875, 21.228736877441406, 4.270612716674805, 7.419994354248047, 1.0804405212402344, 12.552200317382812, 16.932350158691406, 31.446544647216797, -0.7718162536621094, 13.782266616821289, 8.073806762695312, 15.167030334472656, 11.233444213867188, 14.762237548828125, -5.091156005859375, 12.475639343261719, 0.30339813232421875, 12.691314697265625, 5.238475799560547, 5.2735443115234375, 6.128984451293945, 2.759124755859375, 5.929531097412109, 20.50939178466797, 10.725914001464844, 13.973201751708984, -8.784088134765625, 32.692283630371094, 15.025398254394531, 9.1612548828125, 7.967914581298828, 18.65357208251953, 7.884803771972656, 1.560546875, 15.2630615234375, 5.422279357910156, -2.647024154663086, 23.062637329101562, 6.143218994140625, 38.945831298828125, 5.461921691894531, 13.143611907958984], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000099.npy"}
{"epoch": 0.2911764705882353, "step": 100, "batch_size": 128, "mean": 9.909294128417969, "std": 11.475228309631348, "min": -16.96881103515625, "p10": -2.680595397949218, "median": 8.801094055175781, "p90": 26.109188079833984, "max": 46.33484649658203, "pos_frac": 0.859375, "sample": [17.30113983154297, 10.216606140136719, -16.96881103515625, -10.719024658203125, 32.633689880371094, 18.973154067993164, 7.2387847900390625, 9.377784729003906, 2.9386253356933594, 1.4730224609375, 19.174591064453125, 6.486152648925781, 4.438697814941406, 7.547447204589844, 14.217453002929688, 33.33251953125, 32.21665954589844, 13.690040588378906, 4.593074798583984, 15.1669921875, 6.2391510009765625, 6.737249374389648, 0.9497489929199219, 12.296577453613281, -5.040946960449219, -5.379570007324219, 3.4465408325195312, 9.435184478759766, 9.51511001586914, 14.853012084960938, 23.249923706054688, 8.365325927734375, -13.082054138183594, 20.971940994262695, 23.402099609375, 12.308448791503906, 0.6843719482421875, 5.996063232421875, 17.235984802246094, 9.318962097167969, 8.754135131835938, 0.7364578247070312, 3.569671630859375, 10.146919250488281, 19.496524810791016, 7.516387939453125, 1.8216400146484375, 13.109527587890625, 1.052734375, 7.676910400390625, 18.10710906982422, 7.6541595458984375, 28.11761474609375, 12.405624389648438, 0.8245258331298828, 6.240266799926758, 4.366701126098633, 12.607452392578125, -0.32041168212890625, 3.63543701171875, 14.466001510620117, 9.54180908203125, 21.159515380859375, 0.849395751953125, 13.876144409179688, 16.898395538330078, 14.181098937988281, 29.73712158203125, 5.552734375, 15.606559753417969, 2.3615264892578125, 11.32383918762207, 12.283096313476562, 29.83978271484375, 1.29925537109375, 23.063392639160156, 2.7919559478759766, 2.6403446197509766, 26.042451858520508, 24.032432556152344, 5.509590148925781, -8.22833251953125, 5.193891525268555, 8.67779541015625, -16.6221923828125, 12.900054931640625, 13.738601684570312, 31.108871459960938, 19.42364501953125, -0.3667182922363281, -0.4782390594482422, 32.64469909667969, 5.390350341796875, 10.714569091796875, 8.255027770996094, 10.339000701904297, 43.602294921875, -3.3394088745117188, 26.268638610839844, 3.5278701782226562, -13.530593872070312, -7.05523681640625, 3.6128578186035156, 21.246231079101562, -5.13323974609375, 8.848052978515625, 12.676887512207031, -1.6522979736328125, 46.33484649658203, 3.0282135009765625, 9.358161926269531, 2.6797847747802734, 3.259632110595703, 12.573661804199219, 5.8723297119140625, 11.74700927734375, 2.1423892974853516, -10.154754638671875, 14.130895614624023, 5.859853744506836, 36.512577056884766, 14.61932373046875, 7.223613739013672, 10.618736267089844, -3.6914138793945312, 26.26490592956543, -2.3982467651367188, 15.237571716308594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000100.npy"}
{"epoch": 0.29411764705882354, "step": 101, "batch_size": 128, "mean": 8.689888000488281, "std": 11.328944206237793, "min": -13.5416259765625, "p10": -3.373769569396972, "median": 7.718583106994629, "p90": 22.70445137023926, "max": 63.97450256347656, "pos_frac": 0.796875, "sample": [16.50292205810547, 5.1226043701171875, 11.800849914550781, -1.0354766845703125, 5.752134323120117, 22.598480224609375, 4.762674331665039, 4.518043518066406, 18.754623413085938, -0.6471786499023438, 10.044906616210938, 9.899826049804688, 34.893653869628906, 11.306167602539062, 6.6201019287109375, 0.20489501953125, 16.003074645996094, 24.729568481445312, -10.837966918945312, 2.8077850341796875, 12.970039367675781, 8.607376098632812, 13.50015640258789, 39.314910888671875, 21.991317749023438, 3.09130859375, 14.236282348632812, -5.579139709472656, 5.413778305053711, 13.548133850097656, 0.4885711669921875, 18.59490966796875, 0.9149303436279297, 8.03924560546875, 0.659881591796875, 12.018157958984375, 4.98077392578125, 3.32672119140625, 33.876251220703125, 11.094339370727539, 9.786506652832031, 18.341402053833008, -11.427906036376953, 16.899520874023438, 11.895538330078125, -1.694509506225586, 10.034683227539062, -3.2494564056396484, 5.16180419921875, 4.73748779296875, 6.899833679199219, 9.835533142089844, -5.526338577270508, 18.964797973632812, 1.7478485107421875, 5.615989685058594, 63.97450256347656, 18.202049255371094, -1.4906806945800781, -9.600921630859375, 16.156539916992188, 17.19146728515625, 12.839019775390625, 3.5177650451660156, 25.698638916015625, 0.2114582061767578, -8.289653778076172, 8.271194458007812, 2.333660125732422, 5.2256622314453125, 9.73358154296875, -2.178619384765625, -8.506591796875, 6.848480224609375, 13.435638427734375, 12.447639465332031, 8.794746398925781, 19.16028594970703, -3.8576622009277344, 7.402687072753906, 13.615669250488281, 9.061153411865234, -2.963775634765625, -4.809226989746094, 1.5770339965820312, 10.132904052734375, 15.7728271484375, 27.028564453125, 6.854866027832031, 15.706634521484375, 4.211427688598633, 5.486305236816406, -6.5551910400390625, -2.5395660400390625, 17.403228759765625, 8.612831115722656, 25.460708618164062, 12.569568634033203, 1.7600669860839844, -13.5416259765625, 10.958587646484375, 26.320220947265625, 11.61773681640625, 2.2477874755859375, -3.6638336181640625, -1.3368892669677734, 42.36003112792969, -1.3309612274169922, 8.395788192749023, 13.048606872558594, 2.3455352783203125, 23.183807373046875, 11.950897216796875, -4.486263275146484, -1.3900375366210938, 24.989303588867188, 6.1876220703125, 22.951717376708984, 8.741451263427734, 0.2553253173828125, 8.034479141235352, -1.6274948120117188, 4.242160797119141, 6.380388259887695, 15.369911193847656, -0.7569465637207031, 1.4882278442382812, 0.5509262084960938], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000101.npy"}
{"epoch": 0.29705882352941176, "step": 102, "batch_size": 128, "mean": 9.18067741394043, "std": 10.513472557067871, "min": -21.060791015625, "p10": -3.4665397644042963, "median": 8.604156494140625, "p90": 21.312948989868165, "max": 54.25054931640625, "pos_frac": 0.828125, "sample": [0.0877227783203125, 12.355438232421875, 16.783248901367188, 6.7124786376953125, -2.010618209838867, 30.7615966796875, 7.184162139892578, 16.606857299804688, 14.610984802246094, -3.963287353515625, 16.380584716796875, 7.810451507568359, 27.27021026611328, 22.060028076171875, 5.6454315185546875, 6.661502838134766, 17.345977783203125, 6.4417266845703125, 8.843109130859375, 20.069549560546875, 13.812797546386719, 10.330377578735352, 8.905181884765625, 5.628746032714844, 0.28055381774902344, 4.010463714599609, 14.540746688842773, -1.3741378784179688, 15.48968505859375, 11.98243522644043, 7.818565368652344, -7.484222412109375, 2.6569900512695312, 10.917144775390625, 5.1168060302734375, 29.314565658569336, 13.068382263183594, 25.830711364746094, -6.682268142700195, 2.39349365234375, 20.356979370117188, 3.5750045776367188, 20.017974853515625, 8.550525665283203, -9.0826416015625, 10.357810974121094, -8.178237915039062, 8.747322082519531, 1.8054656982421875, 7.501792907714844, 14.216175079345703, 13.174129486083984, 0.8972434997558594, 5.352592468261719, -3.85272216796875, 1.9026546478271484, 12.743810653686523, 18.050872802734375, 31.7122802734375, 29.013534545898438, 6.731025695800781, 10.8531494140625, 5.129524230957031, 8.079158782958984, 1.9956207275390625, 12.383852005004883, 0.8768539428710938, 9.649499893188477, 21.487262725830078, 8.657787322998047, 6.54962158203125, 10.199638366699219, -21.060791015625, 18.778453826904297, 11.273544311523438, 11.200361251831055, 22.087982177734375, -4.4892730712890625, -0.4705352783203125, 1.804616928100586, -0.375579833984375, 1.261880874633789, 4.813720703125, 18.273208618164062, 9.661178588867188, 1.5499725341796875, 27.366477966308594, -4.750946044921875, 24.771018981933594, 4.857879638671875, -5.396186828613281, 18.25836944580078, -11.180908203125, -1.1714305877685547, 13.996284484863281, 15.826339721679688, 8.258995056152344, -14.914848327636719, -3.3010330200195312, 6.391338348388672, 6.21673583984375, 21.238243103027344, 13.454071044921875, 7.966194152832031, -0.92327880859375, -0.126678466796875, 8.530487060546875, 29.77859115600586, 14.801334381103516, 0.025604248046875, 54.25054931640625, 3.6894073486328125, 18.62055206298828, 16.306541442871094, 5.990814208984375, 12.085563659667969, 19.52093505859375, 1.3718147277832031, 13.254745483398438, 14.410652160644531, 10.8331298828125, 9.82826042175293, 6.359897613525391, 17.57940673828125, -6.8494873046875, 17.900117874145508, 12.110164642333984, -0.08746337890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000102.npy"}
{"epoch": 0.3, "step": 103, "batch_size": 128, "mean": 10.268743515014648, "std": 10.237410545349121, "min": -17.71087646484375, "p10": -1.77225227355957, "median": 9.394794464111328, "p90": 23.923368072509763, "max": 45.51689910888672, "pos_frac": 0.859375, "sample": [0.5153217315673828, 13.398605346679688, 8.614486694335938, 35.61607360839844, 7.975872039794922, 7.727870941162109, -0.5815410614013672, 13.656768798828125, -1.7041130065917969, 16.861572265625, 27.22589874267578, 10.955814361572266, 21.045915603637695, 12.373443603515625, 4.028839111328125, 16.318359375, 18.430755615234375, 12.787700653076172, 10.643726348876953, 14.282730102539062, 41.003692626953125, 1.143035888671875, 9.771087646484375, 13.712860107421875, 38.501495361328125, 9.43341064453125, 14.005329132080078, 23.79279327392578, 10.078655242919922, 4.412117004394531, 13.975212097167969, 21.523208618164062, -1.5211029052734375, 10.969745635986328, 3.8623199462890625, -5.80126953125, 3.66839599609375, 9.542024612426758, -4.826786041259766, 20.329544067382812, 9.889671325683594, 11.210220336914062, 20.2576904296875, 6.9646148681640625, 8.037738800048828, 5.90728759765625, -2.5597362518310547, 10.243949890136719, -17.71087646484375, 17.25940704345703, -8.672931671142578, 10.116409301757812, 9.54940414428711, 2.8039188385009766, -1.931243896484375, 16.148990631103516, 5.975063323974609, 4.819419860839844, 3.7104415893554688, -4.279945373535156, 0.16982078552246094, 21.4920654296875, 1.354217529296875, 16.60888671875, 4.175376892089844, 8.503448486328125, 8.185680389404297, 8.379920959472656, 45.51689910888672, 26.77893829345703, 23.771198272705078, 16.146621704101562, 13.706571578979492, 6.7804412841796875, 10.376396179199219, 7.0117645263671875, 6.672698974609375, -6.834510803222656, 24.39010238647461, 8.999221801757812, 24.5980167388916, 24.228042602539062, 8.56157112121582, 16.297569274902344, 11.939102172851562, -5.729896545410156, 28.145790100097656, 7.189002990722656, 18.191001892089844, 1.8177108764648438, -3.6831016540527344, 7.550102233886719, 13.64404296875, -0.7891426086425781, 28.562583923339844, 21.771583557128906, 10.195999145507812, 0.63531494140625, 13.583826065063477, 4.3056793212890625, 9.356178283691406, 6.759284973144531, -2.97149658203125, 4.3264617919921875, 6.950416564941406, 9.561790466308594, -0.8478317260742188, 17.43830108642578, 5.333099365234375, 3.4467735290527344, 27.307846069335938, 18.617355346679688, 0.12880706787109375, 17.4525146484375, -2.287992477416992, 7.857826232910156, 5.611597061157227, -2.2935333251953125, 3.8729629516601562, 15.677173614501953, 5.510187149047852, 14.182798385620117, 4.764945983886719, 26.458099365234375, 1.5158843994140625, 1.9646282196044922, 16.911666870117188, 13.103511810302734], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000103.npy"}
{"epoch": 0.3029411764705882, "step": 104, "batch_size": 128, "mean": 11.07557201385498, "std": 11.821821212768555, "min": -14.925294876098633, "p10": -1.5203176498413085, "median": 9.21364974975586, "p90": 25.73943481445312, "max": 47.267333984375, "pos_frac": 0.8515625, "sample": [8.203834533691406, 0.7609710693359375, 15.117862701416016, 5.968055725097656, 14.53143310546875, -2.043163299560547, 44.53984832763672, 1.8939208984375, 10.51405143737793, 22.4798583984375, 5.898719787597656, 4.546791076660156, 24.186676025390625, 41.20207595825195, 7.0329132080078125, 20.73155975341797, 4.1696319580078125, 2.2300643920898438, 17.056785583496094, 32.8448486328125, 10.918365478515625, 11.848556518554688, 4.6876983642578125, 18.191207885742188, 30.680328369140625, 15.98443603515625, 6.349809646606445, 47.267333984375, -14.925294876098633, 10.580680847167969, 30.787765502929688, 9.11065673828125, -3.834178924560547, -1.431915283203125, 33.21295166015625, -1.6291732788085938, 22.815414428710938, -4.2424468994140625, 6.290771484375, 12.229528427124023, 5.008281707763672, -0.849273681640625, 17.154808044433594, 21.658065795898438, 1.20111083984375, 4.7748870849609375, 24.120101928710938, 21.539710998535156, 16.763992309570312, 32.76416778564453, 5.1331939697265625, 25.11579132080078, 13.790313720703125, 3.906005859375, 7.4166717529296875, 27.194602966308594, 3.3988399505615234, 3.7694091796875, -0.10825347900390625, 40.24891662597656, -12.024368286132812, 37.750465393066406, 28.80157470703125, 6.988758087158203, 4.52874755859375, 1.8832778930664062, -2.8484878540039062, 9.61661148071289, 4.645881652832031, 0.8298797607421875, 10.826919555664062, 2.839628219604492, 10.550064086914062, 9.316642761230469, 34.81413269042969, 14.036251068115234, 6.2799072265625, 22.980979919433594, 13.45849609375, 6.844882965087891, 2.4997711181640625, 10.692291259765625, 15.880962371826172, 21.021896362304688, 25.07183837890625, 0.9971504211425781, 21.96288299560547, 12.63800048828125, -6.130134582519531, 18.805381774902344, 1.2722320556640625, 16.249130249023438, 22.545448303222656, 11.500640869140625, 6.63018798828125, -2.2578659057617188, 19.846004486083984, 10.982803344726562, 16.564430236816406, -1.4736652374267578, 2.8655929565429688, 0.43326568603515625, 19.599388122558594, 12.534271240234375, -0.01647186279296875, 0.727020263671875, 13.230926513671875, -3.9887847900390625, -9.882064819335938, 20.5872802734375, 0.12057113647460938, 8.428268432617188, 10.983123779296875, 1.8068504333496094, 6.881187438964844, 14.239105224609375, 2.1774768829345703, 20.20366668701172, 14.81161117553711, -1.2647552490234375, 1.1196365356445312, 21.77251434326172, 9.779373168945312, -3.502216339111328, 8.059398651123047, 0.8309040069580078, 8.603439331054688, -1.6476211547851562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000104.npy"}
{"epoch": 0.3058823529411765, "step": 105, "batch_size": 128, "mean": 9.054426193237305, "std": 11.084685325622559, "min": -15.896263122558594, "p10": -3.247976875305176, "median": 8.323980331420898, "p90": 22.537708091735833, "max": 43.96395492553711, "pos_frac": 0.8203125, "sample": [28.816574096679688, 9.223587036132812, 31.256790161132812, -0.1289520263671875, 19.33574676513672, 24.436607360839844, 21.361061096191406, 8.322063446044922, 27.641521453857422, -9.305030822753906, 4.63848876953125, 12.372472763061523, 43.96395492553711, 9.4898681640625, -0.1253032684326172, -10.600257873535156, -11.260505676269531, 17.16547393798828, 6.307872772216797, 11.456451416015625, -9.867551803588867, 10.8668212890625, 33.16452407836914, 21.55535888671875, 15.737678527832031, 19.56129264831543, 8.625785827636719, 6.794683456420898, 10.322998046875, -15.896263122558594, 13.268775939941406, 12.06463623046875, 14.451316833496094, -3.3162841796875, -0.4737358093261719, 2.514829635620117, 10.761455535888672, -4.930839538574219, 18.625595092773438, 20.688858032226562, 11.966812133789062, -12.501556396484375, -0.2431621551513672, 5.673458099365234, 9.368904113769531, 1.2526397705078125, 14.568778991699219, 12.222015380859375, -0.5821723937988281, 9.879783630371094, 1.821157455444336, -2.7197418212890625, -8.182960510253906, 3.1098785400390625, 14.17401123046875, 8.3311767578125, 7.707117080688477, 5.490232467651367, 1.5406646728515625, -6.330474853515625, 9.233184814453125, 25.580116271972656, 6.4244384765625, 13.584640502929688, 4.4750823974609375, -11.520584106445312, -1.0787200927734375, 19.838653564453125, 2.7604751586914062, 17.05157470703125, 9.089073181152344, -3.2187023162841797, 12.080036163330078, 6.4368743896484375, 1.916107177734375, 7.481536865234375, 9.449493408203125, 6.3438568115234375, 10.26063346862793, 5.324798583984375, 17.695358276367188, 16.918899536132812, 3.727691650390625, -2.673797607421875, 2.700836181640625, 6.136665344238281, 1.7016773223876953, 8.938655853271484, 10.512763977050781, 5.174350738525391, 32.20972442626953, 43.223602294921875, 21.101966857910156, 21.723894119262695, 13.806045532226562, 2.370258331298828, 10.70513916015625, -12.843090057373047, 26.2978515625, -4.336418151855469, 3.356058120727539, -0.4591522216796875, 18.133136749267578, 12.647815704345703, 7.738945007324219, 1.1907997131347656, 1.6571598052978516, 8.325897216796875, 14.332420349121094, 28.18890380859375, 6.2953338623046875, 11.73773193359375, 11.083707809448242, 19.722915649414062, 9.40966796875, 4.27923583984375, 1.8756237030029297, 8.12506103515625, 36.089195251464844, 8.16668701171875, 6.138256072998047, 1.812734603881836, 3.451549530029297, 16.45543670654297, 3.4254531860351562, 1.7158279418945312, 3.230499267578125, 28.800025939941406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000105.npy"}
{"epoch": 0.3088235294117647, "step": 106, "batch_size": 128, "mean": 10.86612319946289, "std": 12.038691520690918, "min": -15.918060302734375, "p10": -3.467622375488281, "median": 8.337030410766602, "p90": 26.485469818115234, "max": 50.361083984375, "pos_frac": 0.84375, "sample": [8.23150634765625, -1.2391395568847656, 25.03375244140625, 7.444267272949219, 11.252166748046875, 7.995597839355469, 16.276084899902344, 17.21368408203125, 24.908714294433594, -7.6199951171875, 12.184135437011719, 2.9341049194335938, 4.096235275268555, 0.03371429443359375, 10.995735168457031, 13.47140884399414, -15.918060302734375, 22.528114318847656, 9.77301025390625, 18.88636016845703, 2.594207763671875, 19.663219451904297, 7.956699371337891, 13.046279907226562, -8.378337860107422, 41.830406188964844, -12.48095703125, 12.350957870483398, 5.669342041015625, -4.36469841003418, 17.018722534179688, 20.225601196289062, 22.472427368164062, -1.4904632568359375, 36.64398956298828, -5.521610260009766, 4.7527008056640625, 5.269866943359375, 15.358650207519531, 16.938400268554688, 5.087471008300781, 0.68701171875, 1.2034378051757812, 43.865875244140625, 4.1473236083984375, 8.04449462890625, 28.02008056640625, 13.635543823242188, 14.491764068603516, 20.662857055664062, -4.629432678222656, 0.9937496185302734, 2.7125892639160156, 8.442554473876953, 3.824617385864258, 5.050575256347656, 9.193260192871094, 7.509510040283203, -2.352510452270508, 12.70700454711914, 2.877410888671875, 10.357376098632812, 2.4891815185546875, -4.671165466308594, 2.0021820068359375, 7.317420959472656, 38.9771728515625, 21.278274536132812, 1.6846275329589844, 7.594551086425781, 7.05908203125, 3.2767257690429688, 27.757659912109375, 29.957149505615234, 19.228370666503906, 18.59320068359375, 7.21978759765625, -5.398902893066406, 2.914562225341797, 28.481468200683594, 25.858963012695312, 12.915756225585938, 4.477039337158203, -6.147117614746094, 26.690948486328125, 18.66248321533203, 6.993961334228516, 50.361083984375, 23.132064819335938, 6.120159149169922, 5.636722564697266, 20.08319091796875, 27.735137939453125, 6.5366668701171875, 6.559490203857422, 3.537843704223633, 6.069568634033203, 5.604602813720703, 14.729080200195312, 19.81561279296875, 2.2971572875976562, -3.4279022216796875, -15.441070556640625, 24.81078338623047, 3.9684696197509766, 13.030113220214844, 38.04306411743164, -2.5203075408935547, 18.343570709228516, -4.228702545166016, 10.686309814453125, 18.615360260009766, 15.220848083496094, 5.1445465087890625, 8.863250732421875, 11.899513244628906, 7.67864990234375, 12.045486450195312, -2.877992630004883, -0.5606479644775391, 24.464431762695312, 8.859792709350586, 22.445308685302734, 26.39740753173828, 10.123313903808594, 17.009475708007812, -3.560302734375, 27.8297119140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000106.npy"}
{"epoch": 0.31176470588235294, "step": 107, "batch_size": 128, "mean": 10.529056549072266, "std": 12.86307144165039, "min": -32.79582595825195, "p10": -2.253973007202148, "median": 9.027926445007324, "p90": 27.99126739501953, "max": 49.7950439453125, "pos_frac": 0.8515625, "sample": [49.7950439453125, 6.1628265380859375, 10.079483032226562, 1.9859580993652344, 4.3074493408203125, 4.472541809082031, 1.5689811706542969, 23.18010711669922, 0.9090900421142578, 12.371479034423828, 23.442867279052734, 28.431095123291016, 45.23112487792969, 21.294113159179688, 10.105802536010742, 47.73583984375, 2.210723876953125, 16.7841796875, 6.1011810302734375, 3.401275634765625, 15.444442749023438, 33.43175506591797, 16.71912384033203, 14.866134643554688, 19.30584716796875, 14.5023193359375, 24.06053924560547, 28.38709259033203, 13.157516479492188, 2.3436813354492188, -3.2693252563476562, -0.2176036834716797, -26.624755859375, 0.46734619140625, -0.2741069793701172, 23.42181396484375, 9.247299194335938, -10.952295303344727, 2.945587158203125, 18.75501251220703, 15.335342407226562, 28.23590850830078, 9.2843017578125, 5.764108657836914, 8.537162780761719, -2.4336166381835938, 1.8156356811523438, 9.236343383789062, 10.927810668945312, 3.0479087829589844, 27.725914001464844, 0.152496337890625, 10.001937866210938, 18.80266571044922, 26.41513442993164, 27.63287353515625, 12.8194580078125, -0.0658416748046875, 1.6331367492675781, 5.485099792480469, 10.833198547363281, 12.566295623779297, 27.88642120361328, 12.041065216064453, 6.017059326171875, 16.845840454101562, -3.4254226684570312, 2.4742507934570312, 12.463798522949219, -4.078035354614258, -2.176982879638672, -13.478317260742188, 6.039794921875, 0.33269309997558594, 9.56732177734375, 25.566509246826172, -4.055629730224609, 7.4327392578125, 14.601192474365234, 8.49932861328125, 7.633636474609375, 29.913414001464844, 0.25643157958984375, 30.41485595703125, 32.00208282470703, 4.4781646728515625, 2.4633255004882812, 30.933128356933594, 18.343353271484375, 9.706939697265625, 5.9740142822265625, -4.4929656982421875, -0.2530784606933594, 6.77946662902832, 0.9848785400390625, 1.5659027099609375, 20.6329345703125, 2.96783447265625, 15.369239807128906, 7.83929443359375, 9.531892776489258, -0.9706573486328125, 12.9498291015625, 5.026451110839844, 11.085617065429688, 8.819509506225586, -5.195915222167969, 4.481355667114258, 5.162933349609375, 0.06479072570800781, 22.690101623535156, 10.023193359375, 12.686885833740234, 13.177825927734375, 19.262313842773438, 33.68316650390625, -32.79582595825195, 34.855865478515625, 3.188913345336914, 11.944114685058594, 7.572803497314453, 27.078643798828125, 5.579597473144531, 0.56640625, 8.280036926269531, 25.511444091796875, -8.992437362670898, -2.6579132080078125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000107.npy"}
{"epoch": 0.31470588235294117, "step": 108, "batch_size": 128, "mean": 10.715563774108887, "std": 11.354247093200684, "min": -9.530677795410156, "p10": -2.0951442718505855, "median": 8.593841552734375, "p90": 26.44585380554199, "max": 48.526214599609375, "pos_frac": 0.8125, "sample": [6.320518493652344, 25.74066162109375, 20.22087860107422, 8.281333923339844, -3.472381591796875, 15.321121215820312, -0.08359527587890625, 21.398880004882812, 16.85887908935547, 9.970527648925781, 32.43351745605469, 14.713537216186523, -5.469383239746094, 13.70953369140625, 20.940643310546875, -6.0145263671875, 6.417675018310547, -4.268665313720703, 26.147075653076172, 1.5955314636230469, -0.45713043212890625, 12.028900146484375, 28.308029174804688, 22.365081787109375, 3.3244857788085938, 14.684295654296875, -5.438743591308594, -3.6707324981689453, 30.045578002929688, 6.673421859741211, 6.615203857421875, -7.211639404296875, 19.45207977294922, 11.014154434204102, 26.828460693359375, 6.970670700073242, 4.0717315673828125, 6.160346984863281, 22.852279663085938, -0.4320220947265625, -1.8652839660644531, 7.075786590576172, -0.8234176635742188, 6.8818817138671875, 25.75274658203125, 8.304821014404297, 3.2739505767822266, 3.687349319458008, 3.0417938232421875, 13.497909545898438, 13.184745788574219, 14.526016235351562, 26.910919189453125, 12.11480712890625, 7.046527862548828, -3.20611572265625, 16.712669372558594, 4.74530029296875, 0.7705001831054688, 9.563749313354492, 8.02816390991211, 5.005863189697266, -2.2719974517822266, 5.431060791015625, 20.222610473632812, 10.940216064453125, -9.530677795410156, 17.444419860839844, 22.859817504882812, 9.437026977539062, 1.265777587890625, 11.540351867675781, -1.909841537475586, 25.34795379638672, 11.057548522949219, 1.6715087890625, 1.1826171875, 27.948959350585938, 3.489002227783203, -1.31243896484375, 12.612411499023438, 0.5139064788818359, 27.298309326171875, 9.283195495605469, -8.948478698730469, 26.281879425048828, -3.0893211364746094, 0.9048004150390625, 4.209083557128906, 7.682403564453125, 23.082595825195312, 8.882862091064453, 1.3561019897460938, -1.055938720703125, 6.348825454711914, 32.95022964477539, 21.535173416137695, -0.9403305053710938, 48.526214599609375, 1.800079345703125, 19.35586929321289, -1.736175537109375, 2.561359405517578, 14.56195068359375, 27.40538787841797, 28.915138244628906, 4.372978210449219, 25.60291290283203, 41.670440673828125, 15.038257598876953, 3.1015853881835938, 13.03603744506836, 2.5958423614501953, 25.07944107055664, 6.169370651245117, 2.2398719787597656, 8.218210220336914, 14.27998161315918, 9.199777603149414, 34.66123962402344, 13.210216522216797, 15.439918518066406, 25.991310119628906, 15.445556640625, 12.052520751953125, -6.346418380737305, -2.019350051879883, 18.264076232910156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000108.npy"}
{"epoch": 0.3176470588235294, "step": 109, "batch_size": 128, "mean": 10.9865140914917, "std": 11.82139778137207, "min": -11.266021728515625, "p10": -2.3350154876708977, "median": 8.947883605957031, "p90": 25.431065177917482, "max": 56.374053955078125, "pos_frac": 0.859375, "sample": [15.45025634765625, 7.602027893066406, -5.856010437011719, 2.6608428955078125, 5.08995246887207, 0.5539321899414062, -2.914714813232422, 30.3543701171875, 6.341306686401367, -7.121673583984375, 23.518089294433594, 56.374053955078125, 1.4748210906982422, 8.429058074951172, 16.47777557373047, 11.4356689453125, -7.147686004638672, 4.191001892089844, 21.56134033203125, 12.956966400146484, 14.429656982421875, 19.077377319335938, -11.161087036132812, 25.134368896484375, 16.699466705322266, 8.800882339477539, 8.98651123046875, 0.9966354370117188, 25.211898803710938, 6.039735794067383, 17.694393157958984, 1.8205890655517578, 6.4038848876953125, 17.521148681640625, 0.2697601318359375, 40.655067443847656, 10.685661315917969, 15.432659149169922, 1.1906013488769531, 17.38671875, 26.58251953125, 6.7654266357421875, 14.156723022460938, 3.595949172973633, 12.74554443359375, 21.792383193969727, 14.4254150390625, 4.321537017822266, 25.533607482910156, -0.269073486328125, 12.012435913085938, 17.831314086914062, 8.974761962890625, 29.9071044921875, -2.094757080078125, 20.29187774658203, 12.02273178100586, 21.31727409362793, 7.618705749511719, 7.7384796142578125, 1.3961677551269531, 15.04278564453125, 7.90338134765625, -0.11167716979980469, 13.934822082519531, 7.308128356933594, 15.031190872192383, 10.700355529785156, 35.647621154785156, 7.297325134277344, 38.102081298828125, 1.9877777099609375, 13.116935729980469, 6.840375900268555, 4.552398681640625, 15.797096252441406, 4.522003173828125, -1.915679931640625, 2.3302154541015625, 25.198654174804688, 26.171051025390625, 8.014259338378906, 30.6243896484375, 11.00975227355957, 3.5679874420166016, -2.895618438720703, 21.77462387084961, 1.7057571411132812, 11.437217712402344, 3.0185775756835938, -4.150299072265625, 11.955326080322266, 25.44707489013672, 2.7737159729003906, 4.822023391723633, 15.297210693359375, 4.914045333862305, 12.338142395019531, 25.424203872680664, 2.5257644653320312, 6.887580871582031, 22.164260864257812, 19.483787536621094, 5.618194580078125, 10.227832794189453, -7.72271728515625, 19.715850830078125, 8.921005249023438, 16.300857543945312, 21.13857650756836, 7.988674163818359, 13.0667724609375, 6.053794860839844, 1.0016708374023438, 17.741104125976562, -8.679214477539062, -4.1646575927734375, -5.352470397949219, 44.634857177734375, 16.893798828125, 41.66377258300781, 3.5006866455078125, 3.9835777282714844, -7.066986083984375, 0.5347900390625, -11.266021728515625, -1.4276008605957031, 13.997611999511719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000109.npy"}
{"epoch": 0.3205882352941177, "step": 110, "batch_size": 128, "mean": 8.555511474609375, "std": 8.738350868225098, "min": -16.914424896240234, "p10": -1.4392974853515623, "median": 7.979618072509766, "p90": 18.42680320739746, "max": 39.934471130371094, "pos_frac": 0.8515625, "sample": [31.235260009765625, 1.0398788452148438, 7.837453842163086, 24.13561248779297, 3.757688522338867, 16.528823852539062, -2.767822265625, 2.935098648071289, 2.2088966369628906, 22.260894775390625, 8.65386962890625, -6.183982849121094, 16.034942626953125, 10.306716918945312, -9.878286361694336, 2.2596206665039062, 6.7373199462890625, -1.5236625671386719, -4.122535705566406, 24.982566833496094, 10.788200378417969, 2.64178466796875, 11.954376220703125, 7.367218017578125, 3.021808624267578, 19.50750732421875, -0.325469970703125, 13.439254760742188, 3.5486068725585938, -5.0415802001953125, -2.923065185546875, 18.251476287841797, 7.991607666015625, 24.75482177734375, 1.24462890625, 3.2191543579101562, 12.300605773925781, 25.7276611328125, 39.934471130371094, 9.216316223144531, 16.99481201171875, 3.69378662109375, 1.78302001953125, 2.9082565307617188, 16.362831115722656, 18.835899353027344, 10.182706832885742, 13.426712036132812, 25.487300872802734, 0.8565216064453125, 5.961265563964844, 5.109832763671875, 7.967628479003906, 10.814517974853516, 7.123680114746094, 3.986865997314453, 10.949031829833984, 12.457113265991211, 9.120155334472656, -0.93768310546875, 6.45513916015625, 6.377799987792969, 6.855894088745117, 1.6699104309082031, 7.030845642089844, 3.3185176849365234, 11.527326583862305, -4.408374786376953, 12.315195083618164, 18.231307983398438, 16.851531982421875, -0.9287261962890625, 26.55756378173828, 9.189506530761719, 17.06633758544922, 17.844202041625977, 17.787139892578125, 2.4223365783691406, 1.4847564697265625, 6.6392364501953125, 1.41021728515625, -0.6053619384765625, 12.905609130859375, 13.971170425415039, 6.2408447265625, 11.105484008789062, -1.4031410217285156, 7.44879150390625, 3.3081893920898438, 7.1005859375, 5.347496032714844, 11.878719329833984, 15.8375244140625, 8.107879638671875, -16.914424896240234, 20.42310333251953, 5.400321960449219, 14.043354034423828, 10.13040542602539, 14.061904907226562, 17.908340454101562, 9.202533721923828, -1.8099441528320312, 5.397369384765625, -3.084808349609375, 12.464496612548828, 2.7542762756347656, 8.887588500976562, 11.894695281982422, 15.320711135864258, 13.425628662109375, 9.559494018554688, 2.8853836059570312, 2.8192138671875, 1.566497802734375, 9.5074462890625, 1.1805305480957031, -0.6039543151855469, 15.514053344726562, 8.137191772460938, 9.128284454345703, 17.077735900878906, 28.009002685546875, 11.609588623046875, 5.6542510986328125, 9.647933959960938, -4.7930908203125, -4.383033752441406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000110.npy"}
{"epoch": 0.3235294117647059, "step": 111, "batch_size": 128, "mean": 12.166679382324219, "std": 12.534388542175293, "min": -22.24230194091797, "p10": -1.2769947052001946, "median": 11.833949089050293, "p90": 26.576537704467768, "max": 48.080291748046875, "pos_frac": 0.875, "sample": [6.451879501342773, 5.362083435058594, 25.5074462890625, 3.5786094665527344, 15.66592788696289, 33.98594665527344, 6.716167449951172, 28.07763671875, 7.79925537109375, 34.550537109375, -19.63935089111328, 26.026947021484375, 18.05474090576172, 16.165176391601562, 22.02729034423828, 11.654380798339844, 3.4684677124023438, 10.575912475585938, 20.58557891845703, 25.20950698852539, 7.484870910644531, -5.541160583496094, 24.3970947265625, 15.655136108398438, 20.57843017578125, 23.236953735351562, 4.572336196899414, -9.582572937011719, 5.4636993408203125, 13.381851196289062, 1.8695068359375, 6.173377990722656, 29.101402282714844, 26.09952163696289, 40.096458435058594, -1.7297019958496094, 21.79712677001953, 7.888275146484375, 16.234386444091797, 19.35559844970703, 13.538276672363281, 20.489131927490234, 21.28689193725586, -5.6998443603515625, 11.33824348449707, -6.752044677734375, 15.322761535644531, 14.490798950195312, -1.082977294921875, 27.6895751953125, 20.263214111328125, 12.438201904296875, 10.993026733398438, 13.04788589477539, 9.494461059570312, 41.995201110839844, 10.179040908813477, 13.591598510742188, 7.214750289916992, 14.000213623046875, 17.282058715820312, 2.929372787475586, 32.352569580078125, 13.356796264648438, 3.2325439453125, 22.85076904296875, 1.2751998901367188, 7.335454940795898, 4.197563171386719, 18.28797149658203, 1.4638671875, 8.091590881347656, -1.7301025390625, 4.3153839111328125, 19.333404541015625, 11.915937423706055, 22.20258331298828, 7.948581695556641, -21.519805908203125, 14.463119506835938, 33.3194580078125, 1.4631690979003906, 17.788375854492188, 24.380401611328125, 5.492490768432617, 10.496135711669922, -22.24230194091797, 18.96112060546875, 2.7289352416992188, 8.712928771972656, -3.606475830078125, 24.144372940063477, 37.15406036376953, 18.624103546142578, 1.066375732421875, 2.4795074462890625, 8.340606689453125, 5.5032958984375, 2.0194320678710938, 6.352497100830078, 48.080291748046875, 14.432329177856445, 8.317619323730469, 13.961624145507812, -0.9268875122070312, 17.16530990600586, 41.189064025878906, 8.301074981689453, 23.81812286376953, 0.8238029479980469, 11.751960754394531, -0.5686225891113281, 0.7576141357421875, 7.247505187988281, -9.73173713684082, -9.420913696289062, 15.797492980957031, 19.45450210571289, 16.146976470947266, 23.734237670898438, 10.330093383789062, -15.738571166992188, 12.329299926757812, 5.051361083984375, 19.17816162109375, 15.138946533203125, 27.895767211914062, 7.859989166259766], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000111.npy"}
{"epoch": 0.3264705882352941, "step": 112, "batch_size": 128, "mean": 10.860298156738281, "std": 11.73920726776123, "min": -17.838178634643555, "p10": -1.576962280273437, "median": 9.716330528259277, "p90": 28.136683654785156, "max": 46.37687301635742, "pos_frac": 0.8359375, "sample": [13.689796447753906, -14.462120056152344, 29.119659423828125, 1.0514450073242188, 3.2137298583984375, 6.3936004638671875, 15.194900512695312, 5.577869415283203, 7.181007385253906, 4.364574432373047, 29.339187622070312, 3.31561279296875, 11.125764846801758, -6.824047088623047, 12.121280670166016, -17.838178634643555, 46.37687301635742, -2.26251220703125, 16.96186065673828, 9.607254028320312, 2.1522674560546875, 0.2531242370605469, 4.218576431274414, 27.3748779296875, 10.630237579345703, 10.127971649169922, 9.460626602172852, -1.0788421630859375, -2.1277542114257812, 41.24168395996094, 9.158166885375977, 10.837127685546875, 4.238716125488281, 12.022903442382812, 1.5667400360107422, 9.809532165527344, 11.266555786132812, 10.933204650878906, 36.348541259765625, 17.274158477783203, 6.178462982177734, 16.985107421875, -0.9472465515136719, 34.01216125488281, 25.167720794677734, -7.3843841552734375, 4.614006042480469, 5.15863037109375, -5.7572021484375, -1.830810546875, 11.588293075561523, 23.093292236328125, -1.9066619873046875, 3.184600830078125, 9.15740966796875, 7.024559020996094, 0.1535797119140625, -0.000873565673828125, 5.976325988769531, 19.801300048828125, 12.757474899291992, 9.997604370117188, -5.019741058349609, 7.2655029296875, 15.94158935546875, 7.857780456542969, 7.3307952880859375, 1.914459228515625, -0.6030921936035156, 32.160789489746094, 29.96920394897461, 1.6797924041748047, 5.6945343017578125, 13.292732238769531, 10.092880249023438, 28.661331176757812, 31.09661865234375, 11.317840576171875, 18.47283172607422, 26.069976806640625, 9.623128890991211, 27.763885498046875, 13.800048828125, 22.96875, 5.43603515625, -0.6063766479492188, 2.887819290161133, 10.238025665283203, 5.0593719482421875, 10.915407180786133, 10.541908264160156, 13.969690322875977, 45.283592224121094, 9.330860137939453, 4.2541351318359375, 12.054214477539062, 12.560501098632812, -2.2425079345703125, 16.863969802856445, 25.93614959716797, 16.28652572631836, 12.111391067504883, 27.911834716796875, 8.93212890625, 8.387992858886719, 3.0827560424804688, 14.207643508911133, 10.13359260559082, 25.10802459716797, -0.49143218994140625, -1.468170166015625, -4.110771179199219, 26.437301635742188, 14.9520263671875, 10.468196868896484, -0.7089633941650391, 8.597307205200195, 2.063396453857422, 0.15388107299804688, 5.6730194091796875, 1.7670822143554688, 16.391456604003906, 11.467208862304688, -5.600067138671875, 18.380210876464844, 33.000946044921875, 38.61555480957031, 12.554342269897461], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000112.npy"}
{"epoch": 0.32941176470588235, "step": 113, "batch_size": 128, "mean": 10.603593826293945, "std": 12.630918502807617, "min": -19.004924774169922, "p10": -3.8320489883422844, "median": 10.674905776977539, "p90": 25.406243896484376, "max": 63.670501708984375, "pos_frac": 0.8046875, "sample": [10.3905029296875, 19.629959106445312, 32.16486358642578, 25.489898681640625, -0.6980419158935547, 2.0659027099609375, -3.4481639862060547, 7.2393798828125, -14.720220565795898, -2.148824691772461, -5.58729362487793, 3.7818756103515625, 13.359580993652344, 28.973480224609375, 17.92046356201172, 23.58327865600586, 8.9315185546875, 15.267406463623047, -11.197256088256836, 16.454010009765625, 12.75042724609375, -11.720771789550781, 21.281497955322266, 20.845291137695312, 11.597557067871094, 1.2740859985351562, 9.061248779296875, 8.282028198242188, 18.732635498046875, -14.938079833984375, -19.004924774169922, 32.32521057128906, 1.1553955078125, 10.674171447753906, 15.287055969238281, 12.942527770996094, -12.494613647460938, -1.1694488525390625, 14.861701965332031, 11.291763305664062, -0.49491310119628906, 7.457527160644531, 33.639122009277344, 13.147872924804688, 22.484642028808594, 12.13507080078125, 25.370391845703125, 4.815311431884766, 23.78291893005371, 16.768661499023438, 8.152229309082031, -1.2692680358886719, 15.320423126220703, 6.2191314697265625, 63.670501708984375, 10.675640106201172, 11.26715087890625, 19.277114868164062, 25.183258056640625, -3.640960693359375, 7.789104461669922, 18.348608016967773, -6.3553924560546875, 21.391845703125, 34.195777893066406, -7.2930908203125, 9.57436752319336, 12.8138427734375, 1.6254196166992188, 4.886423110961914, 13.86859130859375, 27.4241943359375, 19.084678649902344, 16.58819580078125, 10.62054443359375, 1.2528076171875, 22.006372451782227, 40.93174743652344, 12.125865936279297, 22.98846435546875, 12.150323867797852, 11.371185302734375, -8.840766906738281, 11.348434448242188, 24.018020629882812, 18.07141876220703, 26.727706909179688, 2.362823486328125, -4.277921676635742, 11.815982818603516, 3.586883544921875, 25.952674865722656, 16.030059814453125, 12.12299919128418, 3.0775489807128906, 44.57177734375, 12.94390869140625, 3.395437240600586, 18.643665313720703, 1.669891357421875, -3.574666976928711, 5.886981964111328, 2.239990234375, 9.022123336791992, -2.28021240234375, 15.745223999023438, 10.861961364746094, 8.378128051757812, 2.9027938842773438, 18.69738006591797, 9.565277099609375, 8.69559097290039, 3.2647552490234375, -0.8677291870117188, 3.0065841674804688, -7.380317687988281, -5.281734466552734, -2.406280517578125, -3.0030517578125, 10.577911376953125, 9.17901611328125, 2.9645957946777344, 24.707664489746094, 4.813470840454102, 12.945953369140625, 0.3662681579589844, 27.308868408203125, 15.864154815673828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000113.npy"}
{"epoch": 0.3323529411764706, "step": 114, "batch_size": 128, "mean": 11.23328685760498, "std": 12.435948371887207, "min": -19.525482177734375, "p10": -3.7955522537231428, "median": 11.207572937011719, "p90": 24.227475738525392, "max": 59.83796691894531, "pos_frac": 0.8359375, "sample": [18.236587524414062, 8.331855773925781, 9.471027374267578, 38.55194854736328, 14.233131408691406, -2.9782962799072266, 8.740135192871094, 21.2766170501709, 19.969253540039062, 12.8287353515625, 26.288192749023438, 50.287994384765625, 0.9579505920410156, 22.123294830322266, 16.414020538330078, 6.789466857910156, 12.456550598144531, 0.8303089141845703, -6.85296630859375, -8.399986267089844, 15.068115234375, 16.45147705078125, 15.993331909179688, -2.551532745361328, 27.868148803710938, 11.874956130981445, 15.23837661743164, 49.15412902832031, 24.17633056640625, 3.17401123046875, -0.3122978210449219, -6.99870491027832, 23.231246948242188, 4.911417007446289, 8.238494873046875, 19.482139587402344, 10.417915344238281, 3.7221946716308594, -2.580249786376953, 12.304969787597656, 3.449522018432617, 19.66522216796875, 11.794170379638672, 11.257003784179688, 24.2144775390625, 17.050735473632812, 19.419227600097656, 23.837677001953125, 15.328475952148438, 32.64228820800781, -2.8531856536865234, 59.83796691894531, -5.139902114868164, -14.623260498046875, 6.647590637207031, -2.4206390380859375, 19.26087188720703, -7.493858337402344, -2.263612747192383, 5.829261779785156, 13.787288665771484, 19.438701629638672, 7.708333969116211, 15.826141357421875, 11.731201171875, -17.885086059570312, 13.746185302734375, 4.703125, 9.361076354980469, 24.999847412109375, -5.629356384277344, 19.567726135253906, 28.975486755371094, 8.572662353515625, 3.624391555786133, 5.654346466064453, -3.219402313232422, 14.376319885253906, 11.565071105957031, 12.162124633789062, 5.430986404418945, 12.51922607421875, 10.550544738769531, 3.1725196838378906, 6.210960388183594, 11.127853393554688, 5.5024261474609375, 23.3233642578125, 9.519054412841797, -7.991973876953125, 29.574790954589844, 7.436702728271484, -5.864240646362305, 17.01160430908203, 9.12164306640625, 17.046600341796875, 10.849578857421875, 1.2431221008300781, 7.0043182373046875, 16.447174072265625, 35.677093505859375, 11.9002685546875, 17.90764617919922, 12.259750366210938, 31.551597595214844, 13.557098388671875, 6.611961364746094, 3.1335487365722656, -6.23797607421875, 21.97161865234375, 20.499237060546875, 7.130842208862305, 3.4699058532714844, 13.334129333496094, 11.15814208984375, 3.0427589416503906, 12.683525085449219, 11.90934944152832, 18.297080993652344, -5.584264755249023, 24.25780487060547, 8.563674926757812, -19.525482177734375, 15.47601318359375, 3.2467498779296875, 9.15829086303711, 0.4275665283203125, 9.820037841796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000114.npy"}
{"epoch": 0.3352941176470588, "step": 115, "batch_size": 128, "mean": 12.468379974365234, "std": 13.25106143951416, "min": -16.0908203125, "p10": -1.0991785049438476, "median": 11.366693496704102, "p90": 29.068193054199213, "max": 61.95109558105469, "pos_frac": 0.890625, "sample": [15.814186096191406, 5.183147430419922, 13.622116088867188, 27.68726348876953, -2.8472518920898438, -7.399993896484375, 6.51275634765625, 23.93177032470703, 16.06800079345703, 0.1853179931640625, 12.48440933227539, 39.9337158203125, 0.1751117706298828, 10.707473754882812, 7.809968948364258, 2.2822189331054688, 6.701051712036133, 33.9256591796875, 30.365493774414062, 4.020908355712891, 9.238510131835938, 6.0518341064453125, 3.1987762451171875, -3.2686538696289062, -1.084320068359375, -10.955141067504883, 7.671136856079102, 27.015960693359375, 1.8878612518310547, 6.808753967285156, 22.563255310058594, 1.8070831298828125, 8.422393798828125, 42.89793395996094, 20.061065673828125, 33.20740509033203, 14.96042251586914, 17.57379150390625, 24.429733276367188, 21.300270080566406, 19.541776657104492, 10.560659408569336, 59.16365051269531, -16.0908203125, 3.1762962341308594, 16.620590209960938, 16.794090270996094, 15.838394165039062, 9.246406555175781, 0.3244361877441406, 0.40479278564453125, 4.348920822143555, 12.045246124267578, 4.040443420410156, 28.51220703125, 34.00213623046875, 4.895408630371094, 8.871536254882812, 3.1406478881835938, 21.643173217773438, -10.591522216796875, -1.1338481903076172, 12.353591918945312, 0.8203125, 12.020347595214844, 19.29851531982422, 2.473665237426758, 9.520622253417969, 9.734619140625, 13.548433303833008, 11.530725479125977, 15.45306396484375, 0.7813034057617188, 19.127113342285156, 6.8808441162109375, 1.5840606689453125, 23.813461303710938, -10.570491790771484, -2.9287033081054688, 18.413440704345703, 18.767776489257812, 16.856582641601562, 26.28544044494629, 10.364410400390625, 18.730422973632812, 6.3136749267578125, 1.2699050903320312, 27.714111328125, 37.62799072265625, 8.385818481445312, 13.375923156738281, 6.274238586425781, 19.103958129882812, 3.669393539428711, 41.59552001953125, 7.106956481933594, 11.241260528564453, 4.969337463378906, 12.810577392578125, 15.752517700195312, -9.308403015136719, 20.49871826171875, 24.675010681152344, 11.49212646484375, 6.721521377563477, 15.499935150146484, 10.076171875, 18.717758178710938, 30.866897583007812, 11.616397857666016, 18.50206756591797, 0.6527786254882812, 0.4485359191894531, 2.6277236938476562, -8.498649597167969, 14.371337890625, -7.078887939453125, 3.5746994018554688, 32.2481689453125, 9.209327697753906, 61.95109558105469, 13.117843627929688, 32.869422912597656, -12.822990417480469, 18.334049224853516, 15.551986694335938, 15.536209106445312, 26.125137329101562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000115.npy"}
{"epoch": 0.3382352941176471, "step": 116, "batch_size": 128, "mean": 10.934165000915527, "std": 13.066137313842773, "min": -15.292152404785156, "p10": -3.234025192260742, "median": 7.592109680175781, "p90": 26.479029846191402, "max": 58.64710998535156, "pos_frac": 0.828125, "sample": [48.44725799560547, 18.93988800048828, 14.0458984375, 18.91888427734375, 3.5513534545898438, 12.693521499633789, 55.98162078857422, 8.462387084960938, 15.706005096435547, 25.068286895751953, 13.918586730957031, 13.692672729492188, 37.37647247314453, 13.20638656616211, 6.981071472167969, 24.05254364013672, 7.665740966796875, 7.439811706542969, -15.292152404785156, 3.781582832336426, 4.358697891235352, 9.166561126708984, 16.49993133544922, 10.456939697265625, -0.9391021728515625, 1.516021728515625, 19.208873748779297, 14.99124526977539, 0.5517196655273438, -3.1007003784179688, -4.6629486083984375, 38.86756134033203, 4.117862701416016, 22.93865966796875, 4.628242492675781, 10.44329833984375, -0.49599456787109375, 8.244316101074219, 32.26019287109375, 8.699243545532227, 5.5901336669921875, -1.1877174377441406, 25.877349853515625, -0.213134765625, 20.985960006713867, 36.29411315917969, 23.000404357910156, 22.218963623046875, 4.811027526855469, 1.4717464447021484, 25.800270080566406, 2.8656997680664062, 5.6089630126953125, 4.004371643066406, 21.65906524658203, 4.977537155151367, -5.7711029052734375, 5.198719024658203, 0.8582839965820312, 5.901123046875, 7.5445098876953125, 6.793464660644531, 2.6615371704101562, 14.991035461425781, 11.406646728515625, -4.385490417480469, 1.500417709350586, 23.88226318359375, 0.11660003662109375, 14.659393310546875, 22.510665893554688, 22.42828369140625, 13.525115966796875, 10.671394348144531, -0.6570587158203125, 13.995075225830078, 12.836956024169922, 22.309478759765625, -4.6104583740234375, 7.63970947265625, -4.5626373291015625, 3.6493377685546875, -1.4942779541015625, 30.145401000976562, 5.591617584228516, 4.329341888427734, 10.249588012695312, 41.238311767578125, 28.325927734375, -6.5552520751953125, 58.64710998535156, 1.8216285705566406, 3.1400604248046875, 21.220306396484375, 33.21247863769531, 10.730648040771484, 1.6558303833007812, 30.219192504882812, -2.6559104919433594, 15.262809753417969, 24.152069091796875, -3.721343994140625, -5.124053955078125, 1.323129653930664, 26.00048828125, -1.6411323547363281, 18.597808837890625, -4.441566467285156, 1.7158432006835938, 27.595626831054688, 7.174507141113281, 10.786125183105469, 22.7237548828125, -8.284393310546875, 7.380865097045898, -3.545116424560547, 5.862701416015625, -12.46200180053711, 2.2317047119140625, 1.3312568664550781, 13.446632385253906, 11.212684631347656, 3.3311538696289062, 7.52783203125, 5.625476837158203, 9.018844604492188, 4.2506256103515625, 1.1724166870117188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000116.npy"}
{"epoch": 0.3411764705882353, "step": 117, "batch_size": 128, "mean": 11.64725399017334, "std": 12.649276733398438, "min": -26.600791931152344, "p10": -0.7459892272949218, "median": 9.760673522949219, "p90": 28.67271041870117, "max": 75.88493347167969, "pos_frac": 0.8828125, "sample": [4.413602828979492, 10.682859420776367, 15.481727600097656, 15.673416137695312, 9.776321411132812, 13.875297546386719, -6.69232177734375, -1.9198150634765625, 19.379669189453125, -2.328113555908203, 18.261024475097656, 29.938461303710938, 10.32253646850586, 9.863117218017578, 9.530380249023438, 29.122413635253906, 6.669811248779297, 17.272506713867188, 11.123603820800781, 33.119781494140625, 1.6607093811035156, 26.393081665039062, 4.162567138671875, 4.90815544128418, 1.2266311645507812, 4.075035095214844, 4.363908767700195, 14.143203735351562, 18.89293670654297, 14.459197998046875, 0.6113815307617188, 10.15673828125, 9.082427978515625, -10.00423812866211, 6.135589599609375, 16.101261138916016, 9.019935607910156, 12.732307434082031, 30.224533081054688, 25.060775756835938, 13.95458984375, 17.940719604492188, 8.028411865234375, -11.84454345703125, -9.278823852539062, 2.945953369140625, -0.6873931884765625, 17.007919311523438, 8.204364776611328, 28.47998046875, -0.7183609008789062, 9.745025634765625, 1.4694976806640625, 20.664215087890625, 8.533184051513672, -9.475730895996094, 3.0667877197265625, 18.626270294189453, 6.510097503662109, 75.88493347167969, 19.245086669921875, 8.014163970947266, 36.500457763671875, 15.059028625488281, 0.5628261566162109, 30.08611297607422, 36.24784851074219, 17.22381591796875, 8.28714370727539, 14.607879638671875, 13.918838500976562, 16.858245849609375, 3.3060684204101562, 4.736438751220703, 1.9222946166992188, 7.948516845703125, 26.193641662597656, 22.584251403808594, 4.007331848144531, -4.774021148681641, 26.774124145507812, 18.59756088256836, 34.09202575683594, 3.8721771240234375, 11.900999069213867, 21.909164428710938, 6.935394287109375, 9.663009643554688, 14.466300964355469, 7.836860656738281, 17.966445922851562, 21.47957992553711, 31.09771728515625, -26.600791931152344, 15.618606567382812, 16.33485984802246, 14.924659729003906, 14.089279174804688, 9.571887969970703, 6.6987762451171875, -11.293869018554688, -0.810455322265625, 9.094058990478516, 1.4649162292480469, 7.46429443359375, 2.1640548706054688, 11.040298461914062, 23.313934326171875, 4.052278518676758, 42.83659362792969, 2.450124740600586, 21.805267333984375, 6.363128662109375, 30.21930694580078, 11.982419967651367, 7.549407958984375, -2.5270843505859375, 1.2703399658203125, 6.079277038574219, 1.4476318359375, 1.2100982666015625, 18.097871780395508, 14.606864929199219, 9.1502685546875, 12.546859741210938, -7.774482727050781, 31.97319793701172, 9.277801513671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000117.npy"}
{"epoch": 0.34411764705882353, "step": 118, "batch_size": 128, "mean": 11.571135520935059, "std": 13.780163764953613, "min": -18.7635498046875, "p10": -1.4510086059570309, "median": 9.863731384277344, "p90": 31.23524017333984, "max": 57.30229187011719, "pos_frac": 0.84375, "sample": [2.700359344482422, 2.5264320373535156, 3.0762176513671875, 10.889446258544922, 3.393360137939453, 37.160125732421875, 3.0315704345703125, 21.316741943359375, 10.124191284179688, -1.9099311828613281, 11.967199325561523, 1.592926025390625, -1.7036819458007812, 7.2626953125, -2.849811553955078, -0.95440673828125, 25.0626220703125, 12.686164855957031, 12.147647857666016, 23.3682861328125, 12.40900993347168, 25.069190979003906, 16.68008804321289, 6.9764251708984375, 1.8676300048828125, 7.7920684814453125, 31.915725708007812, 28.59722900390625, -1.3427200317382812, -5.969518661499023, 2.603242874145508, -15.879047393798828, 10.580860137939453, 20.882495880126953, -18.7635498046875, 6.340007781982422, -12.954681396484375, 6.063682556152344, 2.700103759765625, -3.22418212890625, 22.447372436523438, 29.281173706054688, 0.26853179931640625, 2.646209716796875, 49.766319274902344, 22.71160888671875, 3.1444854736328125, 11.594131469726562, 2.34698486328125, 18.32478141784668, 41.385711669921875, 5.75811767578125, 24.471588134765625, 13.335147857666016, 12.155632019042969, -0.4886932373046875, 5.311893463134766, 9.603271484375, 30.943603515625, 1.6637859344482422, 11.999427795410156, 4.938575744628906, 40.37336730957031, 4.8278961181640625, 26.42431640625, 3.1021652221679688, -6.5987701416015625, 14.325592041015625, 11.057636260986328, 20.41339111328125, 12.634170532226562, 4.534170150756836, 11.607986450195312, 9.344108581542969, 0.14502716064453125, 57.30229187011719, 10.589881896972656, 0.8370742797851562, 0.08054351806640625, 42.201629638671875, -7.842498779296875, 21.573612213134766, 18.10338592529297, 35.2149658203125, 1.1640663146972656, 36.51647186279297, 10.328964233398438, 12.059209823608398, 1.5620861053466797, 1.410684585571289, 25.285934448242188, 32.32770538330078, 26.22020721435547, -1.2857437133789062, 13.362691879272461, 18.134469985961914, 3.895336151123047, -0.5616378784179688, 17.2874755859375, 40.209625244140625, 16.634977340698242, 8.721527099609375, 17.13042449951172, 16.243915557861328, 17.473424911499023, 4.376640319824219, 8.405391693115234, 40.046905517578125, 21.103729248046875, 2.0920047760009766, 14.294448852539062, 6.72504997253418, 17.8883056640625, 16.087905883789062, 4.962982177734375, 4.46063232421875, -0.5550518035888672, 12.073631286621094, 6.93170166015625, 6.178218841552734, 41.143280029296875, 11.953088760375977, -0.11735153198242188, -11.667205810546875, -17.810104370117188, 18.987274169921875, -4.7219390869140625, 7.0501556396484375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000118.npy"}
{"epoch": 0.34705882352941175, "step": 119, "batch_size": 128, "mean": 12.020868301391602, "std": 13.602185249328613, "min": -11.5562744140625, "p10": -1.368475341796875, "median": 8.302696228027344, "p90": 32.289924621582024, "max": 50.557098388671875, "pos_frac": 0.8515625, "sample": [5.698638916015625, 10.376081466674805, 31.583343505859375, 18.821548461914062, 0.8668537139892578, 27.380706787109375, 10.626197814941406, 9.838813781738281, 6.195211410522461, 7.42962646484375, -2.2105178833007812, 12.857521057128906, 7.176460266113281, -1.4810333251953125, 50.557098388671875, 42.390777587890625, 12.123016357421875, 19.742626190185547, 11.3084716796875, 28.736038208007812, 12.159591674804688, 9.674461364746094, 0.3499298095703125, 6.169883728027344, -8.927070617675781, 40.29180145263672, 1.88775634765625, 3.1963424682617188, -9.052978515625, 2.322244644165039, 12.200714111328125, 33.30677032470703, -0.67877197265625, 13.250808715820312, 6.816619873046875, 15.894874572753906, 40.63172912597656, 5.65386962890625, 23.89480972290039, 11.100570678710938, 1.5820960998535156, 31.85413360595703, 6.178466796875, 9.10296630859375, 25.41975975036621, -1.7819252014160156, 38.852760314941406, 1.3432769775390625, 29.920654296875, -1.0154571533203125, 17.29483413696289, 15.71673583984375, 30.804443359375, 5.10357666015625, 5.284400939941406, 2.70013427734375, 3.3796768188476562, 21.7335205078125, 8.006881713867188, 11.07822036743164, -8.799736022949219, 20.538009643554688, 5.5471038818359375, -0.19840240478515625, -6.1031341552734375, 5.728395462036133, 1.6009674072265625, 29.820846557617188, 8.96470832824707, -1.8470687866210938, 8.949005126953125, 34.15167236328125, 31.10546875, 2.47308349609375, 4.474676132202148, 6.926631927490234, 0.8971176147460938, 33.335418701171875, 8.5985107421875, 4.78155517578125, 21.167434692382812, 10.049484252929688, 30.960220336914062, 6.948873519897461, 1.3777275085449219, 3.9774513244628906, 49.67271423339844, 11.932327270507812, 1.8440475463867188, 50.280426025390625, 16.114322662353516, 12.325302124023438, 13.553512573242188, 13.949356079101562, 2.202922821044922, 39.2521858215332, 4.24736213684082, 23.957061767578125, 23.209211349487305, 34.35820007324219, -6.756870269775391, -0.9496574401855469, 11.34762191772461, 5.07672119140625, 1.1093921661376953, 44.19183349609375, 5.77362060546875, 5.952106475830078, 25.58788299560547, -1.3202362060546875, 9.7738037109375, 13.735702514648438, -2.217010498046875, -11.5562744140625, 21.376731872558594, -7.3112335205078125, -2.1234359741210938, 6.912086486816406, 7.994117736816406, 13.3331298828125, 12.991846084594727, 1.4514122009277344, 7.822601318359375, 0.5987701416015625, 5.631891250610352, 11.08458137512207, -0.40460205078125, 4.518945693969727], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000119.npy"}
{"epoch": 0.35, "step": 120, "batch_size": 128, "mean": 9.882142066955566, "std": 12.625410079956055, "min": -29.400836944580078, "p10": -4.258590316772459, "median": 9.625633239746094, "p90": 26.14844741821289, "max": 41.01197814941406, "pos_frac": 0.7890625, "sample": [26.038604736328125, -0.7820816040039062, 1.638946533203125, 23.185714721679688, 18.996849060058594, 18.020751953125, 12.590133666992188, 7.0568084716796875, 35.679534912109375, 9.019390106201172, 10.936347961425781, 21.459304809570312, 29.32000732421875, -3.6378326416015625, 11.931625366210938, -3.5036678314208984, 13.673896789550781, 22.66080093383789, -5.9425201416015625, 7.7384185791015625, 6.583671569824219, 38.07786560058594, 11.16900634765625, 9.81890869140625, -11.284988403320312, 10.899330139160156, 24.375425338745117, 8.523246765136719, 12.967781066894531, 10.964374542236328, 36.2960205078125, 10.216758728027344, -0.4099082946777344, -0.313232421875, 6.15234375, 20.67987060546875, 2.6513938903808594, 13.204505920410156, 0.11565971374511719, 1.7719764709472656, -7.390892028808594, -0.7672290802001953, 10.106586456298828, 15.0528564453125, 13.355789184570312, 2.4552383422851562, 10.00467300415039, 14.133796691894531, 9.966293334960938, 18.39898681640625, 3.298553466796875, 14.245407104492188, -5.612274169921875, 14.333345413208008, -19.23925018310547, 5.4624176025390625, 40.830413818359375, 4.947698593139648, 11.404977798461914, -7.830022811889648, 22.391021728515625, -1.9466629028320312, 8.02777099609375, -1.9505271911621094, 10.416221618652344, 35.49296951293945, 4.9917144775390625, 5.406829833984375, -0.2895317077636719, 21.427459716796875, -1.3778533935546875, 22.36328125, -7.92167854309082, 7.365264892578125, 10.206794738769531, 22.472511291503906, 8.290275573730469, 21.906631469726562, 4.240852355957031, 27.665952682495117, 6.142364501953125, 4.7901458740234375, -15.80206298828125, 35.05678939819336, 17.24427032470703, 14.918937683105469, -10.304244995117188, 8.954620361328125, 26.404747009277344, 13.063545227050781, 6.6885528564453125, 31.438369750976562, 4.9167022705078125, 10.983810424804688, 2.7356033325195312, 21.502254486083984, 1.311859130859375, -8.789081573486328, 12.000223159790039, 14.360610961914062, -10.31298828125, 2.77679443359375, 28.759784698486328, -1.4417743682861328, 12.619354248046875, 9.432357788085938, -7.8324737548828125, 0.16779327392578125, -2.3927230834960938, 14.311126708984375, 5.0443572998046875, 23.414052963256836, -1.7084922790527344, 41.01197814941406, 5.3043365478515625, 7.0759124755859375, 35.37135314941406, 5.325340270996094, 24.930221557617188, -3.6784400939941406, 13.874595642089844, 2.9175758361816406, 18.595077514648438, 12.980064392089844, 1.7067642211914062, 1.189544677734375, 22.377853393554688, -29.400836944580078], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000120.npy"}
{"epoch": 0.35294117647058826, "step": 121, "batch_size": 128, "mean": 12.44058609008789, "std": 12.79248046875, "min": -14.099517822265625, "p10": 0.2838054656982424, "median": 11.791404724121094, "p90": 26.510391616821284, "max": 61.91888427734375, "pos_frac": 0.90625, "sample": [19.97055435180664, 9.117958068847656, -3.9237442016601562, 10.894638061523438, 2.4899063110351562, 12.862518310546875, 21.71100616455078, 14.433547973632812, -2.0297393798828125, 15.023666381835938, 23.984411239624023, 15.219993591308594, 3.8812713623046875, 9.113868713378906, 11.378477096557617, 12.955520629882812, 24.231971740722656, 14.613861083984375, 19.480148315429688, 2.6333236694335938, 10.168815612792969, 11.854454040527344, -5.910331726074219, 16.62445068359375, 11.35305404663086, 16.97454833984375, 11.010688781738281, 4.4745941162109375, 13.873294830322266, 42.78205108642578, 13.078590393066406, 3.3588085174560547, 27.640464782714844, 26.050823211669922, 38.454776763916016, 9.588890075683594, 0.4780158996582031, 39.545005798339844, -2.820514678955078, 7.414516448974609, 49.069580078125, 18.645477294921875, 7.7297515869140625, 7.845069885253906, 56.64140319824219, 0.60400390625, 47.78434753417969, 11.728355407714844, 27.582717895507812, 18.19357681274414, 1.2361221313476562, 12.718887329101562, 14.692794799804688, -0.5894317626953125, 12.3399658203125, 1.5416259765625, -10.307861328125, 29.53510284423828, 23.886497497558594, 12.365341186523438, -14.099517822265625, 22.703208923339844, -6.756992340087891, -12.766326904296875, -1.700063705444336, 6.496559143066406, 2.1413116455078125, 38.998512268066406, 1.1793594360351562, 7.845703125, 6.648571014404297, 1.0672760009765625, 0.5193252563476562, 13.017147064208984, 29.96221160888672, 1.1372528076171875, 12.587646484375, 5.3707427978515625, 21.521194458007812, 4.486328125, 13.860671997070312, 0.62640380859375, 14.858497619628906, 5.963050842285156, 15.608596801757812, 23.806198120117188, 12.046836853027344, 0.485870361328125, 0.153717041015625, 13.927753448486328, -1.1090850830078125, 22.22613525390625, 4.239324569702148, 12.94167709350586, 17.12664794921875, 4.1682891845703125, 21.931121826171875, 15.487953186035156, 1.0503005981445312, 10.862403869628906, 36.13508605957031, 0.3395576477050781, 12.62338638305664, 5.216617584228516, 19.013824462890625, 8.682296752929688, 17.77753448486328, 9.226631164550781, 7.608922958374023, -11.412017822265625, 4.475009918212891, 10.711151123046875, 1.1475334167480469, 9.815780639648438, 15.419532775878906, 61.91888427734375, 9.720207214355469, 13.905677795410156, 16.03069305419922, 13.037307739257812, 13.584022521972656, 9.126029968261719, 8.742454528808594, 16.05467987060547, 3.2914485931396484, 22.366561889648438, 13.013755798339844, 16.919132232666016], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000121.npy"}
{"epoch": 0.3558823529411765, "step": 122, "batch_size": 128, "mean": 12.283262252807617, "std": 11.772542953491211, "min": -12.886482238769531, "p10": -1.7964256286621088, "median": 11.400518417358398, "p90": 27.8205322265625, "max": 53.09783935546875, "pos_frac": 0.8671875, "sample": [7.878021240234375, -7.040901184082031, 7.777885437011719, 26.086318969726562, 8.57489013671875, 13.151725769042969, -2.8080711364746094, 12.933916091918945, 5.491424560546875, 25.84830093383789, 3.392822265625, 5.825157165527344, -8.411819458007812, 3.3089542388916016, 18.84832000732422, 7.475011825561523, 10.506525039672852, 21.255197525024414, 1.20489501953125, 39.48395538330078, 20.843345642089844, 33.862579345703125, 22.691482543945312, 21.423519134521484, 21.7847900390625, -0.8415069580078125, 27.352346420288086, 5.503841400146484, 12.926506042480469, 3.709259033203125, 27.990631103515625, 2.545146942138672, 0.7421188354492188, 1.7949142456054688, 6.04730224609375, 4.200080871582031, 36.18976593017578, 7.471893310546875, 12.383419036865234, 18.086647033691406, -1.6453819274902344, 22.74616241455078, 7.207538604736328, 14.119194030761719, 10.977142333984375, 18.632797241210938, 27.374923706054688, 20.831451416015625, 24.211414337158203, 18.4779052734375, 27.863235473632812, 27.802230834960938, 15.757102966308594, 3.476276397705078, -2.1488609313964844, 9.2701416015625, 29.37460708618164, 6.258201599121094, 11.110298156738281, 15.399368286132812, 16.889801025390625, 13.884828567504883, 10.40609359741211, 0.376922607421875, 13.331939697265625, -1.05029296875, 1.2849655151367188, 17.333221435546875, -2.5200061798095703, -4.1087188720703125, 29.061363220214844, 19.553367614746094, 0.5498199462890625, 32.01591110229492, 2.4241981506347656, 21.05162811279297, -6.112285614013672, -2.971942901611328, 15.9671630859375, -1.606332778930664, 10.404647827148438, 36.9267578125, 44.541168212890625, 18.207778930664062, 15.111946105957031, 14.766212463378906, 2.425607681274414, 29.592575073242188, 0.309326171875, 3.5030784606933594, 17.27770233154297, 9.497261047363281, 7.7137451171875, 10.076337814331055, 0.00746917724609375, 13.570266723632812, -3.5132904052734375, 7.26739501953125, 3.664600372314453, 9.565284729003906, 2.182159423828125, 12.319847106933594, 15.654998779296875, 20.41594696044922, 23.6719970703125, 6.273096084594727, -5.995220184326172, 17.097110748291016, 12.494972229003906, -4.6116790771484375, 24.120864868164062, 1.7751293182373047, 13.616008758544922, -8.597099304199219, 20.12702178955078, 12.229522705078125, 10.129684448242188, -12.886482238769531, 13.732086181640625, 17.13791847229004, 11.690738677978516, 53.09783935546875, 7.475807189941406, 29.004478454589844, 6.4683685302734375, 19.462745666503906, 6.298095703125, 26.607818603515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000122.npy"}
{"epoch": 0.3588235294117647, "step": 123, "batch_size": 128, "mean": 10.717824935913086, "std": 13.417325973510742, "min": -22.458465576171875, "p10": -1.6257429122924805, "median": 8.029016494750977, "p90": 28.85168380737304, "max": 69.41068267822266, "pos_frac": 0.8515625, "sample": [4.1148529052734375, 30.656211853027344, 31.304523468017578, 6.223091125488281, 14.140495300292969, 2.5808258056640625, 5.598415374755859, -0.13189697265625, 19.92547607421875, 20.151222229003906, -0.5635986328125, 6.841733932495117, 51.11280059814453, 9.363365173339844, -1.0955944061279297, 9.549240112304688, 44.02571105957031, -7.6217041015625, 12.118049621582031, 6.335563659667969, -0.9046783447265625, 3.8572998046875, 19.526077270507812, 17.33816909790039, 5.922477722167969, 13.828338623046875, 45.824745178222656, 5.458320617675781, 33.79350280761719, 2.4521255493164062, 1.6088371276855469, 28.174781799316406, 7.973350524902344, 0.9327545166015625, 16.6302490234375, 6.695390701293945, 15.0184326171875, 11.632675170898438, 8.686429977416992, 23.56116485595703, 3.792449951171875, -0.26238250732421875, -4.107372283935547, -22.458465576171875, 2.9641895294189453, 13.972244262695312, 15.064735412597656, 6.186309814453125, 1.7946739196777344, 3.0380630493164062, 23.2705078125, 0.5647430419921875, 36.605690002441406, -15.757797241210938, 24.015487670898438, 69.41068267822266, 3.0657424926757812, 26.01538848876953, 5.404884338378906, 4.378715515136719, 19.074996948242188, 15.357955932617188, 7.558677673339844, 0.11585426330566406, 21.748809814453125, 30.431121826171875, 12.767532348632812, 11.274772644042969, 15.63116455078125, 14.455642700195312, 34.510650634765625, 5.501325607299805, 14.016281127929688, 5.770164489746094, -1.6562576293945312, 5.290863037109375, -1.6126651763916016, 23.055110931396484, 11.711666107177734, 4.103240966796875, 3.4396495819091797, 1.683135986328125, 25.64409637451172, 11.449325561523438, 6.777332305908203, 0.45226287841796875, 13.010856628417969, 7.227897644042969, 1.304779052734375, 18.64527130126953, -9.216590881347656, 8.08468246459961, 19.266921997070312, 5.294219970703125, 12.650043487548828, -8.275634765625, 3.5888595581054688, -10.362163543701172, 10.626594543457031, 15.834579467773438, 10.564992904663086, 7.465080261230469, 5.70855712890625, 14.680496215820312, 18.368473052978516, 7.2537994384765625, -6.428718566894531, -3.635894775390625, 37.101715087890625, -4.739646911621094, 1.5275039672851562, 14.632705688476562, 35.72364807128906, 11.772071838378906, 10.305511474609375, 1.5804672241210938, 10.031047821044922, 21.179519653320312, 9.366165161132812, -10.4765625, 0.5745296478271484, 14.5975341796875, 10.810554504394531, 30.5670166015625, 13.684173583984375, 4.997856140136719, -13.290298461914062, 2.128582000732422], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000123.npy"}
{"epoch": 0.36176470588235293, "step": 124, "batch_size": 128, "mean": 13.286150932312012, "std": 13.206180572509766, "min": -20.54157829284668, "p10": -2.6098735809326157, "median": 11.34715461730957, "p90": 30.884592247009273, "max": 57.41236877441406, "pos_frac": 0.8671875, "sample": [1.3696136474609375, 1.9901714324951172, 7.8137969970703125, 26.584930419921875, 15.152450561523438, 38.0126953125, 30.255882263183594, 29.284584045410156, 57.41236877441406, 7.83502197265625, 24.410247802734375, 33.83494567871094, 29.632957458496094, -7.909154891967773, -6.3773651123046875, 7.299343109130859, 13.571022033691406, 20.697479248046875, 9.875160217285156, 40.73846435546875, 25.530654907226562, 29.81964111328125, 10.475250244140625, -8.845191955566406, 25.595123291015625, 12.578607559204102, -0.41780853271484375, 3.8231277465820312, 6.1300048828125, 43.469444274902344, 14.980857849121094, -3.6449546813964844, 5.691152572631836, 18.48114776611328, 20.733238220214844, 24.812789916992188, 15.931129455566406, 13.197925567626953, 7.182079315185547, 39.91172790527344, 7.9542388916015625, 20.749935150146484, 5.366424560546875, 10.447460174560547, 25.012798309326172, 4.640041351318359, 33.389732360839844, 6.497283935546875, 8.555353164672852, 21.3126220703125, -3.7209701538085938, 8.891067504882812, 4.465654373168945, 0.9443244934082031, 7.615528106689453, 17.08019256591797, 12.386627197265625, 34.07744598388672, 31.521879196166992, -11.547454833984375, 23.31774139404297, -4.27044677734375, 5.917472839355469, 27.517120361328125, 11.929431915283203, 25.97113037109375, -0.11829948425292969, -5.519025802612305, 8.960945129394531, 2.4592723846435547, 6.0804290771484375, 17.487083435058594, 4.649635314941406, -0.017230987548828125, 11.575611114501953, 4.248573303222656, 15.279296875, 16.598892211914062, -10.627861022949219, 36.007598876953125, -8.091995239257812, 15.016510009765625, 11.756134033203125, 20.07208251953125, -2.1662673950195312, 7.362190246582031, 10.210380554199219, 6.8955841064453125, 10.025772094726562, 13.573013305664062, 4.332633972167969, 2.848024368286133, 13.988227844238281, 12.66180419921875, 21.374366760253906, 11.986488342285156, 5.9441986083984375, -5.32855224609375, 0.9429874420166016, 22.002098083496094, -5.354896545410156, 4.187046051025391, 22.211692810058594, 5.414003372192383, 18.640029907226562, 17.849336624145508, 17.154090881347656, 18.079246520996094, 4.8654022216796875, 36.86330795288086, 24.240325927734375, 6.8280487060546875, 9.868839263916016, 30.611469268798828, 21.716529846191406, 34.05704116821289, 25.113937377929688, 9.474222183227539, 28.39153289794922, 39.33127975463867, 3.8000850677490234, 11.118698120117188, 4.040081024169922, 17.119464874267578, 4.710533142089844, 5.067989349365234, -20.54157829284668, 0.35573577880859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000124.npy"}
{"epoch": 0.36470588235294116, "step": 125, "batch_size": 128, "mean": 10.941524505615234, "std": 12.20726203918457, "min": -12.715587615966797, "p10": -3.910388565063476, "median": 9.90599250793457, "p90": 26.262305831909178, "max": 56.1876220703125, "pos_frac": 0.7890625, "sample": [0.8842811584472656, 23.204212188720703, 8.693504333496094, -5.211296081542969, 8.994064331054688, -0.08133316040039062, 3.7514572143554688, 13.025390625, -10.945165634155273, 56.1876220703125, 14.809734344482422, -0.6120738983154297, 18.848114013671875, 12.781440734863281, -0.3918571472167969, 12.659833908081055, -12.715587615966797, 0.687347412109375, 6.376018524169922, 2.997955322265625, 16.530128479003906, -6.322547912597656, 17.79492950439453, 24.151092529296875, 11.4044189453125, -0.34320068359375, 15.884807586669922, 26.56979751586914, 22.669593811035156, 36.09381103515625, 18.50560760498047, 49.56983184814453, 19.061996459960938, 20.31330108642578, 10.479339599609375, 8.281227111816406, 4.795963287353516, 16.728504180908203, -5.751029968261719, -4.795894622802734, -2.53948974609375, 22.825767517089844, 4.468421936035156, 10.8193359375, 34.80433654785156, 1.4589691162109375, -1.2445259094238281, 11.414665222167969, -2.205169677734375, -2.98291015625, 28.07073974609375, -8.743087768554688, 0.05260467529296875, -4.396793365478516, 20.06549835205078, 3.4624691009521484, 8.016716003417969, -6.832489013671875, 10.238235473632812, 6.189048767089844, 9.478874206542969, 29.448379516601562, 26.130523681640625, -0.05001068115234375, 29.392677307128906, 16.360057830810547, 10.1322021484375, -3.8356094360351562, -11.78900146484375, 7.339599609375, 8.689334869384766, 17.95738983154297, 9.909622192382812, 8.431571960449219, 24.024551391601562, 23.341751098632812, 19.3580322265625, -6.08856201171875, 4.271934509277344, 12.874959945678711, -3.507556915283203, 41.11619567871094, -4.084873199462891, 29.283981323242188, 25.94091796875, 7.981559753417969, -4.6055450439453125, 9.777778625488281, 8.820571899414062, 12.642059326171875, 9.35186767578125, -2.8347930908203125, 14.857183456420898, 11.573028564453125, 3.9854202270507812, 11.981552124023438, 4.517156600952148, 3.0603485107421875, 11.825628280639648, 13.62445068359375, 13.758224487304688, 31.04931640625, 9.462135314941406, 9.902362823486328, 4.404645919799805, 26.032978057861328, 8.960868835449219, -1.150217056274414, 14.631448745727539, 6.611602783203125, 2.579437255859375, 3.86016845703125, 13.47735595703125, 26.990615844726562, 17.726318359375, 10.85650634765625, 25.480438232421875, 22.533912658691406, 14.610153198242188, 20.29186248779297, 25.863597869873047, 26.90496063232422, 4.553504943847656, 12.603935241699219, 13.392623901367188, -0.22021102905273438, 1.8956756591796875, 4.258026123046875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000125.npy"}
{"epoch": 0.36764705882352944, "step": 126, "batch_size": 128, "mean": 11.43801498413086, "std": 13.951143264770508, "min": -17.344284057617188, "p10": -3.060989379882811, "median": 9.77166748046875, "p90": 28.65810222625732, "max": 72.28468322753906, "pos_frac": 0.828125, "sample": [33.79136657714844, 3.148923873901367, 18.618270874023438, 7.6943359375, 5.342948913574219, -6.207588195800781, 5.2894134521484375, 31.642303466796875, -7.84686279296875, 48.87413787841797, 10.628320693969727, 20.01769256591797, 12.753303527832031, 13.472869873046875, 1.0621566772460938, 19.711315155029297, 28.370376586914062, 11.6846923828125, -10.529136657714844, 26.31827163696289, -1.4617996215820312, 6.860363006591797, 15.72346305847168, 51.01691436767578, 10.410148620605469, 17.86847686767578, -9.683464050292969, 12.899288177490234, 15.667709350585938, 14.37017822265625, 8.898880004882812, 5.1925201416015625, 18.122360229492188, 3.760408401489258, 27.00091552734375, -11.613960266113281, 15.532341003417969, 9.728889465332031, -7.0241851806640625, -8.311023712158203, -0.32501983642578125, 2.0806198120117188, -9.898120880126953, -6.760808944702148, 12.736862182617188, 52.70830535888672, 1.224151611328125, -1.6201095581054688, 11.975227355957031, 8.002824783325195, 10.973188400268555, 9.814445495605469, -10.669647216796875, 22.982681274414062, 19.361175537109375, 33.60707092285156, 3.3374557495117188, 24.07623291015625, 8.456293106079102, 3.9011802673339844, 0.9667739868164062, 13.622711181640625, 4.505575180053711, 14.9913330078125, 52.312477111816406, 19.363506317138672, 16.098251342773438, 20.433012008666992, 7.851097106933594, -2.6943206787109375, -1.0603179931640625, 22.068389892578125, 9.695562362670898, 15.257148742675781, 10.755901336669922, 18.445301055908203, 14.364120483398438, -3.9165496826171875, 12.507244110107422, 16.451122283935547, 6.365367889404297, 4.968231201171875, 17.540069580078125, -1.9179763793945312, 16.820396423339844, 8.508296966552734, 17.135723114013672, 33.47865295410156, 9.115856170654297, 29.3294620513916, 10.0787353515625, -17.344284057617188, 6.401252746582031, 24.5281982421875, 5.835243225097656, 12.55279541015625, 3.7161712646484375, 23.012104034423828, 13.274169921875, 1.6276817321777344, 30.498722076416016, 1.6234664916992188, 4.627464294433594, 20.37468719482422, 29.336624145507812, 7.29798698425293, -2.3454837799072266, -7.746337890625, 7.1940460205078125, 2.3470726013183594, 5.1544342041015625, 12.992462158203125, 2.583160400390625, -1.9626045227050781, 22.844818115234375, 12.45654296875, 7.620124816894531, 30.522438049316406, 7.627632141113281, 2.9891929626464844, 1.3903274536132812, 16.772415161132812, 72.28468322753906, -2.5070343017578125, 14.471534729003906, 0.5906639099121094, 5.676853179931641, 7.545921325683594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000126.npy"}
{"epoch": 0.37058823529411766, "step": 127, "batch_size": 128, "mean": 11.948171615600586, "std": 13.57351303100586, "min": -31.0452880859375, "p10": -4.060284042358398, "median": 10.663081169128418, "p90": 28.865316772460933, "max": 48.92472839355469, "pos_frac": 0.828125, "sample": [6.9821624755859375, 8.599346160888672, 23.0050048828125, 10.235641479492188, -3.5838470458984375, 5.244140625, 35.38996887207031, 14.476982116699219, 28.225921630859375, 2.9732322692871094, 12.366569519042969, 22.069921493530273, -13.541791915893555, -31.0452880859375, 23.53271484375, 38.898895263671875, 8.440746307373047, 24.49932861328125, 25.662567138671875, 19.6204833984375, 6.846336364746094, -6.3943023681640625, 36.53778076171875, 20.598121643066406, -9.579368591308594, -1.5809097290039062, 14.058597564697266, 7.338949203491211, -9.84857177734375, 2.8405227661132812, -1.064910888671875, 6.922216415405273, 44.489830017089844, 6.12396240234375, 7.70941162109375, 19.573165893554688, 19.096012115478516, 3.3044662475585938, 33.891326904296875, 13.125663757324219, 17.907913208007812, 5.286983489990234, 30.35723876953125, 0.17256927490234375, -5.0974273681640625, 1.6216506958007812, -7.7393951416015625, 17.827438354492188, 14.852962493896484, 8.727546691894531, 21.13134765625, 43.821571350097656, 21.578079223632812, 4.085212707519531, 10.942955017089844, 9.179428100585938, 11.511253356933594, 18.035606384277344, 19.786388397216797, 0.923187255859375, -2.7964248657226562, -2.6080780029296875, 14.556434631347656, 23.706390380859375, 19.409393310546875, -3.941944122314453, 20.49932098388672, 6.8140716552734375, 17.28067970275879, 21.873153686523438, 18.36700439453125, -2.3548851013183594, 10.130653381347656, 11.891487121582031, 11.65380859375, 38.98224639892578, -0.04895782470703125, 21.158485412597656, 21.431533813476562, 26.937088012695312, 4.090520858764648, 3.83892822265625, 1.3429393768310547, 22.79730987548828, 11.317550659179688, 21.415985107421875, 7.726551055908203, 14.352533340454102, 7.744020462036133, 10.583614349365234, 34.962890625, 0.858245849609375, 14.554519653320312, 10.072921752929688, -6.4703216552734375, 48.92472839355469, 1.4369659423828125, 7.326435089111328, -2.3518295288085938, 27.130340576171875, 24.323974609375, 10.742547988891602, 1.5441875457763672, -8.123523712158203, 25.458404541015625, -16.44662857055664, 27.458786010742188, 4.727394104003906, -4.3364105224609375, 12.623828887939453, 12.876047134399414, 33.60664749145508, 2.1142196655273438, -11.640060424804688, 3.4550247192382812, 34.9014778137207, 7.12672233581543, 10.105491638183594, 1.8604087829589844, 18.06609344482422, 14.161003112792969, 27.954727172851562, 5.5842132568359375, 4.48785400390625, 7.294029235839844, 31.673965454101562, 16.963163375854492, -4.747459411621094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000127.npy"}
{"epoch": 0.3735294117647059, "step": 128, "batch_size": 128, "mean": 12.99955940246582, "std": 14.025705337524414, "min": -22.662399291992188, "p10": -2.818960571289061, "median": 11.054227828979492, "p90": 30.59740486145019, "max": 66.06320190429688, "pos_frac": 0.828125, "sample": [66.06320190429688, 9.256561279296875, -5.110103607177734, 16.493621826171875, 20.192230224609375, -3.70013427734375, -9.37990951538086, 6.42767333984375, -0.393707275390625, 3.0383262634277344, 6.80859375, 24.224491119384766, 39.90303039550781, 9.091392517089844, 7.890724182128906, 6.064792633056641, 11.815475463867188, 34.59172821044922, 39.26445388793945, 11.29815673828125, 39.98904037475586, 14.577743530273438, 16.527206420898438, 39.185733795166016, -2.441314697265625, 46.690216064453125, 29.741180419921875, -4.160255432128906, -2.3967132568359375, -1.0437812805175781, -15.214759826660156, 7.534393310546875, -22.662399291992188, 14.792388916015625, 13.825454711914062, 6.142152786254883, 5.5855865478515625, 10.428670883178711, 28.580970764160156, 16.34864044189453, 6.184276580810547, 6.297935485839844, -11.18614387512207, 35.02513885498047, 6.219398498535156, 0.3641777038574219, 7.92779541015625, 30.234447479248047, 18.47106170654297, -5.403520584106445, 27.552906036376953, 4.664794921875, 14.698251724243164, 16.000198364257812, -0.0188446044921875, 24.774517059326172, 10.941219329833984, 11.408683776855469, 12.779556274414062, 19.152912139892578, -7.7328338623046875, 6.629386901855469, 20.797042846679688, 26.383102416992188, 15.633644104003906, 43.642372131347656, 10.594535827636719, 20.043106079101562, 14.320144653320312, 17.299179077148438, 7.218526840209961, 12.153114318847656, 4.549900054931641, 13.596027374267578, -6.583106994628906, 2.9019622802734375, -1.5247039794921875, 8.300933837890625, -0.355255126953125, 27.413482666015625, 13.505329132080078, 11.511871337890625, 2.0191268920898438, 14.231380462646484, 11.167236328125, 0.0889892578125, 27.32659912109375, 4.537590026855469, 6.112693786621094, 14.709375381469727, 5.0077362060546875, 5.724052429199219, 15.888351440429688, 24.446563720703125, 13.93349838256836, 45.656517028808594, -6.413555145263672, 16.63946533203125, 9.275764465332031, 23.347900390625, 31.444305419921875, 32.48960876464844, 29.582935333251953, 1.424072265625, 7.6150665283203125, 14.85543441772461, 9.156726837158203, -0.38776397705078125, 9.86737060546875, 12.386375427246094, 24.00033187866211, -1.2328033447265625, -6.520668029785156, 7.33648681640625, 10.643569946289062, 4.127967834472656, 6.386688232421875, 7.219474792480469, 14.216915130615234, 25.20287322998047, 9.195655822753906, -5.096805572509766, 23.963272094726562, 29.384689331054688, 20.348838806152344, 28.359891891479492, 9.17131233215332, 42.84515380859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000128.npy"}
{"epoch": 0.3764705882352941, "step": 129, "batch_size": 128, "mean": 13.170269966125488, "std": 15.811575889587402, "min": -37.797637939453125, "p10": -2.0960783004760737, "median": 12.361425399780273, "p90": 36.93915710449218, "max": 52.2513427734375, "pos_frac": 0.828125, "sample": [19.033935546875, 2.5078811645507812, 8.948974609375, 16.56934928894043, 14.871341705322266, -2.7055511474609375, 28.554901123046875, 41.68049621582031, 28.79936981201172, 19.79463768005371, 0.6365833282470703, 42.465087890625, 11.469085693359375, 45.99644470214844, 46.514034271240234, -7.142982482910156, 16.383808135986328, 44.3485107421875, 14.379463195800781, -1.110260009765625, 9.903379440307617, 0.5615768432617188, 13.550537109375, -6.5032958984375, 51.781951904296875, 15.634620666503906, 13.344261169433594, 9.960063934326172, 0.6050872802734375, 10.198074340820312, 12.932380676269531, 52.2513427734375, 31.35999298095703, 11.205253601074219, -6.054004669189453, -2.3428115844726562, 8.827743530273438, 28.420440673828125, 18.08667755126953, 5.2701873779296875, 12.018974304199219, 8.802433013916016, 6.018688201904297, -0.3492584228515625, 1.587472915649414, 35.981414794921875, 17.12896728515625, 4.163734436035156, 5.141853332519531, 16.609153747558594, 4.988487243652344, 45.16035842895508, 18.46440887451172, 13.316375732421875, 12.789043426513672, 11.736265182495117, 12.706008911132812, -0.7124042510986328, 6.89422607421875, -13.12809944152832, 0.7342338562011719, 9.682868957519531, 6.571876525878906, 25.467910766601562, 26.818336486816406, 2.1510448455810547, -0.28987884521484375, 18.183486938476562, 7.285980224609375, -0.563690185546875, 41.367164611816406, -25.140411376953125, 2.2659225463867188, 5.060310363769531, 22.237930297851562, 14.879409790039062, 12.703876495361328, 23.390146255493164, 48.24333953857422, 14.3885498046875, 19.879653930664062, 6.2323150634765625, -8.762418746948242, -0.7000350952148438, 16.435157775878906, -37.797637939453125, -1.8866043090820312, 13.062215805053711, -5.519083023071289, 12.979827880859375, 39.17388916015625, -7.951194763183594, 14.7786865234375, 33.753482818603516, 16.545310974121094, 25.597270965576172, 12.780754089355469, 4.0948638916015625, -1.990335464477539, -6.510398864746094, 1.4348030090332031, 11.794349670410156, 23.71036720275879, -1.0524978637695312, 45.931976318359375, 3.5475234985351562, 10.35516357421875, 6.486026763916016, 1.5504302978515625, 17.701122283935547, 12.876850128173828, 24.606903076171875, 30.82331085205078, 8.030220031738281, 5.264427185058594, 13.631160736083984, 6.719730377197266, -25.302841186523438, 46.79852294921875, 7.4394683837890625, 17.014297485351562, 29.00457763671875, 20.56853485107422, 27.616222381591797, 26.9110107421875, 1.8202629089355469, 13.02130126953125, 7.550510406494141], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000129.npy"}
{"epoch": 0.37941176470588234, "step": 130, "batch_size": 128, "mean": 11.169023513793945, "std": 14.326637268066406, "min": -33.401397705078125, "p10": -3.711578941345215, "median": 9.996437072753906, "p90": 26.252699661254884, "max": 75.52166748046875, "pos_frac": 0.8125, "sample": [18.587467193603516, -1.8509635925292969, 9.327102661132812, 19.363876342773438, 4.026439666748047, -33.401397705078125, 16.65094757080078, 37.47765350341797, 3.5072097778320312, -10.107681274414062, 27.609420776367188, 16.883392333984375, 20.600967407226562, -2.096527099609375, 5.35125732421875, 6.295368194580078, 28.92852783203125, 17.299232482910156, -10.272506713867188, 10.499786376953125, 4.455253601074219, 25.178359985351562, -2.8418350219726562, 26.410778045654297, -0.65264892578125, 14.871810913085938, -3.3475189208984375, 25.231929779052734, 24.217937469482422, 6.9927978515625, 25.47186279296875, -10.276718139648438, 34.81109619140625, 9.615859985351562, 2.3760757446289062, -14.903160095214844, 15.077644348144531, -7.551219940185547, 11.401351928710938, 3.082530975341797, 25.108505249023438, 3.662860870361328, 17.705814361572266, 6.12611198425293, 47.971832275390625, -10.868579864501953, 7.377166748046875, -3.4866104125976562, 6.448272705078125, 5.103424072265625, 22.801902770996094, 5.019493103027344, -3.5724334716796875, 1.7611885070800781, 14.389762878417969, 27.485992431640625, 9.924802780151367, 14.101150512695312, 6.7710723876953125, -3.703664779663086, -0.6540374755859375, 9.819175720214844, 14.127235412597656, 10.452934265136719, 15.525344848632812, 1.521707534790039, 4.3698883056640625, -3.7300453186035156, 19.463430404663086, 15.191646575927734, 13.929088592529297, 8.283447265625, 16.664657592773438, 1.1419677734375, 14.955558776855469, 33.93952941894531, -17.718677520751953, 8.204828262329102, 11.101848602294922, 20.588993072509766, 16.212387084960938, 19.950698852539062, 1.488424301147461, -3.1112518310546875, 55.736785888671875, 8.637481689453125, 8.658275604248047, 14.9866943359375, 15.063739776611328, 15.309829711914062, 15.021270751953125, -4.448516845703125, -3.2894134521484375, 27.457786560058594, 17.94996452331543, 7.762287139892578, 31.718231201171875, 10.0511474609375, 10.932231903076172, 24.051918029785156, 3.20330810546875, 9.941726684570312, 20.257705688476562, 75.52166748046875, -3.9917373657226562, 3.3695526123046875, 16.2147274017334, 5.50396728515625, 17.37354278564453, 18.654308319091797, 2.0211009979248047, 24.922119140625, -8.336170196533203, 14.665327072143555, 12.466194152832031, 4.512748718261719, 11.274738311767578, 9.0213623046875, 1.225341796875, 2.987628936767578, 4.0042572021484375, 15.8011474609375, 1.7148151397705078, 19.014495849609375, -5.275421142578125, 50.92660903930664, 14.706720352172852, 26.184951782226562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000130.npy"}
{"epoch": 0.38235294117647056, "step": 131, "batch_size": 128, "mean": 12.677631378173828, "std": 13.388530731201172, "min": -17.81757354736328, "p10": -2.1741390228271484, "median": 10.973214149475098, "p90": 30.998920440673828, "max": 54.8206787109375, "pos_frac": 0.828125, "sample": [26.174423217773438, 23.88824462890625, 7.19976806640625, -0.953033447265625, 0.839263916015625, 54.8206787109375, 7.3889007568359375, 31.171920776367188, 0.9037895202636719, 10.950294494628906, 20.86371612548828, -0.27906036376953125, 10.790779113769531, 15.813125610351562, -2.6643543243408203, 2.7016372680664062, 1.9821434020996094, 20.846118927001953, 23.53803253173828, -0.5681896209716797, 28.535438537597656, 9.4359130859375, 29.362869262695312, 15.5233154296875, -5.4711151123046875, 8.500520706176758, 35.804264068603516, 16.32354736328125, 6.499298095703125, -5.864051818847656, -17.81757354736328, 17.638362884521484, -0.19672012329101562, 7.959564208984375, -4.509246826171875, 6.608278274536133, -2.09906005859375, 18.321151733398438, 14.95821762084961, 15.106910705566406, 30.930702209472656, 6.882171630859375, 36.25556182861328, 16.071914672851562, 35.905189514160156, 5.401344299316406, -2.2981605529785156, 14.316688537597656, 54.67108154296875, 13.674278259277344, 21.8221435546875, 45.03550720214844, 0.8883419036865234, 1.0748748779296875, 0.4238090515136719, 19.651460647583008, 17.114364624023438, -5.550683975219727, 32.45209503173828, 3.6210250854492188, 1.928558349609375, -2.8788070678710938, 36.13152313232422, -0.1506500244140625, 4.8642425537109375, 17.068939208984375, 4.684173583984375, 15.875534057617188, -2.1209869384765625, 3.10247802734375, 30.656463623046875, 31.158096313476562, 12.652397155761719, 10.734310150146484, -3.565898895263672, 9.815460205078125, 17.829866409301758, 6.927936553955078, 2.314737319946289, 32.11578369140625, 8.716644287109375, 8.318225860595703, 24.28638458251953, -0.5838661193847656, 22.25811767578125, 8.507057189941406, 14.462791442871094, 14.321815490722656, 11.352691650390625, 0.9805755615234375, 0.912261962890625, 14.641803741455078, 11.422897338867188, 44.96168518066406, 5.951131820678711, 1.49505615234375, 23.560985565185547, 8.698860168457031, -9.44683837890625, -11.907211303710938, 10.996133804321289, 19.842788696289062, 8.876434326171875, -4.9619293212890625, 20.193668365478516, 14.216110229492188, 2.8949127197265625, 27.456951141357422, 28.158348083496094, -1.5962677001953125, 1.9641952514648438, 4.39605712890625, 5.003570556640625, 15.939178466796875, 15.830711364746094, 28.671340942382812, 13.593355178833008, 42.68049621582031, 14.304290771484375, -3.825592041015625, 12.111282348632812, 11.607994079589844, 10.590438842773438, 19.21868133544922, 8.409370422363281, 24.14051055908203, 21.211380004882812, 20.38542938232422], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000131.npy"}
{"epoch": 0.38529411764705884, "step": 132, "batch_size": 128, "mean": 12.452516555786133, "std": 12.223723411560059, "min": -20.935073852539062, "p10": -0.9305526733398437, "median": 11.064081192016602, "p90": 30.332655334472655, "max": 55.89909362792969, "pos_frac": 0.8828125, "sample": [18.0828857421875, -5.52252197265625, 7.40608024597168, 16.25018310546875, 16.811080932617188, 20.949729919433594, 22.48676300048828, 4.306877136230469, 14.568256378173828, 36.942413330078125, 8.393730163574219, 7.0139923095703125, -5.537162780761719, 2.3767318725585938, 19.98995590209961, 17.383941650390625, 17.09369659423828, 5.087394714355469, 11.969783782958984, 30.758255004882812, 10.066593170166016, 4.7829437255859375, 20.149585723876953, 2.058349609375, 16.43965721130371, 18.462482452392578, 11.376029968261719, 37.34705352783203, 41.60182189941406, -14.919811248779297, 6.6707763671875, 19.75518798828125, 3.1703338623046875, 15.240806579589844, 15.2366943359375, 23.186086654663086, 4.213953018188477, 17.761550903320312, 12.486175537109375, 9.608783721923828, 0.16048240661621094, 8.814083099365234, 8.015029907226562, 14.901399612426758, -4.038787841796875, 10.635372161865234, 13.658226013183594, -0.5350761413574219, 3.913240432739258, 22.227745056152344, 19.304513931274414, 11.318016052246094, 26.392826080322266, 25.88494873046875, 22.586864471435547, 8.1689453125, 12.023658752441406, 13.163047790527344, 4.291465759277344, 26.11138916015625, 35.04088592529297, 55.89909362792969, 6.33404541015625, 8.870574951171875, 20.476320266723633, 13.603569030761719, 6.534648895263672, 37.639617919921875, 10.81014633178711, -0.9147872924804688, 4.9335784912109375, 34.66847229003906, 31.843826293945312, 21.023971557617188, 9.284431457519531, 7.057952880859375, 3.0906982421875, 21.673980712890625, -1.546356201171875, 16.211288452148438, 18.99070167541504, 5.108917236328125, -0.9673385620117188, 14.880378723144531, 12.206253051757812, 6.944488525390625, 11.484840393066406, 37.29431915283203, 3.5338821411132812, 3.81768798828125, 35.098697662353516, -5.152046203613281, 23.943763732910156, 17.558982849121094, 3.1812915802001953, 17.037973403930664, 8.452278137207031, 33.79435729980469, 1.081787109375, 1.8501758575439453, 7.325254440307617, -20.935073852539062, 19.63422203063965, 7.1622772216796875, 2.8068103790283203, 14.382087707519531, 6.8397064208984375, 2.5636672973632812, 4.415966033935547, 10.644943237304688, 0.4061698913574219, -3.5574798583984375, 5.985389709472656, 15.204231262207031, 30.24090576171875, -1.2633285522460938, 0.0711669921875, 28.82811737060547, 26.33917236328125, -4.0282440185546875, 0.11567306518554688, 13.441160202026367, -4.9506378173828125, 30.546737670898438, 15.158706665039062, 10.759811401367188, 4.748920440673828, -10.146110534667969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000132.npy"}
{"epoch": 0.38823529411764707, "step": 133, "batch_size": 128, "mean": 12.217084884643555, "std": 15.100349426269531, "min": -16.02727508544922, "p10": -3.9432807922363278, "median": 8.706405639648438, "p90": 34.007773590087886, "max": 60.43334197998047, "pos_frac": 0.7890625, "sample": [1.5509719848632812, 38.88372039794922, 21.03418731689453, 21.34943389892578, 3.590740203857422, -4.89796257019043, 2.8536529541015625, 3.34539794921875, 3.0362014770507812, -9.198360443115234, 16.968772888183594, 12.459037780761719, 9.470932006835938, 40.77388000488281, -11.810371398925781, 56.19349670410156, 16.42247772216797, 8.622444152832031, -2.7715835571289062, -0.18358993530273438, 1.6650962829589844, 5.886810302734375, 16.525405883789062, -0.811920166015625, -1.489593505859375, 14.007476806640625, 17.083084106445312, -8.151321411132812, 17.650131225585938, 24.78900909423828, 11.98263931274414, 22.478797912597656, 45.513885498046875, -1.939056396484375, 36.671485900878906, 5.916053771972656, 4.3871002197265625, -9.233711242675781, -0.8289337158203125, 8.13465690612793, 0.1675262451171875, 34.74763488769531, 13.614738464355469, 60.43334197998047, 7.668056488037109, 28.529159545898438, -3.9032516479492188, 5.6983642578125, 21.812583923339844, 6.597930908203125, -12.667581558227539, 10.7196044921875, 12.146951675415039, 0.8878097534179688, -4.513587951660156, 31.181941986083984, 21.612350463867188, 4.053995132446289, 14.186519622802734, -11.355487823486328, 30.241230010986328, 0.36493873596191406, 7.360935211181641, 9.303773880004883, 6.9944000244140625, 33.756752014160156, 13.789529800415039, 16.188888549804688, 19.02020263671875, 21.251571655273438, 1.673065185546875, 8.619770050048828, 23.356414794921875, 25.20733642578125, 24.716455459594727, 3.244129180908203, -2.9765625, -9.722732543945312, -8.227149963378906, 34.59349060058594, 46.813385009765625, 12.247329711914062, 12.620903015136719, 7.99859619140625, 24.69605255126953, 32.61041259765625, 2.1061439514160156, 16.095504760742188, 41.92900466918945, 7.3084716796875, -4.03668212890625, 2.9889354705810547, 6.9021148681640625, 9.36065673828125, -3.195209503173828, 10.340827941894531, 19.583709716796875, 45.75665283203125, 44.33536911010742, 35.99501419067383, -0.420562744140625, 23.397846221923828, -3.8452072143554688, 13.896141052246094, 8.30999755859375, 24.019180297851562, 2.938629150390625, 7.661115646362305, -0.24740982055664062, 29.747055053710938, 27.733051300048828, 22.363937377929688, 8.280956268310547, 7.942832946777344, 3.2953968048095703, 15.14924430847168, 17.389612197875977, 8.790367126464844, -1.5021209716796875, 3.98876953125, 0.019617080688476562, 2.521991729736328, -16.02727508544922, 32.477699279785156, -4.9529266357421875, 12.668853759765625, -3.0356884002685547, 10.463077545166016], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000133.npy"}
{"epoch": 0.3911764705882353, "step": 134, "batch_size": 128, "mean": 12.698369979858398, "std": 12.700094223022461, "min": -19.955474853515625, "p10": -1.1350622177124023, "median": 11.564261436462402, "p90": 29.151518249511717, "max": 77.12747192382812, "pos_frac": 0.8671875, "sample": [4.703311920166016, 9.640487670898438, 17.243247985839844, -1.1210803985595703, 20.326082229614258, 4.955045700073242, 27.043380737304688, 4.073997497558594, 0.836029052734375, 20.42529296875, 36.16548156738281, 77.12747192382812, 10.042030334472656, 8.347145080566406, 21.296062469482422, -2.97052001953125, 40.528228759765625, 7.558868408203125, 4.094440460205078, 20.067291259765625, 9.613542556762695, 2.7731056213378906, 4.600914001464844, 12.875, 18.200546264648438, 29.132278442382812, 8.096755981445312, 10.283241271972656, 4.377338409423828, 24.107826232910156, 26.477134704589844, 12.117761611938477, 8.26165771484375, 19.596912384033203, -14.07843017578125, -2.86065673828125, 7.22607421875, 13.247507095336914, 14.374465942382812, 7.766727447509766, -0.8260345458984375, 7.454120635986328, 11.896026611328125, 5.9161376953125, 29.52178955078125, 22.394241333007812, 25.4249267578125, 11.2440185546875, 21.884521484375, 1.5052337646484375, 4.920469284057617, 25.107650756835938, -9.7677001953125, 9.655906677246094, 2.2092132568359375, 31.870391845703125, 18.367233276367188, -1.1676864624023438, 0.45044517517089844, 12.254953384399414, 39.654762268066406, 19.681671142578125, 29.894481658935547, 11.52029800415039, -0.8960304260253906, 32.14469909667969, 13.627805709838867, 11.608224868774414, 12.863479614257812, 3.7927188873291016, 15.740936279296875, 7.385343551635742, 1.9471359252929688, 6.408054351806641, -3.9180755615234375, 26.340335845947266, 8.297595977783203, 22.5802001953125, 6.326671600341797, 27.547971725463867, 13.496013641357422, 5.991415023803711, 39.88749694824219, 8.785285949707031, 15.038461685180664, 20.53426742553711, 14.6146240234375, 14.867616653442383, 10.371192932128906, 14.956809997558594, 15.829231262207031, 12.202743530273438, 12.189407348632812, 14.328062057495117, -1.9427967071533203, 10.09283447265625, 16.846763610839844, 16.394245147705078, 36.60862731933594, -19.955474853515625, 18.166259765625, 6.9964447021484375, 27.63254165649414, 0.2979278564453125, 5.6039581298828125, 29.786727905273438, 15.424118041992188, 29.935333251953125, 0.6707706451416016, 8.547714233398438, 4.102396011352539, -3.1855621337890625, 19.04004669189453, 29.1964111328125, -4.9671173095703125, -0.47612953186035156, 18.76850128173828, 4.2112884521484375, 10.353469848632812, 12.396209716796875, 18.411678314208984, -4.138092041015625, 19.925735473632812, 18.862316131591797, 6.427337646484375, -8.439376831054688, 3.2289657592773438, -1.961578369140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000134.npy"}
{"epoch": 0.3941176470588235, "step": 135, "batch_size": 128, "mean": 12.915111541748047, "std": 13.840676307678223, "min": -19.804100036621094, "p10": -3.727539825439452, "median": 12.158042907714844, "p90": 31.180243682861327, "max": 48.2283935546875, "pos_frac": 0.796875, "sample": [13.55657958984375, 29.926795959472656, 11.021385192871094, 17.476104736328125, 3.276153564453125, 6.451290130615234, 20.599891662597656, 11.734718322753906, 15.100017547607422, -0.5906753540039062, 7.24493408203125, -11.383926391601562, 36.874908447265625, 5.2494659423828125, 2.0318069458007812, -19.804100036621094, 36.36334228515625, 37.03312683105469, -2.3618850708007812, 27.67450714111328, 28.4664306640625, 14.878910064697266, 26.871414184570312, 18.577117919921875, 29.6466064453125, 21.609466552734375, 19.456024169921875, 10.011390686035156, 20.866111755371094, 40.05072021484375, 3.3314685821533203, 17.875503540039062, -0.0471038818359375, -3.0691299438476562, 26.095806121826172, 1.6414661407470703, 21.149368286132812, 25.108627319335938, 19.543777465820312, 20.758453369140625, 3.767364501953125, 17.005157470703125, 11.24997329711914, 25.44769287109375, 31.13622283935547, 4.792266845703125, 38.14630126953125, 2.7807159423828125, -2.545196533203125, 6.518863677978516, 3.058990478515625, -7.97528076171875, 2.7087459564208984, 23.92474365234375, 16.763107299804688, 15.540901184082031, -3.2878875732421875, 12.372116088867188, -3.450347900390625, 20.654884338378906, 21.590179443359375, 41.133056640625, -9.98825454711914, 16.096515655517578, -0.03542327880859375, 34.09807205200195, 29.03662872314453, -9.883941650390625, -12.606819152832031, 2.584259033203125, 15.656723022460938, 12.201004028320312, 15.806770324707031, 10.944097518920898, 6.0706329345703125, 24.21044921875, 11.321281433105469, 12.115081787109375, 18.816726684570312, 15.111534118652344, 48.2283935546875, -0.06592178344726562, -4.374320983886719, 14.143943786621094, 36.807044982910156, 3.489917755126953, 16.455780029296875, 11.861673355102539, 8.896072387695312, 11.395515441894531, 27.97332763671875, -0.377471923828125, -6.110939025878906, -2.561330795288086, 3.34130859375, 20.402366638183594, 29.108078002929688, 31.282958984375, 19.88831329345703, 10.041122436523438, 6.549644470214844, 6.19560432434082, 1.6431503295898438, 7.765842437744141, 27.812103271484375, 15.0889892578125, 0.5496749877929688, 7.477020263671875, 29.80181884765625, 6.694892883300781, -8.881813049316406, 18.11750030517578, 31.64837646484375, -7.396888732910156, 19.098194122314453, -1.7210006713867188, 5.109840393066406, 10.73916244506836, -8.104717254638672, 29.1602783203125, -2.0582504272460938, 0.2543487548828125, -6.30426025390625, 13.468894958496094, 21.092529296875, 35.64886474609375, -9.96225357055664, 40.63612365722656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000135.npy"}
{"epoch": 0.39705882352941174, "step": 136, "batch_size": 128, "mean": 13.579862594604492, "std": 12.998879432678223, "min": -22.410186767578125, "p10": -0.11257286071777287, "median": 12.450740814208984, "p90": 31.432032775878906, "max": 40.462799072265625, "pos_frac": 0.8984375, "sample": [35.73402404785156, 26.902679443359375, 16.176822662353516, 4.033153533935547, 10.5186767578125, 0.09519195556640625, 9.098976135253906, 31.880416870117188, 2.47735595703125, 23.580326080322266, 3.3842391967773438, 1.299856185913086, 0.045276641845703125, 3.0615234375, 29.865570068359375, 13.573974609375, 9.699028015136719, 1.720184326171875, -0.48088836669921875, 17.46027374267578, 0.5599594116210938, 0.15714645385742188, 6.013664245605469, 1.0615825653076172, 30.820499420166016, 8.55572509765625, 29.57312774658203, 12.19793701171875, 5.821155548095703, 2.94232177734375, 26.0040283203125, -7.890674591064453, 26.510147094726562, 1.2739372253417969, 6.076911926269531, -22.410186767578125, 27.023529052734375, 10.586807250976562, 9.285362243652344, 0.9128952026367188, 40.40283203125, 25.74718475341797, 11.49608039855957, 5.387886047363281, 9.730478286743164, 18.49273681640625, -5.700550079345703, 7.454736709594727, 10.21456527709961, 33.846923828125, 13.195024490356445, 1.496164321899414, 21.665172576904297, 6.095954895019531, 32.87774658203125, 2.999675750732422, 24.197250366210938, 26.616058349609375, 13.63604736328125, 25.206066131591797, 6.5151824951171875, 10.223705291748047, 13.486251831054688, 18.342517852783203, 14.797767639160156, 2.9697494506835938, 10.854255676269531, -6.9322509765625, 10.390670776367188, 30.293807983398438, 15.333938598632812, 31.06725311279297, 13.126518249511719, 11.491630554199219, 5.640380859375, 22.37267303466797, -8.682571411132812, 31.2398681640625, 27.532760620117188, 38.601715087890625, 10.430999755859375, 12.8863525390625, 5.915946960449219, 18.353652954101562, 40.16791534423828, -4.613334655761719, 21.979267120361328, 14.271537780761719, 5.48626708984375, 21.63937759399414, -5.285961151123047, 26.084064483642578, -6.5583953857421875, -5.443630218505859, -2.8180198669433594, 22.91394805908203, 36.43934631347656, 15.707725524902344, 26.081771850585938, 27.808517456054688, 3.2746543884277344, 17.328128814697266, -18.377012252807617, 13.997047424316406, 6.829917907714844, 8.509414672851562, 12.703544616699219, 30.675399780273438, 7.004737854003906, 38.82586669921875, 15.264938354492188, 16.94432830810547, 33.245880126953125, 18.130828857421875, 17.508056640625, 6.494482040405273, 19.0224609375, -3.9229583740234375, 35.05364990234375, 3.6397552490234375, 16.02367401123047, 4.205133438110352, 40.462799072265625, 2.4848861694335938, 12.924140930175781, 16.348121643066406, 36.832069396972656, 6.414588928222656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000136.npy"}
{"epoch": 0.4, "step": 137, "batch_size": 128, "mean": 11.993515014648438, "std": 15.155049324035645, "min": -55.68085479736328, "p10": -4.239518737792968, "median": 9.604711532592773, "p90": 33.68623313903808, "max": 58.737510681152344, "pos_frac": 0.828125, "sample": [16.243789672851562, 36.11243438720703, 6.454734802246094, 12.288421630859375, 35.17401885986328, 57.95164489746094, 0.310791015625, 10.624275207519531, 1.3410015106201172, 3.8699989318847656, 28.41726303100586, 25.64523696899414, 20.597393035888672, 7.894994735717773, 0.141021728515625, -2.6142921447753906, 28.741531372070312, 33.08789825439453, 8.138683319091797, 44.033111572265625, 27.71377944946289, 22.204620361328125, 7.522468566894531, 18.13048553466797, 2.8057174682617188, -4.6014556884765625, 35.223167419433594, 5.41615104675293, -0.6779708862304688, 18.374786376953125, 13.956268310546875, 22.340721130371094, 23.587814331054688, 3.756134033203125, 17.323265075683594, 6.687225341796875, 19.121994018554688, 14.283369064331055, 0.17921066284179688, 18.210420608520508, 1.5914783477783203, 12.425636291503906, 9.49395751953125, 13.848794937133789, 6.867164611816406, 20.76825714111328, 4.620307922363281, 8.652587890625, -1.0983657836914062, 19.085556030273438, 17.92865753173828, -1.2342853546142578, 32.39567565917969, 20.583473205566406, 16.6649169921875, -2.444072723388672, -6.177909851074219, 3.9791030883789062, 4.8707122802734375, 3.5085220336914062, 13.640052795410156, -5.343898773193359, 20.19232177734375, 27.613876342773438, 15.222230911254883, 2.561227798461914, 9.715465545654297, -7.5237579345703125, 9.259994506835938, -4.1277923583984375, 19.407306671142578, -8.137395858764648, 8.01055908203125, 39.941078186035156, -21.71137237548828, -6.468757629394531, 11.229721069335938, -1.4321098327636719, 12.004047393798828, 7.028820037841797, 10.222160339355469, 6.821220397949219, 5.580699920654297, 18.212692260742188, 36.42015838623047, 5.1857757568359375, 41.99518585205078, 0.4189300537109375, 5.62101936340332, 40.2237548828125, 14.910781860351562, 3.2816848754882812, 1.7205848693847656, -7.386940002441406, 18.387935638427734, 16.92314910888672, 1.5805835723876953, 6.672615051269531, 15.72365951538086, 35.08234786987305, 28.316070556640625, 20.209732055664062, -1.5822219848632812, 14.490341186523438, -55.68085479736328, 22.340560913085938, 58.737510681152344, -4.500213623046875, 3.9125137329101562, 38.54231262207031, 3.718412399291992, 5.9429931640625, 36.76906204223633, 7.2309112548828125, 13.490943908691406, 5.507194519042969, 14.182182312011719, -7.5242156982421875, -5.220252990722656, 9.16325569152832, -0.9203376770019531, -5.144596099853516, 7.450836181640625, 13.809616088867188, 11.915176391601562, 28.72464370727539, 3.4942779541015625, 16.772159576416016], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000137.npy"}
{"epoch": 0.40294117647058825, "step": 138, "batch_size": 128, "mean": 11.923599243164062, "std": 12.877086639404297, "min": -11.727291107177734, "p10": -2.5872741699218746, "median": 10.304117202758789, "p90": 31.272022247314453, "max": 57.068702697753906, "pos_frac": 0.8515625, "sample": [-0.6609725952148438, -0.2165813446044922, 14.559825897216797, 5.044990539550781, 2.8529186248779297, 1.2313079833984375, 27.837005615234375, 10.523422241210938, 6.119899749755859, 14.791275024414062, -3.45965576171875, 13.604591369628906, 9.40158462524414, 12.876148223876953, 46.483001708984375, 10.08481216430664, 8.935073852539062, 12.732681274414062, -4.496620178222656, 10.97662353515625, -6.872123718261719, 5.912651062011719, -2.052705764770508, 0.28908538818359375, 25.307390213012695, 35.86895751953125, 25.792728424072266, -9.39204216003418, 22.366127014160156, 19.946407318115234, 9.760883331298828, 14.492767333984375, 57.068702697753906, 6.647342681884766, 9.455787658691406, 11.755485534667969, 40.69481658935547, 17.050586700439453, 5.574184417724609, 13.447402954101562, 5.51959228515625, 3.828338623046875, 5.050693511962891, 9.065811157226562, 22.62921142578125, 7.409233093261719, 4.782323837280273, 6.480232238769531, 13.242385864257812, 18.8272705078125, -10.60369873046875, 10.844612121582031, 17.831178665161133, 29.71416473388672, 13.880622863769531, 32.46928024291992, 0.2608184814453125, 34.85645294189453, -7.864051818847656, 13.018171310424805, 11.422393798828125, 17.659141540527344, -1.425811767578125, 18.841537475585938, 11.096145629882812, 38.13566589355469, 13.86431884765625, 8.585220336914062, 31.23003387451172, 19.745647430419922, -2.4430007934570312, 43.99420166015625, 2.2058639526367188, 5.647064208984375, 14.066993713378906, 12.460718154907227, 6.308340072631836, 0.34951019287109375, 6.073841094970703, 22.212234497070312, 25.269317626953125, -3.202056884765625, 40.58552551269531, 16.43490982055664, 4.05523681640625, -2.9239120483398438, 3.2653732299804688, 8.62989616394043, 26.75493621826172, 13.576789855957031, 42.60948181152344, 3.181446075439453, 9.018014907836914, 14.588996887207031, -1.2351531982421875, 20.88737678527832, 0.5483074188232422, 18.43343162536621, 6.631841659545898, 8.957115173339844, 1.4010772705078125, 3.3075904846191406, 11.824806213378906, 23.413475036621094, 7.7242584228515625, -6.8667449951171875, 0.054065704345703125, 12.713790893554688, 9.475692749023438, 18.52843475341797, -8.155494689941406, 4.8138275146484375, 0.5337429046630859, 7.688385009765625, -6.947635650634766, 17.084243774414062, 8.412311553955078, 1.2440032958984375, 12.682640075683594, 19.344436645507812, 14.112607955932617, -11.727291107177734, 32.21124267578125, 11.917274475097656, -5.397727966308594, 34.21271514892578, 11.569660186767578, 31.3699951171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000138.npy"}
{"epoch": 0.40588235294117647, "step": 139, "batch_size": 128, "mean": 12.775571823120117, "std": 12.38089370727539, "min": -12.105438232421875, "p10": -1.6811523437499991, "median": 11.519096374511719, "p90": 28.873039245605465, "max": 58.800537109375, "pos_frac": 0.84375, "sample": [10.463237762451172, 17.74654197692871, -1.4576034545898438, 11.596954345703125, 14.518787384033203, 25.8001708984375, 9.056991577148438, 16.035778045654297, -0.8932476043701172, 11.112201690673828, 16.4055118560791, 4.626182556152344, 18.08391571044922, 24.089462280273438, 19.239952087402344, 18.368759155273438, 9.772750854492188, 26.574356079101562, 15.474288940429688, -0.6120319366455078, 24.95362091064453, 17.344940185546875, 4.2389678955078125, 0.5147552490234375, 18.372848510742188, 15.660087585449219, -1.30035400390625, 26.957015991210938, 11.362403869628906, 3.5144691467285156, 3.240447998046875, 9.147890090942383, 2.8970489501953125, 7.3641815185546875, 15.465171813964844, 10.571863174438477, 33.24011993408203, -4.68181037902832, 16.383018493652344, 8.866592407226562, -7.9363250732421875, -2.2027664184570312, 27.140777587890625, -4.39349365234375, 2.8890380859375, 16.952789306640625, 2.034027099609375, 36.596771240234375, 14.531143188476562, 14.432758331298828, 24.891563415527344, 2.6667442321777344, 4.869319915771484, 15.225479125976562, 10.083824157714844, 29.584049224853516, 1.9568977355957031, 2.1778640747070312, 25.339706420898438, 10.758277893066406, 6.458345413208008, 21.123920440673828, 6.290445327758789, 8.035232543945312, 7.379980087280273, 37.20913314819336, -3.0091590881347656, 11.177753448486328, 13.119857788085938, 29.394332885742188, 21.7457275390625, 12.57370376586914, -2.6466903686523438, 24.424232482910156, 25.294586181640625, -12.105438232421875, 7.498897552490234, 58.800537109375, 17.731155395507812, 9.114501953125, 8.985183715820312, -11.744155883789062, 19.21445083618164, 30.23554229736328, -11.784637451171875, 3.160125732421875, 5.166738510131836, 22.886356353759766, 12.436599731445312, 20.108802795410156, 15.607892990112305, 9.473506927490234, 2.5730552673339844, -0.126617431640625, 26.146099090576172, 30.324050903320312, 32.281280517578125, 11.441238403320312, 3.9528045654296875, 12.251602172851562, 24.450368881225586, -5.920143127441406, 23.30508041381836, 13.668075561523438, 7.70721435546875, 25.502517700195312, 15.221145629882812, 0.08363533020019531, 18.743385314941406, 40.16626739501953, 41.946388244628906, -10.456306457519531, 4.783447265625, 33.361854553222656, 12.97042465209961, -2.988128662109375, 1.818115234375, 32.20906066894531, -1.2890548706054688, 12.825103759765625, -3.198772430419922, 6.907812118530273, 28.649627685546875, 0.9012680053710938, 23.249408721923828, 20.420318603515625, 10.487106323242188, -0.16771316528320312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000139.npy"}
{"epoch": 0.4088235294117647, "step": 140, "batch_size": 128, "mean": 14.026040077209473, "std": 13.56105899810791, "min": -10.496570587158203, "p10": -1.4039424896240233, "median": 14.160496711730957, "p90": 30.218843269348145, "max": 66.47454833984375, "pos_frac": 0.859375, "sample": [0.07110786437988281, 7.753215789794922, 6.691738128662109, 7.642601013183594, -4.2198944091796875, -6.221107482910156, 37.2213134765625, 15.430465698242188, 0.28676605224609375, 14.172664642333984, 24.975284576416016, 2.2263412475585938, 24.542312622070312, 20.58551788330078, 17.72698402404785, -8.792121887207031, 10.912925720214844, 12.552997589111328, -1.3945274353027344, 24.7003173828125, 0.6791954040527344, 66.47454833984375, 24.31378936767578, 22.520709991455078, 31.017311096191406, 9.544330596923828, 31.840911865234375, 27.66631317138672, 21.36004638671875, -5.288463592529297, 24.56731414794922, 1.3246116638183594, 2.8763885498046875, 2.805706024169922, 5.8587646484375, 16.29437255859375, 16.509971618652344, 4.0338134765625, 30.659278869628906, 27.710426330566406, 9.56795883178711, 14.14832878112793, 19.991661071777344, 7.8343963623046875, -3.1920089721679688, 56.88371276855469, 3.4739990234375, 32.05621337890625, 4.563468933105469, -10.496570587158203, 1.9244766235351562, -1.4259109497070312, 11.466176986694336, 35.39439392089844, 4.491493225097656, 13.46670913696289, 19.74029541015625, 25.94085693359375, -8.114370346069336, -1.0095291137695312, 18.767166137695312, 15.794189453125, -8.321889877319336, 13.924516677856445, 1.9591064453125, 24.672210693359375, 4.840667724609375, 18.989654541015625, -9.854812622070312, 6.020713806152344, 17.167343139648438, 23.634140014648438, 5.399570465087891, 54.201141357421875, 6.9066314697265625, 1.7198143005371094, -0.8258609771728516, 18.04979705810547, 30.173431396484375, 10.381050109863281, 15.147552490234375, 29.388015747070312, 14.510719299316406, 11.510456085205078, 5.496368408203125, 43.71621322631836, 6.5602569580078125, 22.64629554748535, 11.86187744140625, 26.025646209716797, 21.097545623779297, 27.032379150390625, 16.011962890625, 11.343507766723633, -4.737701416015625, 4.745780944824219, 15.063911437988281, 3.8582611083984375, 17.152618408203125, 15.395576477050781, 16.07465362548828, 19.47960662841797, 30.757568359375, 18.43096923828125, 21.609861373901367, 11.669391632080078, 24.536842346191406, 21.215652465820312, -0.8244476318359375, 25.142974853515625, 38.119903564453125, 16.667957305908203, 15.689224243164062, 0.080810546875, -10.163711547851562, 21.24968719482422, -2.3432083129882812, -0.712432861328125, 6.756406784057617, 22.522689819335938, 11.1185302734375, 11.111434936523438, 12.7982177734375, 22.734130859375, 30.324804306030273, 3.04632568359375, 18.78515625, 15.688339233398438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000140.npy"}
{"epoch": 0.4117647058823529, "step": 141, "batch_size": 128, "mean": 12.411436080932617, "std": 12.392840385437012, "min": -24.739883422851562, "p10": 0.2564422607421875, "median": 10.423828125, "p90": 25.249197006225586, "max": 80.84255981445312, "pos_frac": 0.90625, "sample": [1.9260387420654297, 41.11152648925781, 2.192106246948242, 28.72260284423828, 7.977350234985352, 10.450149536132812, 9.028045654296875, 18.41387176513672, 12.551445007324219, 19.028427124023438, 7.225429534912109, 8.878654479980469, 16.477783203125, 27.196380615234375, 17.31902313232422, 25.072357177734375, 16.073036193847656, 23.0723876953125, 15.20550537109375, -24.739883422851562, 7.94635009765625, 5.468086242675781, 6.940521240234375, 4.4311065673828125, 31.967010498046875, 31.29770278930664, -2.0389404296875, 11.534820556640625, -1.1123428344726562, -2.343505859375, 0.259490966796875, 9.518264770507812, 28.48029136657715, 7.950477600097656, 13.637596130371094, 23.167068481445312, 19.682945251464844, 8.901676177978516, 14.56214714050293, -2.5476531982421875, 10.24184799194336, 23.94725799560547, 19.571197509765625, 31.519474029541016, -7.913360595703125, 17.06409454345703, 12.018310546875, 3.243925094604492, 23.725955963134766, 4.4563751220703125, 6.274383544921875, 20.224578857421875, 22.610137939453125, 14.769144058227539, -1.1148147583007812, 9.189765930175781, -4.996297836303711, 26.0291748046875, 22.51702117919922, 9.036823272705078, 11.136865615844727, 9.528289794921875, 0.7413825988769531, 24.743560791015625, 28.017837524414062, 5.6407012939453125, 5.929363250732422, 14.112281799316406, 3.799978256225586, 26.278118133544922, 8.910194396972656, 5.144895553588867, 80.84255981445312, 18.879905700683594, 17.943191528320312, 22.728607177734375, 19.36016845703125, 10.993705749511719, 15.492666244506836, 4.6045989990234375, 7.830474853515625, -4.033683776855469, 14.253000259399414, 12.445098876953125, 2.6706466674804688, 11.580108642578125, 6.66253662109375, 10.594612121582031, 8.543869018554688, 15.052352905273438, 1.336343765258789, 3.2946090698242188, 11.004676818847656, 15.512285232543945, 7.0571441650390625, 4.1100006103515625, 0.43183135986328125, 21.919898986816406, 21.251548767089844, 3.980438232421875, 24.650367736816406, 0.4204254150390625, 0.24932861328125, 4.565521240234375, 17.620742797851562, 22.619476318359375, 15.15226936340332, 58.9111328125, 15.707542419433594, 13.225875854492188, 25.661823272705078, 0.8180046081542969, 2.7491302490234375, 9.506221771240234, 3.0717315673828125, 10.397506713867188, 23.187713623046875, 7.932258605957031, -1.115142822265625, 9.149974822998047, 9.23760986328125, 2.8005142211914062, -7.9369049072265625, 21.622573852539062, 5.691444396972656, 5.46272087097168, -1.6175384521484375, 15.294471740722656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000141.npy"}
{"epoch": 0.4147058823529412, "step": 142, "batch_size": 128, "mean": 14.122258186340332, "std": 13.08596134185791, "min": -13.481895446777344, "p10": -1.8352989196777343, "median": 12.633525848388672, "p90": 34.89403839111328, "max": 62.493133544921875, "pos_frac": 0.84375, "sample": [0.2145843505859375, 5.303672790527344, 24.456954956054688, 2.477029800415039, 14.520133972167969, 35.291259765625, -3.8617706298828125, 11.110527038574219, -3.4210052490234375, 9.123188018798828, 16.06890869140625, 4.189189910888672, 39.168724060058594, 23.94781494140625, 10.720733642578125, 36.712158203125, 19.87804412841797, 16.609710693359375, 13.140106201171875, -3.893798828125, 29.718963623046875, 0.6710739135742188, 8.224416732788086, -0.4831809997558594, 12.611602783203125, 23.84058380126953, 20.579788208007812, 12.655448913574219, 23.53802490234375, 16.002037048339844, 14.992721557617188, 26.930679321289062, 31.944366455078125, -3.790811538696289, 5.27142333984375, 24.947967529296875, -13.481895446777344, -1.793365478515625, 2.7095870971679688, 19.86237335205078, 11.26988410949707, 9.34417724609375, -0.33925437927246094, 27.280494689941406, 10.398452758789062, 37.829708099365234, 14.51513671875, 26.319351196289062, 12.178947448730469, 28.652816772460938, 19.518165588378906, -2.4425086975097656, 4.510345458984375, -1.9331436157226562, 20.01470947265625, 11.256134033203125, 14.817359924316406, 8.74420166015625, 22.253524780273438, 23.87676239013672, 12.148895263671875, 9.637184143066406, 16.260162353515625, -5.3885498046875, -2.86273193359375, 11.369277954101562, 23.94476318359375, 18.991256713867188, 17.39543914794922, 6.407215118408203, 20.607269287109375, 14.573249816894531, 2.714183807373047, 12.027175903320312, 17.581771850585938, 44.113426208496094, 1.3572845458984375, 39.70538330078125, 10.619029998779297, 43.311134338378906, -1.9949722290039062, -0.1834259033203125, 3.6191864013671875, 18.06024169921875, 4.0888519287109375, 19.08856201171875, 13.495281219482422, 62.493133544921875, 9.75539779663086, 11.509500503540039, 38.95527648925781, -1.2872085571289062, 2.588916778564453, 18.360931396484375, 12.947227478027344, 5.567995071411133, 14.141853332519531, 26.882530212402344, 36.80158996582031, 12.521434783935547, 34.72380065917969, -1.5636043548583984, 9.806312561035156, 3.234182357788086, 14.877822875976562, 10.891551971435547, 21.44438934326172, 13.51078987121582, 15.026473999023438, -1.3356361389160156, -6.003997802734375, 37.067596435546875, 2.1766815185546875, 8.820404052734375, 28.385299682617188, 15.74506950378418, 5.713253021240234, -2.9216995239257812, -5.58160400390625, 37.47761535644531, 11.368331909179688, 0.4638824462890625, 35.45391845703125, 25.28785514831543, 6.9388885498046875, 1.626220703125, 19.984233856201172, 18.33271026611328], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000142.npy"}
{"epoch": 0.4176470588235294, "step": 143, "batch_size": 128, "mean": 11.675070762634277, "std": 14.121016502380371, "min": -26.962234497070312, "p10": -4.037053871154785, "median": 10.244588851928711, "p90": 30.704155731201173, "max": 53.616912841796875, "pos_frac": 0.7890625, "sample": [23.099929809570312, 11.850616455078125, 4.644172668457031, 1.549224853515625, 5.674747467041016, 12.507514953613281, 4.256103515625, -1.4177169799804688, 8.640708923339844, -0.090850830078125, 32.30615234375, 22.186813354492188, 21.27142333984375, -10.382911682128906, 0.3842926025390625, 15.588432312011719, 1.629425048828125, -3.9152297973632812, 30.752098083496094, 30.683609008789062, 2.819042205810547, -8.686721801757812, 9.671295166015625, 6.1395263671875, 3.2428512573242188, 16.907066345214844, 7.725837707519531, 3.3445091247558594, 14.442306518554688, 7.857330322265625, 1.7685279846191406, -6.934356689453125, 36.193016052246094, -1.6017684936523438, 12.657142639160156, 6.293552398681641, 16.507831573486328, 6.469064712524414, 38.77298355102539, 18.813796997070312, 20.733177185058594, -1.28216552734375, -0.5945549011230469, 20.842437744140625, 6.9757080078125, -2.61279296875, 19.18634796142578, -4.888221740722656, 10.27304458618164, -6.3397216796875, 53.616912841796875, 24.961578369140625, 2.29345703125, 27.315841674804688, -1.9463043212890625, 22.669687271118164, 13.386474609375, -3.5938796997070312, 10.314773559570312, 9.678146362304688, -26.962234497070312, 10.311538696289062, 21.677806854248047, 12.752267837524414, 1.2889556884765625, -1.0125007629394531, 41.65477752685547, 18.092819213867188, 10.98516845703125, 22.573410034179688, 3.241485595703125, 22.13243865966797, 33.98699951171875, 4.7157440185546875, 15.363311767578125, 20.665145874023438, 24.639793395996094, -4.549413681030273, -24.6424560546875, 40.01160430908203, 5.758796691894531, 9.468605041503906, 7.768463134765625, 12.741424560546875, 7.420501708984375, 21.381759643554688, 7.845802307128906, 15.194351196289062, -5.019397735595703, 52.512451171875, 7.541721343994141, 18.21819305419922, -1.7991580963134766, 32.253631591796875, 13.301727294921875, 33.999664306640625, 12.519603729248047, -13.396942138671875, 24.192298889160156, 13.335983276367188, 6.7092437744140625, -4.321310043334961, 5.0921173095703125, 7.4066925048828125, 32.63728332519531, -3.5271568298339844, 1.369720458984375, 20.519989013671875, 9.099639892578125, 25.503250122070312, 15.01763916015625, 5.5207366943359375, 14.910064697265625, 3.510814666748047, 13.463310241699219, -5.583843231201172, -2.5184249877929688, 13.953166961669922, 30.54546356201172, 16.623517990112305, -1.628875732421875, 10.216133117675781, 25.24587059020996, -8.010894775390625, 49.7197265625, 24.078857421875, 11.929878234863281, 30.149017333984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000143.npy"}
{"epoch": 0.42058823529411765, "step": 144, "batch_size": 128, "mean": 11.589110374450684, "std": 12.144137382507324, "min": -19.754680633544922, "p10": -2.505950164794921, "median": 10.399658203125, "p90": 26.22706089019775, "max": 43.637489318847656, "pos_frac": 0.8515625, "sample": [26.39095115661621, 9.522796630859375, 10.406494140625, 2.625762939453125, -3.0477142333984375, 3.5214462280273438, 16.352813720703125, 43.637489318847656, 10.106983184814453, 17.367713928222656, 22.97125244140625, 26.156822204589844, 4.349248886108398, 4.386739730834961, 17.082542419433594, -8.2696533203125, -11.061134338378906, 3.6488265991210938, 12.790706634521484, 17.90416717529297, 9.997566223144531, 10.568431854248047, 3.3242111206054688, 13.016326904296875, 42.62986755371094, 16.352325439453125, -5.517473220825195, 0.12650299072265625, 21.060033798217773, 18.669458389282227, 17.14362335205078, -8.419002532958984, 27.040237426757812, 3.019317626953125, 41.00865936279297, 16.031478881835938, 5.656045913696289, 20.79778289794922, 6.548425674438477, -2.2737655639648438, 40.55802917480469, 9.682676315307617, 9.970081329345703, -5.2421875, -0.7068367004394531, 4.8148345947265625, 4.333881378173828, 10.216835021972656, 0.9939079284667969, 10.103057861328125, -3.2862396240234375, 4.740333557128906, 1.1635284423828125, 14.431222915649414, 3.7810592651367188, 2.3913192749023438, -19.754680633544922, 10.566314697265625, 9.177745819091797, 18.638092041015625, 25.232513427734375, 30.702987670898438, 13.795612335205078, 33.232948303222656, 13.225761413574219, 22.044631958007812, -13.825847625732422, 11.126115798950195, -0.21329498291015625, 14.02939224243164, -5.632303237915039, 41.068878173828125, 18.253280639648438, 25.080734252929688, 6.6323394775390625, 17.863739013671875, 9.009382247924805, 39.87969970703125, 15.046844482421875, 17.505653381347656, 23.959457397460938, 13.446907043457031, 23.760452270507812, 8.945571899414062, 3.3498611450195312, 2.514068603515625, 19.936241149902344, 4.579677581787109, 29.348182678222656, 10.392822265625, 12.550064086914062, 13.766311645507812, 18.0604248046875, 21.059829711914062, 19.720375061035156, 14.158912658691406, -0.3188743591308594, 22.33837890625, 9.790077209472656, 11.7459716796875, -7.340610504150391, 15.038917541503906, 8.635351181030273, 17.16004180908203, 21.901798248291016, 14.122535705566406, 26.587554931640625, 7.044685363769531, 7.5684814453125, 19.325361251831055, -1.7441482543945312, 8.305557250976562, 2.141063690185547, 17.498992919921875, 11.210430145263672, 9.4112548828125, 2.6892471313476562, -0.11456298828125, 9.076793670654297, 5.182165145874023, 25.069412231445312, -3.9195327758789062, 6.010047912597656, 19.87531280517578, 3.447298049926758, 30.560829162597656, -19.55389404296875, 0.8547210693359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000144.npy"}
{"epoch": 0.4235294117647059, "step": 145, "batch_size": 128, "mean": 15.807048797607422, "std": 16.773767471313477, "min": -22.227500915527344, "p10": -0.2883934020996093, "median": 11.03139877319336, "p90": 39.58317947387695, "max": 86.93121337890625, "pos_frac": 0.8828125, "sample": [16.38054656982422, 12.433929443359375, 44.595115661621094, 55.33320617675781, -4.50529670715332, 17.43022918701172, 24.565414428710938, 9.48324966430664, 21.595443725585938, 63.023895263671875, -0.8191413879394531, 26.02733612060547, 7.124767303466797, 3.8825607299804688, 10.901458740234375, 3.5810813903808594, 13.371917724609375, -0.9149150848388672, -12.437698364257812, 11.067939758300781, 39.608787536621094, 7.2468719482421875, 3.030548095703125, 0.4874763488769531, 9.97514533996582, 5.868995666503906, 8.705482482910156, 8.36810302734375, -5.977947235107422, 13.765012741088867, 29.051727294921875, 8.962287902832031, 7.14483642578125, 17.17066192626953, -0.34783935546875, 13.90850830078125, 10.124900817871094, 59.65415954589844, 24.5648193359375, 17.323707580566406, 2.303447723388672, 10.835494995117188, 25.368446350097656, 9.786865234375, 14.042373657226562, 5.019065856933594, 45.0184326171875, 35.32114028930664, 9.057254791259766, 39.57220458984375, -22.227500915527344, 25.33316421508789, 0.2707557678222656, 21.453231811523438, 45.05123519897461, 11.174966812133789, 2.2505245208740234, 4.875740051269531, 5.305110931396484, 32.42230224609375, 17.32708740234375, 7.5600128173828125, 21.415077209472656, -0.26291656494140625, 23.354522705078125, 5.8578033447265625, 24.09966278076172, 5.782463073730469, 3.8464431762695312, 10.785636901855469, 37.53450012207031, -1.2286453247070312, 27.967369079589844, 18.829513549804688, 4.237249374389648, 15.547386169433594, 10.075401306152344, 34.606201171875, -6.958404541015625, 20.025251388549805, 17.04357147216797, 27.580909729003906, 0.0104217529296875, -7.778886795043945, 34.85401153564453, 10.201498031616211, 7.097343444824219, 15.668800354003906, 14.457157135009766, 1.7233009338378906, 28.231353759765625, 40.55525207519531, 10.994857788085938, 39.99922180175781, 2.9784908294677734, 47.36524963378906, 15.719345092773438, -0.15842437744140625, 30.38854217529297, 3.7053604125976562, 0.7830047607421875, 2.6065216064453125, 3.519054412841797, 27.847213745117188, 7.0173187255859375, -10.388751983642578, 15.57794189453125, 26.99329376220703, 86.93121337890625, 41.504852294921875, 8.041709899902344, 21.306068420410156, 11.161636352539062, 30.330284118652344, 9.980606079101562, 25.59180450439453, 2.0250244140625, 32.088531494140625, 4.737083435058594, 4.993646621704102, 47.790313720703125, 18.74139404296875, -8.72292709350586, -1.2739791870117188, 1.1578826904296875, 36.880126953125, 27.871524810791016, 4.179414749145508], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000145.npy"}
{"epoch": 0.4264705882352941, "step": 146, "batch_size": 128, "mean": 13.946379661560059, "std": 13.879666328430176, "min": -35.15802001953125, "p10": -0.6496089935302729, "median": 11.681265830993652, "p90": 28.651256561279297, "max": 75.46248626708984, "pos_frac": 0.875, "sample": [9.568096160888672, 14.65350341796875, -0.296173095703125, 12.578880310058594, 6.405582427978516, 24.21237564086914, 49.087303161621094, 10.530128479003906, 28.09888458251953, 17.56713104248047, 21.427831649780273, 8.095451354980469, 13.571952819824219, -13.432670593261719, 15.590215682983398, 29.7449951171875, 24.00658416748047, 9.761764526367188, 19.346298217773438, -4.9726715087890625, -0.135711669921875, 12.145050048828125, -35.15802001953125, 15.4263916015625, 6.664941787719727, 20.15258026123047, -3.3722190856933594, 4.8690643310546875, 40.96856689453125, 6.209781646728516, 2.89202880859375, 14.513843536376953, 23.51336669921875, 8.47641372680664, -2.125030517578125, 27.313255310058594, 28.559463500976562, 4.778343200683594, 24.914968490600586, 8.378883361816406, 10.467788696289062, 26.00432586669922, 18.013980865478516, 28.339492797851562, 6.0827178955078125, -1.1691360473632812, 23.50688934326172, 2.9294815063476562, 11.604429244995117, 15.257522583007812, 16.013538360595703, 10.192047119140625, 3.515604019165039, -3.2269248962402344, -4.6362762451171875, 23.465927124023438, 24.441879272460938, 17.714210510253906, 24.502059936523438, 14.724784851074219, -3.1832427978515625, 11.279777526855469, 29.725875854492188, 3.1677284240722656, 17.68682098388672, 25.87224006652832, 10.972085952758789, 11.758102416992188, -0.488983154296875, 8.360321044921875, 4.2061920166015625, 4.3934478759765625, 8.178268432617188, 57.00874328613281, -11.229011535644531, 20.285995483398438, 1.207244873046875, 38.87889099121094, 18.30499267578125, 16.84049415588379, 2.0705699920654297, 15.393325805664062, 6.459205627441406, 11.18600845336914, 3.5770263671875, 30.328794479370117, 24.561508178710938, -5.012725830078125, 29.23248291015625, 11.05678939819336, 6.155582427978516, 12.0704345703125, 8.555273056030273, 9.486995697021484, 26.221481323242188, 7.620388031005859, 9.356704711914062, 8.107711791992188, 27.995697021484375, 8.387748718261719, 39.72880554199219, 14.448883056640625, 9.281105041503906, 21.248750686645508, 22.991744995117188, 75.46248626708984, 34.419273376464844, 12.49237060546875, 10.104812622070312, 8.877948760986328, -10.359966278076172, 8.893146514892578, 14.249244689941406, 17.310073852539062, 15.641464233398438, 7.695487976074219, 4.72645378112793, 21.77520179748535, 9.562744140625, 12.3385009765625, 45.728424072265625, 3.863067626953125, 7.685661315917969, 21.258987426757812, 28.865440368652344, 6.329891204833984, 23.22832489013672, -1.0244026184082031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000146.npy"}
{"epoch": 0.4294117647058823, "step": 147, "batch_size": 128, "mean": 11.176324844360352, "std": 14.672014236450195, "min": -23.581268310546875, "p10": -4.086846542358399, "median": 8.714184761047363, "p90": 29.633131790161126, "max": 69.67305755615234, "pos_frac": 0.7734375, "sample": [12.991756439208984, 4.858310699462891, -0.3912200927734375, 3.795461654663086, 9.862728118896484, 4.694194793701172, 21.490127563476562, 22.405807495117188, 4.907257080078125, 22.397857666015625, -6.577728271484375, 11.611328125, 7.616424560546875, -18.363304138183594, 14.804725646972656, -2.4395694732666016, 13.14621353149414, 16.807144165039062, 13.187698364257812, -19.393463134765625, 20.199317932128906, 2.055940628051758, 16.053253173828125, 4.761148452758789, 9.825752258300781, -10.506233215332031, 9.269929885864258, -4.054939270019531, -0.1109161376953125, 20.593666076660156, 69.67305755615234, 3.240692138671875, 24.81903839111328, 2.240793228149414, 15.085731506347656, 5.686798095703125, 3.700634002685547, -0.8310508728027344, 45.871002197265625, -1.0151519775390625, 1.4722137451171875, 5.108768463134766, -3.2643680572509766, 23.091598510742188, 6.444725036621094, 27.807960510253906, 18.95855712890625, 16.531864166259766, 14.692787170410156, 5.517539978027344, 4.66680908203125, 4.7518310546875, 22.055015563964844, 52.244239807128906, 3.0585861206054688, 30.917404174804688, 4.8217010498046875, 18.900711059570312, -3.574186325073242, 8.72589111328125, -6.920501708984375, 21.402530670166016, 17.579505920410156, -0.43886566162109375, -5.937767028808594, 27.771297454833984, 12.612510681152344, 27.036903381347656, -11.48974609375, 18.93517303466797, -4.506317138671875, 9.05743408203125, 3.4709320068359375, 29.08272933959961, 6.248723983764648, 14.821598052978516, 42.157859802246094, 18.181678771972656, 14.164138793945312, 24.54908561706543, 17.019622802734375, 37.75669860839844, -2.427732467651367, -4.46684455871582, 37.00152587890625, 17.184534072875977, 0.630035400390625, 8.257591247558594, 19.49140167236328, 1.8932781219482422, 13.383140563964844, 21.331634521484375, 6.981719970703125, 7.358041763305664, 17.605613708496094, 13.090156555175781, 1.7154083251953125, 26.14593505859375, -0.04052734375, 6.936820983886719, 2.5693187713623047, 39.49349594116211, -2.8008651733398438, 3.924457550048828, 35.80094909667969, 6.3946685791015625, 34.3323974609375, 8.702478408813477, 22.007884979248047, -0.11281585693359375, -4.161296844482422, 43.01982879638672, -23.581268310546875, -1.0394058227539062, 33.28068542480469, 13.250511169433594, 33.30031967163086, 9.736249923706055, -5.243366241455078, 10.253753662109375, 4.135599136352539, -15.844249725341797, 3.7123851776123047, -0.3853435516357422, 22.74658203125, 2.982513427734375, -2.228973388671875, 14.796260833740234], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000147.npy"}
{"epoch": 0.4323529411764706, "step": 148, "batch_size": 128, "mean": 10.601465225219727, "std": 14.660598754882812, "min": -28.814895629882812, "p10": -5.68075942993164, "median": 8.408435821533203, "p90": 30.427197265625, "max": 59.470947265625, "pos_frac": 0.78125, "sample": [8.335411071777344, 9.473348617553711, 24.326560974121094, -2.975099563598633, -8.258636474609375, 16.55884552001953, 8.869781494140625, 9.882644653320312, 6.213127136230469, 1.4036674499511719, 1.2430038452148438, 6.242271423339844, 26.983837127685547, 1.6250171661376953, 19.10540771484375, 13.836875915527344, -8.328163146972656, -12.666046142578125, 21.957481384277344, 11.545547485351562, 5.829643249511719, 8.481460571289062, -28.814895629882812, 30.414443969726562, 14.048458099365234, 29.776954650878906, 7.609928131103516, 11.263114929199219, 13.8089599609375, 39.23419189453125, 29.3309326171875, 29.64641571044922, 6.166404724121094, 9.293327331542969, 25.441925048828125, 1.50579833984375, 15.995590209960938, 12.125499725341797, 18.651466369628906, 35.691375732421875, 1.4576778411865234, -0.31240081787109375, 14.7919921875, -1.5973052978515625, 27.791152954101562, -5.368896484375, 10.605926513671875, 4.345455169677734, 14.485923767089844, 31.608856201171875, -2.1606674194335938, -4.696128845214844, -1.074392318725586, 2.888763427734375, 40.49560546875, 17.793228149414062, 12.73813247680664, 21.90212059020996, 17.03339385986328, -6.627468109130859, -0.7554416656494141, 1.9770736694335938, 37.31280517578125, -3.3561019897460938, 1.64990234375, -6.408439636230469, 19.672271728515625, 59.470947265625, 2.3400115966796875, 12.435546875, -0.7322845458984375, -9.672264099121094, 3.1189327239990234, 11.609626770019531, 30.456954956054688, 12.063301086425781, 19.63677978515625, 16.937171936035156, -7.50677490234375, 11.338386535644531, 10.489212036132812, 33.02113342285156, -8.046310424804688, 20.283248901367188, 26.43279266357422, 2.2499923706054688, -15.045467376708984, 27.42022705078125, 1.8615856170654297, -4.444034576416016, 10.193275451660156, 6.705493927001953, 6.407073974609375, 39.66162109375, 19.316795349121094, 12.290489196777344, 3.632781982421875, -18.369491577148438, 16.078628540039062, 5.2565155029296875, 3.17578125, 3.285024642944336, -3.8189315795898438, -12.743782043457031, 7.035331726074219, 10.714530944824219, 2.6075820922851562, 5.5876312255859375, 2.6738510131835938, 2.972705841064453, 29.933319091796875, -0.656707763671875, 7.265262603759766, 32.55872344970703, -9.873741149902344, -1.9243297576904297, 39.829803466796875, 1.6016731262207031, 17.331077575683594, 3.3904037475585938, 5.668912887573242, 57.76914978027344, 22.06682586669922, 3.3676280975341797, 32.719032287597656, 6.7107696533203125, -1.9829864501953125, 15.762184143066406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000148.npy"}
{"epoch": 0.43529411764705883, "step": 149, "batch_size": 128, "mean": 13.319561004638672, "std": 14.388172149658203, "min": -14.842824935913086, "p10": -2.619169235229492, "median": 10.968332290649414, "p90": 31.454669189453124, "max": 65.3084716796875, "pos_frac": 0.84375, "sample": [6.60972785949707, 28.00189208984375, 4.814453125, 15.027645111083984, 15.541717529296875, 13.978782653808594, 34.950706481933594, 23.835723876953125, -2.875152587890625, 11.088428497314453, 7.1177978515625, -10.68170166015625, 5.9093017578125, 7.587709426879883, -1.0801048278808594, 0.6152572631835938, 20.099586486816406, 11.159915924072266, 23.59298324584961, 3.195842742919922, 0.0103759765625, 6.7426300048828125, 24.903106689453125, 26.24140167236328, 50.20880126953125, 6.6913909912109375, 1.556509017944336, 31.18545913696289, 31.27392578125, -0.005615234375, -4.9830322265625, 1.2856216430664062, 2.9651718139648438, -8.185943603515625, 6.874528884887695, 35.33863067626953, 20.6695556640625, 8.266128540039062, 52.1685676574707, -2.703510284423828, 17.504119873046875, -2.729522705078125, 40.56559371948242, 1.0219802856445312, 1.3182220458984375, 5.5967254638671875, 25.164718627929688, 15.987037658691406, 2.3246841430664062, 17.567996978759766, 13.291763305664062, 18.409748077392578, -0.34348297119140625, 0.5543346405029297, 0.9442405700683594, -3.4571609497070312, 22.200597763061523, 13.559303283691406, 48.102203369140625, 31.87640380859375, 6.096826553344727, 24.795501708984375, 6.965892791748047, -2.9683170318603516, 13.210041046142578, 2.8645782470703125, 8.699392318725586, -1.78521728515625, 10.018863677978516, 0.753021240234375, -0.8145656585693359, 20.293167114257812, 19.09912872314453, 5.420143127441406, 20.49560546875, 15.526239395141602, 43.2908935546875, -0.17999267578125, 24.937240600585938, -14.842824935913086, 4.09454345703125, 0.011278152465820312, -9.224010467529297, 19.233200073242188, 26.99847412109375, 25.486366271972656, 9.508285522460938, 7.554317474365234, 18.770179748535156, 6.883167266845703, 7.597259521484375, 35.10230255126953, 31.200210571289062, 4.783214569091797, 43.295066833496094, 17.843441009521484, 12.036727905273438, 11.650161743164062, 9.073062896728516, 25.945846557617188, -3.8104095458984375, -2.5830230712890625, 10.312339782714844, 14.8460693359375, 37.8984375, 29.144302368164062, 10.848236083984375, 14.567626953125, 17.3533935546875, 41.407737731933594, 2.6342124938964844, -4.228878021240234, 2.8360939025878906, 1.8750038146972656, 65.3084716796875, 4.156425476074219, 16.11296844482422, 11.392669677734375, 25.140487670898438, 17.279582977294922, 0.8776283264160156, 30.309158325195312, -3.9598159790039062, 1.4490203857421875, 18.898300170898438, 13.90948486328125, 20.368934631347656, 22.38677978515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000149.npy"}
{"epoch": 0.43823529411764706, "step": 150, "batch_size": 128, "mean": 14.269369125366211, "std": 16.215646743774414, "min": -18.99347496032715, "p10": -2.663428115844726, "median": 11.7770357131958, "p90": 36.082884216308585, "max": 59.684669494628906, "pos_frac": 0.8515625, "sample": [0.8297271728515625, -13.166881561279297, -0.5821609497070312, 17.14198112487793, 9.45428466796875, -1.1268138885498047, 7.325813293457031, 25.66387939453125, -7.49822998046875, 17.59239959716797, 10.906570434570312, 14.790267944335938, 2.325174331665039, 31.079784393310547, 1.3755569458007812, 14.048038482666016, 15.07049560546875, 21.480571746826172, 27.705482482910156, 9.455986022949219, 33.401611328125, 29.159809112548828, 24.9693603515625, 15.015361785888672, 11.934173583984375, 23.117233276367188, 5.77252197265625, -4.280500411987305, 2.2688980102539062, 18.6409912109375, 11.549446105957031, 23.910110473632812, 0.42702674865722656, -18.99347496032715, 50.10765075683594, 31.95153045654297, 40.14527893066406, 28.95394515991211, 16.595619201660156, 45.467010498046875, 9.680351257324219, 10.604095458984375, 27.796478271484375, 2.9853687286376953, 3.8458175659179688, 10.007572174072266, 11.619897842407227, 18.640419006347656, 5.322059631347656, 2.971874237060547, 10.261699676513672, 5.867057800292969, 44.453330993652344, 14.540092468261719, -3.176494598388672, 58.412330627441406, 10.305889129638672, 41.31153106689453, 34.07707977294922, 0.01000213623046875, 0.290283203125, 24.358749389648438, 2.0945358276367188, 19.078575134277344, 9.348981857299805, 14.434814453125, 28.3974666595459, 38.34112548828125, 5.744422912597656, -14.679054260253906, 8.370635986328125, 46.554534912109375, 15.589763641357422, 16.489906311035156, 3.516254425048828, 3.2926177978515625, -0.9290618896484375, 18.862457275390625, -2.44354248046875, -1.3455543518066406, 6.63690185546875, -12.384414672851562, 20.34619903564453, -11.201553344726562, 49.001312255859375, 18.825767517089844, 35.11506652832031, 17.925277709960938, 48.113616943359375, 34.452491760253906, 25.062641143798828, 6.035383224487305, 2.044219970703125, 3.446025848388672, 4.891717910766602, 18.01910400390625, 45.65099334716797, -8.828178405761719, -14.783851623535156, 0.5200233459472656, 26.576988220214844, 18.16705322265625, -2.2887420654296875, 15.927902221679688, 5.7391815185546875, 51.72477722167969, -9.404037475585938, 6.0670928955078125, 6.8363189697265625, 12.578422546386719, 11.527229309082031, 59.684669494628906, 0.8578529357910156, 16.407791137695312, 7.346832275390625, 11.940200805664062, -6.5976409912109375, 15.634201049804688, 16.52740478515625, -11.375015258789062, 5.3283538818359375, 13.66015625, 27.524436950683594, 25.637710571289062, 1.794281005859375, 28.636627197265625, 29.480758666992188, 2.7597999572753906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000150.npy"}
{"epoch": 0.4411764705882353, "step": 151, "batch_size": 128, "mean": 13.793989181518555, "std": 17.014408111572266, "min": -13.117923736572266, "p10": -2.9987354278564453, "median": 10.107769012451172, "p90": 36.05855369567871, "max": 104.85818481445312, "pos_frac": 0.7890625, "sample": [7.985015869140625, 12.629409790039062, 10.573736190795898, 48.42877960205078, 19.61943817138672, 25.578704833984375, 20.246612548828125, 10.159042358398438, 7.062904357910156, 43.77716064453125, -6.7364654541015625, 28.843536376953125, 48.28303527832031, -2.1282997131347656, -2.9891624450683594, 0.6277313232421875, 2.7456016540527344, 1.4127349853515625, 10.656187057495117, 6.441089630126953, 9.97723388671875, -3.0210723876953125, 5.944408416748047, 1.7807769775390625, 24.725418090820312, 8.709224700927734, 53.847007751464844, 30.478729248046875, 5.456737518310547, 20.07159423828125, 39.382354736328125, 9.856847763061523, 28.213775634765625, 3.3728809356689453, 47.081512451171875, 19.100540161132812, -6.061836242675781, 14.047821044921875, -0.5138778686523438, 19.463298797607422, 21.67853546142578, 5.307960510253906, 10.056495666503906, 11.1419677734375, 12.380651473999023, 15.93600845336914, 28.6934814453125, 5.860942840576172, 11.94740104675293, 7.015338897705078, -2.2677459716796875, -1.909088134765625, 11.314537048339844, 46.31986999511719, 30.44891357421875, 16.51495361328125, -9.755989074707031, 4.884193420410156, 50.15968322753906, -0.7737236022949219, -2.6209716796875, -1.0208663940429688, 1.6250877380371094, 25.03095245361328, 12.880126953125, 29.683578491210938, 22.6961669921875, -2.9290733337402344, 18.268463134765625, 5.638677597045898, 35.80725860595703, 1.5299415588378906, 28.6397705078125, -3.1127853393554688, 13.694618225097656, 21.70006561279297, 6.875236511230469, 24.60704803466797, 15.466590881347656, 12.498397827148438, 36.6449089050293, 1.7470703125, 23.771282196044922, 50.74180603027344, 104.85818481445312, -6.875274658203125, 11.118721008300781, -13.117923736572266, 11.375991821289062, 4.9567718505859375, 9.640880584716797, 24.278701782226562, 3.6167373657226562, 9.864181518554688, 23.737937927246094, 0.3424568176269531, 7.795997619628906, 11.641342163085938, -2.2597808837890625, 1.7988777160644531, 7.5796966552734375, -5.581428527832031, 5.85845947265625, 4.2914886474609375, -5.355388641357422, -2.2459049224853516, 18.64703369140625, 29.97760009765625, -0.33324623107910156, -6.490692138671875, 12.562118530273438, 11.135169982910156, -4.56878662109375, 25.541664123535156, -4.575920104980469, 30.200891494750977, 10.012290954589844, 8.116371154785156, 10.378776550292969, 44.54705810546875, -4.122505187988281, 33.577659606933594, 2.2642669677734375, 43.02308654785156, 13.447845458984375, -0.07157516479492188, -2.0475425720214844, 7.140359878540039], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000151.npy"}
{"epoch": 0.4441176470588235, "step": 152, "batch_size": 128, "mean": 13.841017723083496, "std": 14.127635955810547, "min": -32.69441223144531, "p10": -3.5865367889404296, "median": 13.16490650177002, "p90": 31.73999099731445, "max": 50.11933898925781, "pos_frac": 0.859375, "sample": [32.325408935546875, -4.412710189819336, 5.607177734375, 33.07200241088867, -4.381891250610352, 15.794490814208984, 21.575668334960938, 41.507164001464844, 11.564201354980469, 6.324792861938477, 0.7861156463623047, 19.248626708984375, -9.683349609375, 13.46536636352539, -6.759666442871094, 11.694389343261719, 39.133262634277344, 9.366186141967773, 36.14051818847656, 4.7138824462890625, -14.999359130859375, 18.205978393554688, 10.437637329101562, 28.639663696289062, 9.904823303222656, -3.659881591796875, 7.07000732421875, 14.618362426757812, 16.638885498046875, 19.520944595336914, 12.614114761352539, 17.969497680664062, 1.3417816162109375, -12.541366577148438, 24.748031616210938, 2.365997314453125, 35.89304733276367, 4.7149810791015625, 9.052024841308594, 1.9944229125976562, 48.277198791503906, 19.813926696777344, -8.213468551635742, 23.678924560546875, 5.905509948730469, 17.70990753173828, -5.952461242675781, 18.62865447998047, 34.48027038574219, 16.81121826171875, 13.646697998046875, 46.15208435058594, -6.1589508056640625, 27.2032470703125, 29.25835418701172, 40.15815353393555, -7.425994873046875, 0.15326690673828125, 2.643247604370117, 3.350170135498047, 7.305171966552734, 27.657672882080078, -0.6771926879882812, 20.27239990234375, 0.17125511169433594, 19.974159240722656, 24.808067321777344, 4.9526824951171875, 6.589202880859375, 8.114501953125, 12.860885620117188, -0.4371795654296875, 8.394271850585938, 15.533607482910156, 7.288688659667969, 7.3260345458984375, 21.975467681884766, 2.3497314453125, 13.394683837890625, 5.8550567626953125, 1.7200546264648438, 8.914932250976562, 3.4161643981933594, -14.588699340820312, 23.302017211914062, 9.413856506347656, 26.594207763671875, 32.48370361328125, 31.489097595214844, 34.97724151611328, 22.426986694335938, -3.555103302001953, 6.75811767578125, 31.462753295898438, 22.44751739501953, 4.153785705566406, 15.461273193359375, 20.755203247070312, 30.32030487060547, 30.941818237304688, 18.27484130859375, 28.54407501220703, 14.584121704101562, 18.045310974121094, 21.143348693847656, 11.791885375976562, 50.11933898925781, 7.177982330322266, 24.52680206298828, 9.331855773925781, 8.90365219116211, 30.111160278320312, -32.69441223144531, 22.861083984375, -0.4693412780761719, 1.1362686157226562, 16.920089721679688, -0.8119850158691406, 4.751884460449219, 4.370351791381836, 12.935129165649414, 16.771617889404297, 26.62126922607422, 12.109390258789062, 18.767364501953125, 15.28270149230957, 29.90167236328125, 26.3072509765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000152.npy"}
{"epoch": 0.4470588235294118, "step": 153, "batch_size": 128, "mean": 13.821128845214844, "std": 12.777645111083984, "min": -39.75556945800781, "p10": 1.6821975708007815, "median": 12.94497299194336, "p90": 30.630123901367185, "max": 62.53706359863281, "pos_frac": 0.9296875, "sample": [21.306533813476562, 19.85681915283203, 5.1165924072265625, 19.290321350097656, 21.290512084960938, 17.630325317382812, 27.353439331054688, 5.198299407958984, 14.2659912109375, 18.918983459472656, 12.93316650390625, 0.7440414428710938, 62.53706359863281, 22.523845672607422, 14.642333984375, 12.945884704589844, 5.5556488037109375, 7.24315071105957, 16.736663818359375, 15.197305679321289, 28.554954528808594, 11.859184265136719, 3.913829803466797, 8.105621337890625, 3.805877685546875, 21.308822631835938, 7.59808349609375, 31.856292724609375, 8.416152954101562, 9.942832946777344, 1.2043609619140625, 2.905548095703125, 33.20257568359375, 39.984703063964844, 4.5589752197265625, 5.37324333190918, 22.071277618408203, -8.197982788085938, 5.4022064208984375, 16.46759033203125, 19.65365219116211, 2.381216049194336, 17.051362991333008, 6.6220550537109375, 26.045578002929688, 3.08538818359375, 7.520660400390625, 27.70425033569336, 11.366327285766602, -1.4524955749511719, 11.009620666503906, 5.892459869384766, 38.081809997558594, 27.446640014648438, 17.13121795654297, 0.7048797607421875, 4.9108123779296875, 3.3013992309570312, 19.1036376953125, -7.805030822753906, 1.716552734375, 17.675750732421875, 1.6020355224609375, 10.703405380249023, 9.004390716552734, -1.5934371948242188, 15.11984634399414, 10.393131256103516, -13.424652099609375, 2.228240966796875, 42.088714599609375, 3.1916542053222656, 24.318592071533203, 39.876426696777344, 20.53194236755371, 13.162811279296875, 3.5384979248046875, 9.453109741210938, 3.2884140014648438, 5.268016815185547, 32.53263854980469, 16.550254821777344, 4.532615661621094, 28.920330047607422, 5.6720733642578125, 3.2580947875976562, 11.803308486938477, 8.291419982910156, 22.613555908203125, 12.944061279296875, 14.749130249023438, 35.36798858642578, 16.956405639648438, 17.255599975585938, 34.84789276123047, 14.158500671386719, 13.139801025390625, 16.709632873535156, 31.555816650390625, 13.526775360107422, 2.371786117553711, 9.239151000976562, 17.963645935058594, 12.61172103881836, 6.091102600097656, 13.797204971313477, -0.5191879272460938, 7.232597351074219, 12.111930847167969, 35.26734924316406, 5.83074951171875, 2.252002716064453, -39.75556945800781, 18.90093994140625, 18.442176818847656, 11.4971923828125, -0.2828197479248047, 5.108997344970703, -0.1354522705078125, 23.046749114990234, 13.373605728149414, 25.621726989746094, 16.423877716064453, 20.110408782958984, 26.45294952392578, 30.2333984375, 19.503326416015625, 43.40718078613281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000153.npy"}
{"epoch": 0.45, "step": 154, "batch_size": 128, "mean": 12.828712463378906, "std": 14.879647254943848, "min": -22.880813598632812, "p10": -5.220618820190429, "median": 12.554229736328125, "p90": 30.226297760009764, "max": 63.56475830078125, "pos_frac": 0.796875, "sample": [27.614368438720703, 8.166851043701172, -22.880813598632812, 19.024940490722656, 18.867534637451172, 19.06624984741211, -16.073028564453125, -2.3567581176757812, 30.74236297607422, -5.9002532958984375, 24.581024169921875, -9.467903137207031, 13.085861206054688, 11.226367950439453, 3.8528671264648438, 15.763481140136719, 27.14666748046875, 5.567901611328125, 5.625396728515625, 11.673210144042969, 12.2257080078125, -0.1758575439453125, 31.879409790039062, 22.40716552734375, 1.5223369598388672, 14.105606079101562, 15.44970703125, 4.299644470214844, 18.214599609375, 16.157554626464844, 22.559181213378906, 47.484092712402344, -3.8364410400390625, 57.13328552246094, 14.934698104858398, 2.7405242919921875, 27.522048950195312, 4.3512725830078125, 30.005126953125, -12.814491271972656, 37.98035430908203, 17.01310920715332, 13.459991455078125, 20.030590057373047, 25.206295013427734, 19.247283935546875, 2.763216018676758, 15.461959838867188, 8.107460021972656, 14.349639892578125, -4.742061614990234, 37.03544616699219, 29.961463928222656, -7.621131896972656, 32.67723083496094, 25.092727661132812, -5.104240417480469, 3.224447250366211, 10.055595397949219, 8.393692016601562, 12.934837341308594, 13.201484680175781, 3.39483642578125, -9.423341751098633, 21.912643432617188, 43.33140563964844, -1.7364883422851562, -3.0819931030273438, 1.2919998168945312, 42.11009979248047, -7.82861328125, -5.492168426513672, -12.337562561035156, 27.334251403808594, 5.058326721191406, -12.71563720703125, 8.325347900390625, 44.57515335083008, 19.43292999267578, 8.715980529785156, 38.16241455078125, 32.71170425415039, 24.274551391601562, 24.326574325561523, 19.254074096679688, 14.902114868164062, 12.307884216308594, 5.914981842041016, -2.9897727966308594, 17.278762817382812, 28.507720947265625, 10.076988220214844, -3.0187759399414062, 7.220149993896484, -2.3179244995117188, -0.7554397583007812, 22.18780517578125, 5.551862716674805, 13.980255126953125, 4.816677093505859, 2.871185302734375, 4.176544189453125, 19.466644287109375, 28.368515014648438, 12.313247680664062, 9.239158630371094, 22.820045471191406, -0.9713649749755859, 13.582244873046875, -8.507030487060547, 23.68859100341797, 20.692058563232422, -7.920705795288086, 16.08112335205078, 5.5012054443359375, 15.506994247436523, 17.600290298461914, 63.56475830078125, 6.304176330566406, 24.3619384765625, 12.795211791992188, 25.249820709228516, 10.264095306396484, 4.4748687744140625, 7.6752166748046875, -1.465057373046875, 5.008356094360352, 3.852294921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000154.npy"}
{"epoch": 0.45294117647058824, "step": 155, "batch_size": 128, "mean": 11.822868347167969, "std": 12.368313789367676, "min": -19.9246826171875, "p10": -3.5357622146606436, "median": 10.944892883300781, "p90": 25.962247467041013, "max": 48.920257568359375, "pos_frac": 0.8515625, "sample": [0.07428741455078125, 7.627347946166992, 6.878776550292969, -6.2378387451171875, 5.071807861328125, 17.41411590576172, 4.815290451049805, 1.0640602111816406, -5.983184814453125, 18.12428092956543, 29.8121337890625, 23.51866912841797, 19.136581420898438, -18.7327880859375, 13.443973541259766, 1.9176216125488281, 20.341590881347656, -4.361259460449219, 18.994422912597656, 14.689460754394531, 48.920257568359375, 12.623729705810547, 19.017784118652344, 2.5412139892578125, 24.20368194580078, -6.0258331298828125, 21.176109313964844, -3.2439327239990234, 7.662647247314453, 37.33308410644531, 8.413476943969727, 16.951736450195312, 26.535552978515625, 18.631423950195312, 33.872520446777344, 21.986068725585938, 8.702526092529297, 23.385879516601562, 6.931480407714844, -7.816107749938965, 2.2117156982421875, -1.2472610473632812, 23.25830078125, 15.97689437866211, 6.060871124267578, 0.4190025329589844, -5.009517669677734, 33.89129638671875, 1.5983543395996094, 5.92784309387207, 7.383508682250977, 42.684898376464844, 17.629920959472656, 23.006752014160156, 24.039344787597656, 6.136726379394531, 19.404922485351562, 41.5247802734375, 13.126983642578125, 24.293174743652344, 0.035579681396484375, 4.381887435913086, 10.271163940429688, 20.166534423828125, -8.064899444580078, 8.4237060546875, 6.883453369140625, 9.632293701171875, 24.556053161621094, 35.56058120727539, 23.21483612060547, 4.927761077880859, 2.258363723754883, 17.695274353027344, 2.8040618896484375, 26.85805892944336, 21.203800201416016, 2.98797607421875, 25.852890014648438, -1.4583663940429688, 8.856773376464844, 1.4579524993896484, 26.21741485595703, 11.618621826171875, 17.780548095703125, 7.182147979736328, 3.897777557373047, -1.6666908264160156, 19.137630462646484, 5.26666259765625, 19.610519409179688, 8.68604850769043, 12.167716979980469, 6.1323394775390625, -11.03057861328125, -0.5780563354492188, 8.966936111450195, 14.01007080078125, -6.36865234375, 25.614864349365234, 34.12498474121094, 7.239391326904297, 2.650676727294922, 4.483116149902344, 6.993377685546875, 15.696807861328125, 30.1236572265625, 14.315437316894531, 14.680843353271484, 17.605857849121094, -9.998291015625, 15.8150634765625, 7.864776611328125, 14.748992919921875, 21.720264434814453, 12.634990692138672, 8.862419128417969, 19.94489288330078, 25.08277130126953, -1.977874755859375, -19.9246826171875, 13.731109619140625, 17.615478515625, 0.6785087585449219, -4.216697692871094, 16.87738800048828, 6.493419647216797, 12.57832145690918], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000155.npy"}
{"epoch": 0.45588235294117646, "step": 156, "batch_size": 128, "mean": 15.738296508789062, "std": 15.36113166809082, "min": -16.374881744384766, "p10": -2.1963733673095702, "median": 13.242084503173828, "p90": 39.05028457641602, "max": 55.89300537109375, "pos_frac": 0.8671875, "sample": [7.536628723144531, 45.48480224609375, 22.984458923339844, -14.966751098632812, -2.400411605834961, 11.330276489257812, 10.151512145996094, 15.575721740722656, 10.705879211425781, 14.024866104125977, 10.13311767578125, 44.475555419921875, 17.910491943359375, 29.40265655517578, 44.677276611328125, 12.526378631591797, 8.340961456298828, 15.210372924804688, 10.40045166015625, -3.6819610595703125, 3.8843154907226562, 38.380584716796875, -5.90838623046875, 41.554649353027344, 13.961570739746094, 35.61827850341797, 0.051395416259765625, 41.77136993408203, 9.796676635742188, 9.136772155761719, -0.6807212829589844, 34.84954833984375, 39.02052307128906, 18.162551879882812, 36.97822570800781, 11.919166564941406, 23.86016082763672, 25.929550170898438, 0.6973686218261719, 3.3562774658203125, 2.8592185974121094, 8.860939025878906, 3.4332923889160156, 11.82928466796875, 27.1160888671875, 29.44269561767578, 3.536773681640625, 8.254966735839844, -1.817911148071289, 6.0078125, 31.026363372802734, 15.391754150390625, 45.14263916015625, 27.097183227539062, 6.739971160888672, 0.4410362243652344, 20.63085174560547, 18.58171844482422, 15.844680786132812, -5.786827087402344, 32.855064392089844, 24.694578170776367, 6.796180725097656, 33.9319953918457, 22.289588928222656, 55.89300537109375, 25.416488647460938, 8.8736572265625, 13.345695495605469, 1.109649658203125, 8.427207946777344, 5.425500869750977, -14.65205192565918, 10.463043212890625, 20.55615997314453, 46.8701171875, 24.1988525390625, 3.4539642333984375, 12.584114074707031, 17.177230834960938, 0.4662933349609375, 18.89007568359375, 13.304222106933594, 13.233001708984375, 9.075393676757812, 34.166927337646484, 13.251167297363281, 18.2152099609375, -11.803756713867188, 19.53418731689453, 10.651281356811523, -6.636203765869141, 11.629060745239258, -4.25318717956543, -4.797679901123047, 18.54627227783203, 22.06964683532715, 17.56774139404297, -16.374881744384766, 46.12763977050781, 49.240142822265625, 41.524803161621094, 35.744651794433594, 21.467193603515625, 5.104133605957031, 2.0237388610839844, 17.659286499023438, 12.884307861328125, 16.78223419189453, 11.122207641601562, 5.668750762939453, 20.537994384765625, 39.119728088378906, 10.078895568847656, -4.120145797729492, 2.458759307861328, -1.251871109008789, 23.216705322265625, -2.1156272888183594, 52.976776123046875, 24.262046813964844, 1.0534114837646484, -2.3847808837890625, 17.666915893554688, 14.166976928710938, 13.225074768066406, 6.2010498046875, 30.821475982666016], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000156.npy"}
{"epoch": 0.4588235294117647, "step": 157, "batch_size": 128, "mean": 12.120649337768555, "std": 14.114447593688965, "min": -11.777034759521484, "p10": -2.790998840332031, "median": 8.947784423828125, "p90": 30.209304809570305, "max": 60.90007781982422, "pos_frac": 0.796875, "sample": [28.518096923828125, -0.0328521728515625, -5.524574279785156, 13.05389404296875, -1.9027290344238281, 20.64422607421875, 11.224517822265625, 8.772232055664062, 20.31597137451172, 3.350921630859375, 16.87249755859375, 7.257041931152344, 6.546560287475586, 28.529220581054688, -11.777034759521484, -4.912908554077148, -4.566741943359375, 29.640625, -2.7614593505859375, -1.5795860290527344, -1.3681049346923828, 3.3183441162109375, -3.350008010864258, 12.811294555664062, 60.90007781982422, 3.960817337036133, 49.669219970703125, 19.158199310302734, 14.194103240966797, 4.638345718383789, -3.9207420349121094, 27.955902099609375, 29.491188049316406, 0.40682220458984375, -5.738742828369141, -0.7574462890625, 11.003662109375, 14.909774780273438, 1.7573089599609375, 1.72406005859375, 13.451522827148438, 29.132125854492188, 18.201997756958008, 18.400665283203125, 14.152280807495117, 29.466110229492188, 16.91802215576172, 10.83984375, -3.267181396484375, 3.3826217651367188, 3.352741241455078, 12.116943359375, 15.595115661621094, 13.185447692871094, 0.22231292724609375, 16.323516845703125, 16.27361297607422, 11.08616828918457, 0.7327423095703125, 13.910392761230469, 4.93609619140625, 27.680877685546875, 13.705291748046875, 54.95811462402344, 42.3709716796875, -0.26519775390625, 0.8923511505126953, -3.2444095611572266, 7.5137176513671875, 39.45838928222656, 3.3254241943359375, 17.109039306640625, -3.5496444702148438, 14.60097885131836, 29.491867065429688, 32.882843017578125, 8.793106079101562, -0.6948089599609375, 1.4666595458984375, 8.514991760253906, 31.536224365234375, 27.432876586914062, 12.541267395019531, 1.1589241027832031, 20.170310974121094, 37.03837966918945, 41.2388916015625, 13.702674865722656, 5.6840057373046875, 4.520851135253906, 19.581974029541016, -2.5385303497314453, 9.965347290039062, 8.057884216308594, 22.02129364013672, 9.102462768554688, 6.510011672973633, -2.6414871215820312, 6.584383010864258, -0.21148681640625, 14.707069396972656, 10.463371276855469, 6.982521057128906, -9.6209716796875, -6.5205230712890625, -2.85992431640625, -0.75762939453125, 7.407674789428711, 4.2963104248046875, 14.329231262207031, 23.87604522705078, 3.0917434692382812, 22.108123779296875, 44.32611083984375, 50.164772033691406, 34.83477783203125, 5.4171905517578125, 4.154518127441406, 8.478775024414062, 18.477272033691406, 4.885219573974609, 1.8980178833007812, 8.128501892089844, -0.606292724609375, 13.28775405883789, 0.6476802825927734, 32.273216247558594, 10.260581970214844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000157.npy"}
{"epoch": 0.46176470588235297, "step": 158, "batch_size": 128, "mean": 12.997393608093262, "std": 14.2236967086792, "min": -13.333724975585938, "p10": -1.9685195922851562, "median": 10.925504684448242, "p90": 35.539984130859374, "max": 61.43830871582031, "pos_frac": 0.8359375, "sample": [20.6044921875, 12.261970520019531, 9.75146484375, 0.9061317443847656, -1.066986083984375, 4.9211883544921875, 7.688743591308594, 8.120826721191406, 7.392539978027344, 21.019054412841797, 13.973085403442383, 14.109794616699219, 11.922119140625, 22.70281982421875, 11.539047241210938, 2.289215087890625, 9.541069030761719, 3.2812976837158203, 12.895683288574219, 42.58616638183594, -2.99420166015625, 14.507354736328125, 22.16431427001953, 3.080066680908203, -9.38143539428711, 16.0762939453125, 2.3243865966796875, 13.643871307373047, 32.56900405883789, 11.53802490234375, 12.415199279785156, 4.107189178466797, 10.583984375, -1.6416149139404297, 13.664031982421875, -6.958707809448242, -13.207710266113281, 7.931629180908203, 17.11944580078125, 26.0946044921875, 15.861724853515625, 16.652633666992188, 20.93548583984375, 2.600717544555664, 29.659461975097656, -4.198036193847656, 27.13205337524414, -1.74346923828125, 12.186004638671875, 14.286956787109375, 46.4052734375, 9.129068374633789, 17.55268096923828, 10.421089172363281, -13.333724975585938, -1.935638427734375, 20.608722686767578, 1.9318618774414062, 18.717559814453125, 22.796310424804688, -2.0452423095703125, 12.999282836914062, 2.676393508911133, 1.2903995513916016, 5.16790771484375, -2.1199283599853516, 22.43498992919922, 13.44781494140625, 3.0738067626953125, 35.4635009765625, 12.036476135253906, 8.072967529296875, 36.23936462402344, 13.49410629272461, -0.05307960510253906, 8.116897583007812, 3.5401458740234375, 10.977497100830078, 50.86449432373047, 7.1239471435546875, 6.451751708984375, 22.625755310058594, 14.291450500488281, 34.64641571044922, 15.042098999023438, -0.9151172637939453, 35.71844482421875, -3.7425460815429688, 47.20097351074219, 11.51296615600586, 48.121185302734375, 5.76824951171875, 1.6885452270507812, 6.6819610595703125, 36.807586669921875, 3.8911514282226562, 25.55328369140625, 18.27983856201172, 10.873512268066406, 4.373016357421875, 3.2283477783203125, -5.352043151855469, 1.1094627380371094, 36.474273681640625, 36.545570373535156, 10.827587127685547, 12.560344696044922, 61.43830871582031, 7.975898742675781, 47.80388641357422, 1.8535194396972656, 21.307289123535156, 45.457855224609375, 21.250442504882812, -2.0608673095703125, 19.72496795654297, 16.551435470581055, -1.35614013671875, 4.692176818847656, -3.707286834716797, 4.127510070800781, 7.0252227783203125, -4.039529800415039, -1.1144180297851562, 21.7429141998291, 0.5829391479492188, 6.847652435302734, 28.75457763671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000158.npy"}
{"epoch": 0.4647058823529412, "step": 159, "batch_size": 128, "mean": 15.3021240234375, "std": 14.925435066223145, "min": -16.511398315429688, "p10": -1.0880868911743162, "median": 13.281045913696289, "p90": 34.90443572998047, "max": 85.58522033691406, "pos_frac": 0.8671875, "sample": [27.252227783203125, 10.579132080078125, 11.579662322998047, 16.83112335205078, 38.34246826171875, -4.9549560546875, 19.004684448242188, 8.596115112304688, 11.664535522460938, 13.604850769042969, 13.878463745117188, 12.776222229003906, 2.248077392578125, 14.121635437011719, 10.970623016357422, 17.200515747070312, -5.619293212890625, 24.293533325195312, 22.783172607421875, 0.5138702392578125, 29.3352108001709, 5.168998718261719, 11.781227111816406, 21.84375762939453, 34.89154052734375, 3.1661911010742188, 11.476163864135742, 24.241905212402344, 20.102418899536133, 18.365196228027344, 9.552360534667969, 13.869895935058594, -3.8983726501464844, 12.512031555175781, 85.58522033691406, 3.118633270263672, 12.956584930419922, -4.119964599609375, 17.143272399902344, 23.479202270507812, 2.264965057373047, 33.14380645751953, -6.890590667724609, 10.142242431640625, 29.38421630859375, 4.2367095947265625, 35.260589599609375, 2.151641845703125, 18.390533447265625, 29.4774169921875, -1.0585994720458984, 20.474475860595703, 26.6346435546875, 28.538223266601562, 35.3221435546875, -1.477783203125, 10.725440979003906, 7.830833435058594, 47.42144775390625, 8.688941955566406, 5.6672515869140625, 10.269340515136719, 10.775630950927734, 17.103408813476562, 2.5276222229003906, -8.663360595703125, 49.332427978515625, 19.755050659179688, 15.536121368408203, 16.99710464477539, 7.073814392089844, 11.607671737670898, -1.0438175201416016, 18.543445587158203, 13.842605590820312, 21.313247680664062, 6.810657501220703, 20.06336212158203, 0.0242462158203125, 34.93452453613281, 20.830615997314453, 47.92982482910156, 2.791584014892578, 33.72875213623047, 42.58422088623047, -1.156890869140625, -16.511398315429688, 37.36163330078125, -7.006591796875, -2.893503189086914, -0.16455459594726562, 5.689430236816406, 8.597946166992188, 16.66217041015625, 24.57322883605957, 10.718368530273438, 3.97674560546875, 48.87590026855469, 3.5300045013427734, 18.954681396484375, 11.984565734863281, 31.154159545898438, 54.72810363769531, 11.444267272949219, 17.223617553710938, 39.150634765625, -1.7361698150634766, 0.8159255981445312, 23.616931915283203, 10.08856201171875, 3.492523193359375, 18.441329956054688, 22.992507934570312, 9.260478973388672, 6.2524871826171875, 28.01724624633789, 22.764625549316406, 17.356088638305664, -0.39305877685546875, 14.199050903320312, 16.131315231323242, 6.0897064208984375, 13.422798156738281, 24.475128173828125, -12.38033676147461, 20.271564483642578, 8.22662353515625, 13.139293670654297], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000159.npy"}
{"epoch": 0.4676470588235294, "step": 160, "batch_size": 128, "mean": 14.821147918701172, "std": 12.427233695983887, "min": -11.499889373779297, "p10": 0.9764465332031256, "median": 12.329536437988281, "p90": 32.024091720581055, "max": 50.88189697265625, "pos_frac": 0.9296875, "sample": [16.148818969726562, 18.081899642944336, 8.724002838134766, 12.950130462646484, 32.73564147949219, 14.898811340332031, 34.999114990234375, 18.700485229492188, 17.617767333984375, 19.448869705200195, 31.770484924316406, 25.650203704833984, 10.87432861328125, 26.113876342773438, 5.781543731689453, 5.433406829833984, 9.417152404785156, 18.113990783691406, -4.267799377441406, 5.077110290527344, 24.63581085205078, 11.142642974853516, -2.1966476440429688, 26.737022399902344, 22.26523208618164, 41.94073486328125, 50.88189697265625, 32.615840911865234, 8.806556701660156, 19.43407440185547, 5.041084289550781, 4.766332626342773, 28.282421112060547, 12.85694694519043, 2.6563282012939453, 0.5161762237548828, 13.351539611816406, 12.488754272460938, 5.63165283203125, 19.17523193359375, 8.61944580078125, 3.237241744995117, 12.912025451660156, 29.28229522705078, 4.778266906738281, 42.0970458984375, 5.143230438232422, 23.248836517333984, 40.70521545410156, 40.907047271728516, 21.056442260742188, 8.321868896484375, 27.749488830566406, 6.419233322143555, 43.80509948730469, 24.23815155029297, -11.499889373779297, 6.62748908996582, 8.721611022949219, 19.568363189697266, 27.204833984375, 5.180011749267578, 28.737537384033203, 6.047203063964844, 6.947528839111328, 29.801071166992188, 35.967987060546875, 4.651092529296875, 20.45665168762207, 7.364555358886719, 32.70616912841797, 7.8076324462890625, 20.950164794921875, 4.8468475341796875, -1.4411163330078125, 1.7332763671875, 5.5985260009765625, 27.20294761657715, 7.338212966918945, 7.469085693359375, -5.228309631347656, 16.883331298828125, 30.760986328125, 12.145402908325195, 7.340423583984375, 12.170318603515625, 30.817733764648438, 11.529083251953125, 5.593841552734375, 4.0413818359375, 8.304569244384766, 8.679344177246094, 6.726997375488281, 29.310409545898438, 7.184364318847656, 21.623756408691406, 47.699798583984375, 6.302463531494141, 29.842979431152344, 18.835128784179688, 1.132598876953125, 12.59957504272461, 21.97840118408203, 17.095169067382812, 14.617534637451172, 17.446258544921875, 11.577682495117188, 15.129592895507812, 1.1376953125, 17.13287353515625, -1.1391105651855469, 7.780921936035156, 6.9408416748046875, 0.13584518432617188, 0.4394111633300781, 0.612091064453125, 33.31257629394531, 4.640998840332031, 20.556106567382812, 10.999267578125, 22.618728637695312, -1.1695289611816406, 9.80108642578125, -1.8489532470703125, 14.578170776367188, 12.614950180053711, 3.0469818115234375, -9.03289794921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000160.npy"}
{"epoch": 0.47058823529411764, "step": 161, "batch_size": 128, "mean": 14.066232681274414, "std": 14.643601417541504, "min": -14.47421646118164, "p10": -4.440991210937499, "median": 13.267683029174805, "p90": 33.43673248291015, "max": 51.376007080078125, "pos_frac": 0.8203125, "sample": [32.727386474609375, 15.218355178833008, 13.892013549804688, 14.133949279785156, -6.705686569213867, 6.575283050537109, 23.681716918945312, -0.1196441650390625, 23.265411376953125, 26.654956817626953, 38.08245849609375, -0.204315185546875, 21.35064697265625, 3.4726028442382812, 1.2593994140625, 10.534568786621094, 0.4278984069824219, 17.514175415039062, -4.084709167480469, 30.917627334594727, 5.344261169433594, -10.187463760375977, 25.795806884765625, 17.77634048461914, 14.420608520507812, 17.73366928100586, 30.431793212890625, 35.84638977050781, 6.5022735595703125, -6.462928771972656, 5.864532470703125, 16.26022720336914, 12.76153564453125, 20.079971313476562, 30.558860778808594, 2.1966400146484375, -11.156890869140625, -5.363594055175781, -7.9797515869140625, 24.464332580566406, 22.26479721069336, 16.79389190673828, 17.567062377929688, -11.747833251953125, 2.883514404296875, 9.963014602661133, 21.910690307617188, 28.226404190063477, 3.9132080078125, -0.08637809753417969, 7.21925163269043, 11.641902923583984, -0.33687591552734375, 25.22844696044922, 10.068267822265625, -14.47421646118164, 27.22747039794922, 30.056365966796875, 3.177827835083008, 51.376007080078125, 6.36175537109375, 29.657928466796875, 15.143798828125, -10.911239624023438, 14.66851806640625, 35.09187316894531, 12.799423217773438, 39.616668701171875, 8.874967575073242, -8.566511154174805, 24.29021453857422, 25.31085205078125, 23.966583251953125, 14.341392517089844, 4.934059143066406, 4.0980072021484375, 37.667755126953125, 25.65753173828125, -1.5637588500976562, 32.7027587890625, 1.9311027526855469, 8.754093170166016, 11.482887268066406, 13.003555297851562, 4.511531829833984, 23.68337059020996, -5.272315979003906, 30.496246337890625, 13.531810760498047, 3.554656982421875, 18.202476501464844, 41.714752197265625, 15.191268920898438, 9.065731048583984, 14.955680847167969, 14.65625, 50.87041473388672, -1.061422348022461, -2.003023147583008, 8.712120056152344, 39.51923370361328, 38.02153015136719, 9.846900939941406, 22.242034912109375, -2.89837646484375, 10.518035888671875, 20.69677734375, 30.26352882385254, -7.3645477294921875, 27.251739501953125, 7.0751953125, 10.412328720092773, 5.824180603027344, 28.144927978515625, 51.34801483154297, 5.42169189453125, 20.75644302368164, 15.238275527954102, 6.4229736328125, -12.098569869995117, 7.142662048339844, 20.797264099121094, 3.9691848754882812, 1.0509796142578125, 37.919586181640625, 0.9349212646484375, 39.2509765625, -3.7073822021484375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000161.npy"}
{"epoch": 0.47352941176470587, "step": 162, "batch_size": 128, "mean": 14.570540428161621, "std": 16.52007484436035, "min": -26.245071411132812, "p10": -3.195614242553711, "median": 12.40934944152832, "p90": 36.716417694091795, "max": 63.24525451660156, "pos_frac": 0.8359375, "sample": [15.345001220703125, 22.056360244750977, 12.165771484375, 4.33123779296875, 26.446266174316406, -19.569915771484375, 26.94310760498047, 20.110198974609375, 10.317272186279297, 12.39266586303711, 23.976818084716797, 42.28025817871094, -2.439006805419922, 4.824357986450195, 29.7659912109375, 16.812488555908203, 12.64666748046875, 14.484809875488281, 4.760353088378906, 3.644500732421875, -7.7059478759765625, 10.680904388427734, 46.46649932861328, 49.131004333496094, 3.212066650390625, -5.6280975341796875, 36.650657653808594, 36.86985778808594, 1.0274200439453125, 20.364070892333984, -5.199794769287109, 24.01174545288086, 4.158653259277344, -3.657073974609375, 21.061721801757812, 5.399078369140625, 5.18463134765625, 25.45635986328125, -3.2249794006347656, 20.3756103515625, 6.709987640380859, 37.41307067871094, 20.1953067779541, -1.87030029296875, 13.53558349609375, 3.9544525146484375, 11.794593811035156, 19.235702514648438, 2.7265548706054688, 13.657150268554688, 14.520130157470703, 59.57008361816406, 9.037445068359375, 33.79475402832031, 12.694992065429688, 25.432086944580078, 2.385364532470703, 17.114471435546875, 9.29742431640625, 28.984882354736328, 10.474929809570312, -8.093439102172852, 5.806854248046875, 6.566965103149414, 4.895751953125, 16.69814682006836, 6.764808654785156, 28.585660934448242, 7.02301025390625, 32.20538330078125, 46.93113708496094, 10.571781158447266, -25.525299072265625, 7.4203033447265625, 6.682281494140625, 40.487979888916016, 17.007972717285156, 18.60968017578125, -8.928703308105469, 22.103382110595703, 22.1593017578125, 11.63992691040039, 14.472869873046875, 19.948989868164062, 10.904762268066406, 12.426033020019531, 17.189231872558594, 18.169612884521484, 9.318618774414062, 16.16980743408203, 23.619659423828125, 9.27801513671875, 15.014030456542969, -0.5212440490722656, -3.1830291748046875, 3.684661865234375, 21.61792755126953, 22.01232147216797, 63.24525451660156, -3.128631591796875, 51.07972717285156, 10.742368698120117, 7.5774078369140625, 62.88580322265625, 27.244483947753906, 41.47479248046875, 28.29364013671875, -24.017669677734375, 9.346153259277344, 36.51072692871094, 36.366111755371094, 17.370849609375, 7.120944976806641, -5.08056640625, 8.373619079589844, 4.5018157958984375, 10.700325012207031, -3.0550613403320312, -2.8647117614746094, -26.245071411132812, 16.33887481689453, -1.3848381042480469, 4.067502975463867, 48.47120666503906, 19.64856719970703, 10.183174133300781, -6.274688720703125, 19.21351432800293], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000162.npy"}
{"epoch": 0.4764705882352941, "step": 163, "batch_size": 128, "mean": 13.468847274780273, "std": 13.000287055969238, "min": -18.252037048339844, "p10": -1.9220451354980466, "median": 12.35400676727295, "p90": 29.842824554443357, "max": 61.185142517089844, "pos_frac": 0.875, "sample": [-2.7728443145751953, -2.9423446655273438, 23.329208374023438, -2.3228111267089844, 16.60011863708496, 48.834388732910156, 12.806739807128906, -5.762916564941406, 33.06952667236328, 11.590599060058594, -0.33062744140625, 6.177818298339844, 9.308334350585938, 23.823402404785156, 46.02151870727539, 43.15669250488281, 13.6165771484375, -4.649192810058594, 20.907066345214844, 11.588064193725586, 34.212005615234375, 17.889537811279297, 24.90686798095703, 12.357503890991211, 0.02911376953125, 11.697532653808594, 13.749618530273438, -12.361358642578125, 3.9436569213867188, 61.185142517089844, 25.280166625976562, 0.8979263305664062, 12.725685119628906, 33.760833740234375, 10.945999145507812, 19.93170166015625, 29.945358276367188, 11.387081146240234, -2.4507675170898438, 10.939796447753906, 8.551342010498047, 6.762277603149414, 31.903724670410156, 30.607269287109375, 16.217300415039062, 6.618677139282227, 17.916624069213867, 22.418502807617188, 11.239383697509766, 10.7464599609375, 22.125885009765625, -7.059684753417969, 16.639389038085938, 6.141336441040039, 15.138687133789062, 12.605264663696289, 14.757835388183594, 39.5035514831543, 15.685501098632812, 27.171836853027344, 12.984092712402344, -0.1896514892578125, 12.576488494873047, 26.92371368408203, 7.119712829589844, 1.683868408203125, 4.9078216552734375, 29.749469757080078, 10.820831298828125, -5.476104736328125, 0.26279449462890625, 15.052604675292969, 8.528997421264648, 7.153482437133789, 15.722042083740234, 23.927337646484375, 6.6084747314453125, 10.409187316894531, 20.627912521362305, 16.859664916992188, 29.79888153076172, 2.6470794677734375, 10.559707641601562, -3.37103271484375, 1.0081024169921875, 4.280769348144531, -2.1331100463867188, 15.1536865234375, 10.066734313964844, 14.169445037841797, 6.3336181640625, 27.975997924804688, 12.792278289794922, 11.27703857421875, 29.355636596679688, 14.634010314941406, 19.74077606201172, 2.40814208984375, 17.678932189941406, 15.608566284179688, 43.657318115234375, -18.252037048339844, -10.31352424621582, 3.634653091430664, 18.074554443359375, 15.487083435058594, 18.9637451171875, 20.477760314941406, 3.815145492553711, 4.060548782348633, 12.350509643554688, 21.331050872802734, 3.483428955078125, 7.1668548583984375, 7.2850189208984375, 9.12689208984375, 24.466384887695312, 2.8774642944335938, 8.780387878417969, 14.882339477539062, 11.513008117675781, 13.713516235351562, 0.16170501708984375, -1.8315887451171875, 43.33521270751953, 24.434768676757812, 0.7044792175292969, 5.6738739013671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000163.npy"}
{"epoch": 0.47941176470588237, "step": 164, "batch_size": 128, "mean": 15.449541091918945, "std": 14.520604133605957, "min": -16.10296630859375, "p10": 1.1672969818115235, "median": 12.564384460449219, "p90": 34.82481536865234, "max": 62.32630920410156, "pos_frac": 0.921875, "sample": [21.08587646484375, 5.251808166503906, 34.18474578857422, 32.064056396484375, 3.3900299072265625, 32.43287658691406, -8.0750732421875, 9.4036865234375, -4.673248291015625, 34.35591125488281, 16.589141845703125, 45.859275817871094, -7.644557952880859, 21.799545288085938, 13.799453735351562, 41.162837982177734, 23.680160522460938, 22.304672241210938, 19.266735076904297, 14.183269500732422, 11.172882080078125, 9.288558959960938, 6.211334228515625, 20.36199951171875, 48.801483154296875, 7.7631683349609375, 11.234588623046875, 5.809883117675781, 53.64241027832031, 4.299705505371094, 27.540924072265625, 5.194002151489258, 21.314163208007812, 21.910484313964844, 20.580230712890625, 9.805099487304688, 16.68707275390625, 12.069427490234375, 22.524139404296875, 5.222694396972656, 5.755720138549805, 10.216827392578125, 7.865272521972656, -2.6859989166259766, 30.392547607421875, 12.845901489257812, 17.934593200683594, -3.881072998046875, 10.108978271484375, 1.0766220092773438, 10.146129608154297, 17.10614013671875, 21.72662353515625, 15.817375183105469, 5.932884216308594, 32.91827392578125, 10.010276794433594, 42.13855743408203, 19.078060150146484, 12.282867431640625, 2.6243247985839844, 13.843757629394531, 9.777114868164062, -16.10296630859375, 21.530345916748047, 6.24114990234375, 2.1012420654296875, 9.389408111572266, 13.419227600097656, 27.569976806640625, 19.116958618164062, 6.009086608886719, 0.3607749938964844, -2.9909610748291016, 4.184513092041016, 20.430755615234375, -10.338508605957031, 9.984100341796875, 19.671363830566406, 50.51481628417969, 38.713932037353516, 9.324462890625, 1.415496826171875, 1.060699462890625, 5.5982513427734375, 19.928565979003906, 18.996658325195312, 6.390647888183594, 49.25514221191406, 16.622390747070312, 35.90571594238281, 62.32630920410156, 11.11773681640625, -15.266754150390625, 10.734764099121094, 34.361572265625, 5.703216552734375, 7.918510437011719, 17.24942398071289, 2.9538116455078125, 26.066593170166016, 21.69287109375, 17.65857696533203, 18.191680908203125, 9.561771392822266, 22.586105346679688, 3.2749481201171875, 7.7499542236328125, 48.274967193603516, 39.881866455078125, 1.2061576843261719, 17.86767578125, 5.86552619934082, 22.070682525634766, -3.7707557678222656, 14.276912689208984, 2.4988441467285156, 49.26606750488281, 4.074512481689453, 16.414648056030273, 28.70343017578125, 8.006050109863281, 9.243911743164062, 4.479209899902344, 21.207656860351562, 6.856056213378906, 14.230562210083008, 3.8157386779785156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000164.npy"}
{"epoch": 0.4823529411764706, "step": 165, "batch_size": 128, "mean": 16.738054275512695, "std": 15.859901428222656, "min": -18.86962890625, "p10": -1.853395462036132, "median": 16.41799545288086, "p90": 37.669132995605466, "max": 58.984375, "pos_frac": 0.859375, "sample": [37.243202209472656, 27.72887420654297, 48.476654052734375, 17.285079956054688, 6.43865966796875, 13.003021240234375, 35.527061462402344, 23.5731201171875, 26.378387451171875, 29.922409057617188, 4.443967819213867, 31.726165771484375, 3.7441253662109375, 18.775924682617188, 6.457084655761719, 15.94229507446289, 30.34386444091797, 10.755781173706055, 31.446752548217773, 25.115753173828125, 33.449012756347656, 37.6195068359375, 19.6007080078125, 38.419410705566406, 13.017290115356445, 4.454896926879883, 6.854072570800781, -4.6022796630859375, 2.962493896484375, 34.04827880859375, 11.679389953613281, 20.55687713623047, 47.70655822753906, 18.072647094726562, 22.9498291015625, 31.781387329101562, -8.413339614868164, 5.534088134765625, 8.906028747558594, 26.711849212646484, 1.59405517578125, 4.061637878417969, 4.4381866455078125, 49.21880340576172, 32.92103576660156, 12.747390747070312, 27.02623748779297, 4.907989501953125, 2.4110641479492188, 2.9082183837890625, -0.2175445556640625, 24.8087158203125, 22.940597534179688, -4.611419677734375, 32.30708312988281, -18.86962890625, 31.862159729003906, 49.27508544921875, 37.78492736816406, 25.100250244140625, -17.228256225585938, 22.420143127441406, 24.747055053710938, 26.60356903076172, 58.984375, 7.0567626953125, -2.409870147705078, -5.110370635986328, 14.898902893066406, 17.29852294921875, 6.739414215087891, 11.919174194335938, -16.729248046875, 20.0048828125, 17.98101043701172, 20.136337280273438, 8.563150405883789, 6.462627410888672, 23.554161071777344, 8.500114440917969, -0.7492523193359375, 9.685829162597656, 38.446266174316406, 20.639198303222656, 11.9281005859375, 40.916542053222656, 20.539276123046875, 6.4765167236328125, 18.472885131835938, 10.038724899291992, 6.279937744140625, 17.127235412597656, -0.095794677734375, 25.165679931640625, 13.053020477294922, 36.189029693603516, 11.187000274658203, 8.220727920532227, -5.74462890625, 6.216686248779297, 23.393287658691406, 22.389564514160156, 29.919219970703125, 3.7681045532226562, 7.587287902832031, 31.845083236694336, -8.404045104980469, -1.6149063110351562, 12.735485076904297, 10.637935638427734, 29.880508422851562, 7.110010147094727, -11.468032836914062, 45.7666015625, 7.511295318603516, 19.521835327148438, -5.650413513183594, 55.51731872558594, 33.92340087890625, 8.432243347167969, 6.74652099609375, 3.081878662109375, -10.946670532226562, 17.368186950683594, 38.59234619140625, 16.893695831298828, 42.319366455078125, -1.02325439453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000165.npy"}
{"epoch": 0.4852941176470588, "step": 166, "batch_size": 128, "mean": 15.25341510772705, "std": 13.336236000061035, "min": -17.697250366210938, "p10": 0.10469017028808598, "median": 14.108869552612305, "p90": 31.649540710449216, "max": 60.48454666137695, "pos_frac": 0.90625, "sample": [2.789043426513672, 27.654281616210938, 26.516708374023438, 6.110160827636719, 15.323169708251953, 10.781753540039062, 25.400470733642578, 5.82293701171875, 24.145111083984375, 17.09093475341797, 12.624282836914062, 35.20458984375, 35.235107421875, 26.086925506591797, 31.13813018798828, 20.82741355895996, 18.519790649414062, 6.115795135498047, 23.20557403564453, 4.898216247558594, 3.5160293579101562, 7.340000152587891, 4.664581298828125, 41.84269714355469, 16.628387451171875, 19.75604248046875, -17.697250366210938, 9.235586166381836, 6.889305114746094, 31.2906494140625, 19.595909118652344, 18.9847412109375, 6.218727111816406, 20.353805541992188, 32.48695373535156, 14.013969421386719, 49.588844299316406, -0.48911285400390625, 25.09510040283203, -5.296867370605469, 9.275796890258789, -4.2046051025390625, 29.52088165283203, 0.8845138549804688, 26.446533203125, 12.609176635742188, 7.06390380859375, 14.056259155273438, 11.000747680664062, 9.077384948730469, 7.717206954956055, 26.183837890625, 10.316574096679688, 7.409873962402344, 14.8875732421875, 32.766937255859375, 22.456146240234375, 18.597183227539062, 23.321090698242188, 26.862258911132812, 22.382450103759766, 5.509185791015625, 41.055686950683594, 19.23596954345703, 40.56315612792969, 6.989662170410156, 8.982994079589844, 14.161479949951172, 5.7961578369140625, 1.0098724365234375, 16.47394371032715, 36.77544403076172, 21.567298889160156, 9.229248046875, 3.35888671875, -1.8064708709716797, 27.767807006835938, 23.011566162109375, 24.32562255859375, 12.196807861328125, 5.4058685302734375, -2.5305252075195312, -3.06842041015625, 7.500312805175781, 60.48454666137695, 37.70916748046875, 1.5578269958496094, 7.8412628173828125, -0.4140472412109375, 30.68564224243164, 9.029275894165039, 1.9908447265625, 24.61580467224121, 18.822036743164062, 21.82335662841797, 4.057229995727539, 17.94159698486328, 26.517417907714844, 18.836082458496094, 0.34735679626464844, 8.140037536621094, 53.416290283203125, 4.796169281005859, 23.93914222717285, 29.871047973632812, 0.11934661865234375, -3.525848388671875, 7.310115814208984, 23.189743041992188, 24.045246124267578, 22.227510452270508, 2.7651424407958984, 14.05206298828125, 0.07049179077148438, 16.845054626464844, 18.021568298339844, 9.981391906738281, 35.06085205078125, 20.779953002929688, 2.7044677734375, -4.206047058105469, 0.5847549438476562, -1.7423763275146484, 14.622457504272461, 15.256393432617188, 2.4089126586914062, 11.12109375, -12.915061950683594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000166.npy"}
{"epoch": 0.48823529411764705, "step": 167, "batch_size": 128, "mean": 15.292102813720703, "std": 14.3504056930542, "min": -22.15130615234375, "p10": -1.7523336410522459, "median": 11.968021392822266, "p90": 34.69494209289551, "max": 63.863067626953125, "pos_frac": 0.8671875, "sample": [-4.039165496826172, 7.544944763183594, 47.4805908203125, -5.051628112792969, 7.0341644287109375, 11.498199462890625, 11.007247924804688, 24.832611083984375, 31.375518798828125, 17.97745132446289, 36.65592956542969, 14.094932556152344, -6.4400634765625, 11.981254577636719, 2.378631591796875, 23.911094665527344, 8.209320068359375, 19.12014389038086, 20.015899658203125, 14.487930297851562, -2.481670379638672, 2.021566390991211, 51.331199645996094, 24.245079040527344, 63.863067626953125, 34.681983947753906, 34.72517776489258, 29.874290466308594, 28.006057739257812, 7.9031219482421875, 20.76654815673828, 2.5073280334472656, 1.2873077392578125, -0.8494777679443359, 49.9239501953125, -10.697315216064453, 22.382766723632812, -3.1036148071289062, -1.236053466796875, 18.954452514648438, 39.28999328613281, 26.205177307128906, -2.962810516357422, -1.94464111328125, 32.52764892578125, 13.233818054199219, 7.381204605102539, 3.799276351928711, 19.086593627929688, 27.851226806640625, 4.09161376953125, 11.085136413574219, 22.40277862548828, 8.036176681518555, 23.644569396972656, 16.810043334960938, 32.861534118652344, 18.01336669921875, 11.92249870300293, 26.143508911132812, 8.101211547851562, -22.15130615234375, 17.214935302734375, 8.18975830078125, 42.278404235839844, 4.99896240234375, -2.646869659423828, 28.433330535888672, 3.6151046752929688, -0.6375350952148438, 2.5851669311523438, 0.6680469512939453, 7.95244026184082, 21.51123046875, -2.380767822265625, 22.131942749023438, 8.922660827636719, 15.042232513427734, 14.703933715820312, -5.117641448974609, 13.906517028808594, -5.704227447509766, 10.002532958984375, 27.12938690185547, 20.140369415283203, 25.59716796875, 15.317983627319336, 17.690658569335938, 37.0096435546875, 35.784629821777344, 32.50164031982422, 10.091705322265625, 45.37861633300781, 18.06760025024414, 20.955467224121094, 31.45333480834961, 11.381929397583008, 9.463119506835938, 9.209220886230469, 9.85312271118164, 11.683029174804688, 8.483604431152344, 6.676628112792969, 24.38970947265625, 20.827301025390625, 16.56005859375, 23.880027770996094, 4.2351531982421875, 5.637718200683594, 2.4797439575195312, 10.488296508789062, 1.9563980102539062, 28.299942016601562, 10.634307861328125, 11.954788208007812, 4.671791076660156, 5.5901641845703125, 12.166292190551758, 9.598098754882812, 39.54498291015625, 5.064697265625, 24.36548614501953, 9.794197082519531, 6.165740966796875, -1.6699161529541016, 46.19038391113281, 22.723217010498047, 8.692098617553711], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000167.npy"}
{"epoch": 0.49117647058823527, "step": 168, "batch_size": 128, "mean": 14.695899963378906, "std": 16.821781158447266, "min": -29.96435546875, "p10": -3.5158336639404295, "median": 13.832868576049805, "p90": 35.34593467712401, "max": 63.50425720214844, "pos_frac": 0.8125, "sample": [24.976593017578125, 19.845184326171875, 12.594284057617188, 3.8087310791015625, 27.369409561157227, 4.982074737548828, 12.363492965698242, 16.062049865722656, 2.685150146484375, 16.612991333007812, -1.804086685180664, 0.538604736328125, 14.711807250976562, 2.154247283935547, 37.97774124145508, 21.142547607421875, 30.722675323486328, 4.2598114013671875, 39.2170295715332, 15.713825225830078, 50.116214752197266, 48.18452453613281, 9.985000610351562, -3.626434326171875, -10.418508529663086, 1.58416748046875, 7.441289901733398, -1.5978965759277344, 45.13568115234375, -1.8413848876953125, 2.277536392211914, 30.80027198791504, 18.538333892822266, -5.3111572265625, -20.167390823364258, 8.349922180175781, 7.309043884277344, 16.530067443847656, 25.483245849609375, 29.441436767578125, 21.54010772705078, 25.45258331298828, 21.172950744628906, 11.476058959960938, 3.0288314819335938, 34.218017578125, 56.37060546875, 9.124881744384766, -0.7764472961425781, 56.624977111816406, -23.244033813476562, -10.2220458984375, 15.657051086425781, -1.9870586395263672, 17.478862762451172, 16.947067260742188, 18.7310791015625, 22.142250061035156, -0.9745635986328125, -3.468433380126953, 3.6225929260253906, 24.98321533203125, 11.51290512084961, 6.812744140625, 10.5911865234375, 10.95305061340332, 54.520294189453125, 21.931381225585938, 10.474542617797852, 15.900001525878906, 6.9623870849609375, 9.120368957519531, 40.442169189453125, 8.212352752685547, 11.418212890625, 20.538681030273438, -6.81805419921875, 21.27410888671875, 27.21680450439453, -9.317001342773438, 14.00457763671875, -1.8056106567382812, 13.88116455078125, 31.272449493408203, -9.6429443359375, 6.635078430175781, 15.638046264648438, -0.3668060302734375, -1.0945816040039062, 17.521408081054688, 18.39794921875, 21.964590072631836, 41.99952697753906, 13.78457260131836, 12.20499038696289, 31.712814331054688, 4.076496124267578, 63.50425720214844, -4.631935119628906, 3.7904281616210938, 18.747169494628906, 17.64129638671875, 62.407989501953125, -2.741182327270508, 7.503242492675781, 19.598251342773438, 17.180980682373047, 0.9927825927734375, 0.34291839599609375, 16.774784088134766, 25.802270889282227, 12.821264266967773, 19.067291259765625, 32.33287048339844, 13.494316101074219, 26.775394439697266, 14.259750366210938, 8.885421752929688, -29.96435546875, 15.422019958496094, 26.668228149414062, 46.271728515625, -5.784271240234375, 13.64849853515625, -10.813873291015625, 32.167869567871094, 28.375553131103516, 6.557708740234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000168.npy"}
{"epoch": 0.49411764705882355, "step": 169, "batch_size": 128, "mean": 14.494401931762695, "std": 13.16498851776123, "min": -13.155204772949219, "p10": -1.3666481018066405, "median": 13.809589385986328, "p90": 31.285660552978516, "max": 63.38215637207031, "pos_frac": 0.875, "sample": [42.19287109375, 50.20262145996094, -9.413177490234375, -6.508308410644531, 24.941635131835938, 33.212371826171875, 7.6208343505859375, 7.739341735839844, 30.891817092895508, 23.61852264404297, -1.3606491088867188, 20.749588012695312, 7.4616241455078125, 13.107070922851562, 8.431825637817383, 13.772415161132812, 26.467201232910156, 0.5189228057861328, 1.5431900024414062, 19.01447296142578, 10.786727905273438, -4.982269287109375, 13.926643371582031, 21.646583557128906, 21.61650848388672, 15.65365219116211, 23.06353759765625, 15.026466369628906, 24.559417724609375, 26.672645568847656, 15.749641418457031, 40.795166015625, 4.016387939453125, 14.034469604492188, 1.35064697265625, 22.16730499267578, 13.851207733154297, 9.122528076171875, 33.636383056640625, 4.3342742919921875, 14.460784912109375, 24.416778564453125, 19.035133361816406, 11.452102661132812, 8.675743103027344, 8.056924819946289, 11.593681335449219, 12.971492767333984, -4.7665252685546875, 63.38215637207031, 0.6026382446289062, 4.827934265136719, -4.403007507324219, 7.779335021972656, 29.823806762695312, 5.202362060546875, 19.78302764892578, 16.499557495117188, 8.071395874023438, 24.091201782226562, 14.317092895507812, 14.062393188476562, 1.1026420593261719, 23.508838653564453, 15.77756118774414, 42.88587951660156, -13.155204772949219, -3.082897186279297, -4.588958740234375, 10.083930969238281, 15.520669937133789, 29.225440979003906, 14.690692901611328, 31.28399658203125, 29.443439483642578, 24.32147216796875, 17.778173446655273, 13.233436584472656, 33.368858337402344, 15.8284912109375, 29.697614669799805, -2.8591041564941406, 7.073638916015625, 18.607589721679688, 6.556396484375, 15.24801254272461, 12.348499298095703, 14.858732223510742, 12.679969787597656, 10.030710220336914, 10.61981201171875, 0.31960296630859375, 40.368499755859375, 28.882476806640625, 7.6118316650390625, 31.28954315185547, 33.943084716796875, 13.846763610839844, -0.96258544921875, 3.64300537109375, 5.740205764770508, 27.953231811523438, -1.380645751953125, -2.819927215576172, 8.027202606201172, -1.5399932861328125, 8.656204223632812, -6.892818450927734, 4.6280975341796875, 6.07850456237793, 20.29207992553711, 34.327430725097656, 10.224605560302734, 5.024890899658203, -1.1991081237792969, 3.389495849609375, 22.17218780517578, 6.047100067138672, 18.205177307128906, 8.326332092285156, 44.89337158203125, 6.224327087402344, 19.969688415527344, 18.32973861694336, 25.3349609375, 4.950050354003906, 2.505565643310547, 15.616806030273438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000169.npy"}
{"epoch": 0.4970588235294118, "step": 170, "batch_size": 128, "mean": 15.164199829101562, "std": 16.265958786010742, "min": -19.688034057617188, "p10": -3.997781181335449, "median": 13.873355865478516, "p90": 33.57171859741211, "max": 88.41285705566406, "pos_frac": 0.8828125, "sample": [0.9374771118164062, 4.9835357666015625, 29.731033325195312, -3.9893932342529297, 19.371925354003906, 8.563491821289062, 5.4409637451171875, 2.3640899658203125, 0.5192928314208984, 0.8689346313476562, 6.829521179199219, 24.053504943847656, 20.613353729248047, 8.371650695800781, 13.433769226074219, -10.654022216796875, 1.0769081115722656, 5.90022087097168, 66.37845611572266, 26.363021850585938, -16.83978271484375, 1.3311996459960938, 20.945510864257812, 12.492988586425781, 4.372333526611328, 2.40606689453125, 32.99287796020508, 38.889991760253906, 23.001060485839844, -2.9827804565429688, 14.916259765625, 27.189987182617188, 33.20957946777344, -7.906337738037109, 34.475006103515625, 4.355743408203125, 13.893486022949219, -8.265853881835938, 24.595870971679688, 17.81481170654297, 18.791412353515625, 17.819026947021484, 54.64605712890625, 7.708549499511719, 31.014366149902344, 42.23396301269531, 2.9639434814453125, 24.28467559814453, 15.010124206542969, 22.45567512512207, 16.1480712890625, 26.327972412109375, 27.436508178710938, 31.891403198242188, -10.93045425415039, 24.938262939453125, 3.545787811279297, -5.3153533935546875, 10.039138793945312, 0.1796875, 43.58869934082031, 16.719329833984375, 23.07904815673828, 21.84229278564453, 30.57465362548828, 21.18090057373047, 4.594230651855469, 19.985084533691406, 13.853225708007812, 20.6407470703125, 44.06758499145508, 5.6080169677734375, 23.379104614257812, 13.02431869506836, 17.27820587158203, 2.446666717529297, 8.451515197753906, 24.161598205566406, -19.688034057617188, 27.13292694091797, 17.277633666992188, 8.68389892578125, 3.4416637420654297, 12.0552978515625, 0.66961669921875, 8.653518676757812, -7.274513244628906, 18.849342346191406, 34.416709899902344, 49.170013427734375, 6.957733154296875, 28.751052856445312, 21.65533447265625, 23.337173461914062, 7.9490966796875, 15.876953125, 18.959335327148438, 8.867656707763672, 18.662281036376953, 88.41285705566406, 11.071739196777344, -10.057754516601562, 40.837249755859375, 8.913433074951172, 11.85394287109375, 14.98244857788086, 2.588693618774414, 22.471542358398438, -8.686752319335938, 16.860153198242188, 28.748348236083984, 8.641082763671875, 40.618446350097656, 11.445816040039062, 7.923799514770508, 15.786794662475586, -9.562026977539062, 43.916542053222656, 11.418365478515625, 12.109214782714844, 0.04982566833496094, 2.2137508392333984, 6.883350372314453, 26.55175018310547, -5.322257995605469, 14.576850891113281, -4.017353057861328, 7.673065185546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000170.npy"}
{"epoch": 0.5, "step": 171, "batch_size": 128, "mean": 16.00217056274414, "std": 15.340432167053223, "min": -25.986129760742188, "p10": -2.6087387084960936, "median": 14.62997055053711, "p90": 37.56338424682617, "max": 61.33068084716797, "pos_frac": 0.8671875, "sample": [41.17414855957031, -25.986129760742188, 5.831329345703125, 15.663774490356445, 13.708908081054688, 36.65184020996094, 4.536285400390625, 28.395156860351562, 1.3365249633789062, 36.745452880859375, 9.993080139160156, 17.582386016845703, 4.256345748901367, 29.381332397460938, 22.22446060180664, 25.556652069091797, 38.45155334472656, -1.2181320190429688, 23.455703735351562, 12.490226745605469, 27.904037475585938, -5.767988204956055, -7.221155166625977, 12.438774108886719, -6.1707763671875, 36.8273811340332, 20.708118438720703, 19.530689239501953, 21.669387817382812, -1.7756881713867188, 16.859512329101562, 8.461555480957031, 10.251976013183594, 15.514266967773438, 4.0711822509765625, 21.476295471191406, 20.252565383911133, 0.43536376953125, 8.309371948242188, 1.9197444915771484, -12.326736450195312, 61.33068084716797, 8.16795539855957, 11.69418716430664, 10.514623641967773, -21.514846801757812, 6.4289703369140625, 38.726165771484375, 38.3609619140625, 12.4136962890625, 17.002870559692383, 13.108001708984375, -3.4505386352539062, 17.774322509765625, 16.836639404296875, 41.05535888671875, 32.739898681640625, 15.391326904296875, 27.439956665039062, 12.570283889770508, 28.93523406982422, 12.861047744750977, 36.27351379394531, 8.866744995117188, 31.558372497558594, -0.52947998046875, -3.3072357177734375, 15.992103576660156, 18.549758911132812, 18.596521377563477, 33.015132904052734, 14.394561767578125, 5.089332580566406, 14.008110046386719, 14.735183715820312, -6.66632080078125, 16.00212860107422, 11.49337387084961, 37.96553039550781, 6.0120849609375, 22.867599487304688, 3.3563385009765625, -2.5757293701171875, -5.6527862548828125, 38.98247528076172, 43.430076599121094, 25.267990112304688, 17.83126449584961, 33.915592193603516, 10.752487182617188, 10.68026351928711, 13.051841735839844, 10.5689697265625, 14.731430053710938, 23.791141510009766, 4.3983306884765625, 10.758514404296875, 4.524017333984375, -2.9044570922851562, 17.77838134765625, 37.90161895751953, 16.662673950195312, 61.275390625, 15.578567504882812, 5.374488830566406, 3.961284637451172, 21.251571655273438, 14.528511047363281, 7.973087310791016, -10.1580810546875, 3.8208465576171875, 34.81861114501953, 0.6404571533203125, 9.069353103637695, 16.228260040283203, -2.685760498046875, 50.04597473144531, 37.418426513671875, 26.936065673828125, 49.96856689453125, 17.1615047454834, 14.074661254882812, 7.0263519287109375, 7.928199768066406, 15.303295135498047, 24.747390747070312, 32.00801086425781, 9.829910278320312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000171.npy"}
{"epoch": 0.5029411764705882, "step": 172, "batch_size": 128, "mean": 15.436238288879395, "std": 15.801639556884766, "min": -21.395404815673828, "p10": -2.1313234329223625, "median": 14.4744873046875, "p90": 38.07638092041016, "max": 58.258544921875, "pos_frac": 0.875, "sample": [15.614608764648438, 2.8080291748046875, 18.6673583984375, 24.701644897460938, 8.208915710449219, 0.5798416137695312, 12.987751007080078, 14.767684936523438, 10.028060913085938, 14.687076568603516, 20.65264892578125, 14.453125, 27.839855194091797, 29.64154052734375, 12.254035949707031, 30.760658264160156, 14.495849609375, -15.199207305908203, 14.594184875488281, 20.686996459960938, 24.845008850097656, 14.862226486206055, 4.746042251586914, 29.234527587890625, 10.341796875, 17.271629333496094, 13.603744506835938, 45.776153564453125, 14.642681121826172, 4.563861846923828, 11.773262023925781, 25.258499145507812, 1.69622802734375, 15.496917724609375, 21.255340576171875, 11.575042724609375, 22.48788070678711, 19.14501190185547, 26.352027893066406, 1.5880584716796875, 43.93867874145508, 17.038000106811523, 5.258230209350586, 11.165695190429688, 30.02447509765625, 34.04423522949219, -5.872306823730469, 5.616962432861328, -16.432525634765625, 34.796234130859375, 19.56200408935547, 2.2034835815429688, 19.608192443847656, 50.766510009765625, 24.441543579101562, 30.813989639282227, 21.290437698364258, 1.5238838195800781, 15.65838623046875, 8.967460632324219, 1.8113899230957031, 50.317161560058594, 49.78007507324219, -9.377601623535156, 2.9261112213134766, 4.870944976806641, -9.28790283203125, -7.052446365356445, 10.106727600097656, 39.42326354980469, 14.157588958740234, 10.110574722290039, 8.988525390625, -10.120964050292969, -1.8653945922851562, 14.080263137817383, -0.21682167053222656, 5.374330520629883, 2.7879714965820312, 2.272096633911133, 28.083831787109375, 11.358100891113281, 16.710723876953125, 9.830659866333008, 38.0931396484375, -11.035049438476562, 9.94293212890625, 10.312969207763672, 8.008832931518555, 38.80070495605469, 1.86041259765625, 3.03314208984375, 11.200206756591797, 47.428524017333984, 2.2270851135253906, 22.530174255371094, 2.109954833984375, 4.831634521484375, 2.0179710388183594, 20.16961669921875, -21.395404815673828, 13.754737854003906, -2.9389877319335938, 15.620223999023438, 18.05175018310547, 32.9207763671875, 23.92803955078125, 40.82491683959961, 7.758687973022461, -2.64300537109375, 54.88327407836914, 15.83367919921875, 38.06919860839844, 22.674652099609375, 16.953582763671875, 19.179908752441406, -1.9120311737060547, 58.258544921875, 26.16991424560547, 37.48824691772461, -9.424259185791016, 25.344589233398438, 36.07038879394531, -10.537185668945312, 25.219995498657227, 7.494989395141602, 45.644004821777344, 1.7836227416992188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000172.npy"}
{"epoch": 0.5058823529411764, "step": 173, "batch_size": 128, "mean": 15.491931915283203, "std": 15.65981388092041, "min": -24.460723876953125, "p10": -1.0289951324462883, "median": 14.554290771484375, "p90": 35.09982833862305, "max": 71.6539306640625, "pos_frac": 0.8671875, "sample": [2.2887039184570312, 70.28314208984375, -6.462123870849609, 10.944068908691406, 4.961984634399414, 0.423736572265625, 23.16387939453125, 13.118705749511719, -6.148017883300781, 22.117385864257812, 10.421152114868164, 8.070098876953125, 0.651763916015625, 23.662185668945312, 6.863330841064453, 18.61383819580078, 9.706413269042969, 23.61901092529297, 25.66107177734375, 25.47650909423828, 23.16407012939453, 10.748374938964844, 17.349075317382812, 19.089035034179688, 9.52294921875, 16.40447235107422, -12.433197021484375, -0.06075286865234375, 15.079408645629883, 30.417823791503906, 0.8063182830810547, 0.04172515869140625, 24.06542205810547, 11.512672424316406, 2.2996444702148438, 14.227951049804688, 21.045608520507812, 21.167396545410156, 14.606605529785156, 14.648353576660156, 22.407691955566406, 26.405426025390625, 17.78827667236328, 35.113922119140625, 15.461889266967773, 15.679306030273438, 12.346111297607422, 11.604774475097656, 15.69952392578125, 53.867591857910156, 21.68592071533203, 17.001258850097656, 22.97748565673828, 9.090568542480469, -1.6163406372070312, 36.19580078125, 2.8932857513427734, -2.1453895568847656, 14.501976013183594, 18.82738494873047, 28.153209686279297, 46.678810119628906, -8.026603698730469, 6.819427490234375, 32.608665466308594, 19.606781005859375, -0.7003669738769531, 31.10985565185547, 71.6539306640625, -0.8182601928710938, 46.53565979003906, 16.54008674621582, 25.318130493164062, 26.68726348876953, 9.602714538574219, 2.530130386352539, 8.215579986572266, -8.567304611206055, -7.216217041015625, 26.486083984375, -2.7317581176757812, 16.531028747558594, 39.0025634765625, 24.807809829711914, 51.23889923095703, 4.970893859863281, 11.5506591796875, 0.7017669677734375, 14.382181167602539, 0.9743919372558594, 23.950965881347656, 5.216907501220703, 33.69914245605469, -1.5207099914550781, 15.690423965454102, 43.01118469238281, 25.842540740966797, 52.14726257324219, -14.8934326171875, 0.6541519165039062, 16.9361572265625, 3.108938217163086, 7.243827819824219, 10.240226745605469, 11.987220764160156, 8.993349075317383, 23.861717224121094, 4.570652008056641, 3.9962692260742188, 12.256256103515625, 11.53970718383789, 15.752128601074219, 9.126399993896484, 16.88318634033203, 25.682886123657227, 40.166595458984375, 9.031936645507812, 18.231842041015625, 21.822242736816406, 7.824241638183594, 6.736217498779297, 10.570796966552734, 37.699745178222656, -24.460723876953125, -6.3935394287109375, -0.187744140625, 19.282196044921875, 35.093788146972656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000173.npy"}
{"epoch": 0.5088235294117647, "step": 174, "batch_size": 128, "mean": 13.791817665100098, "std": 15.188986778259277, "min": -17.065505981445312, "p10": -2.969061279296874, "median": 11.15330982208252, "p90": 34.941368865966794, "max": 59.853546142578125, "pos_frac": 0.8359375, "sample": [-17.065505981445312, -0.4544181823730469, 20.881561279296875, 49.221214294433594, 5.796226501464844, 5.322048187255859, 26.40740203857422, 24.986610412597656, -10.800621032714844, -3.5930328369140625, 27.748672485351562, 9.059429168701172, 11.43559455871582, 1.2238426208496094, 0.27178955078125, 9.925338745117188, 31.287979125976562, 38.4315185546875, 5.595315933227539, 10.134185791015625, 29.181137084960938, 17.336750030517578, -0.7593841552734375, -10.005546569824219, 59.853546142578125, 4.792387008666992, 31.292251586914062, 35.869773864746094, 20.499649047851562, 55.61309814453125, 0.24063873291015625, 18.053756713867188, 18.400115966796875, 12.155826568603516, 31.246322631835938, 6.888824462890625, 9.733760833740234, 11.3345947265625, 1.0239982604980469, -13.082931518554688, 30.17730712890625, 9.81597900390625, 22.55530548095703, 6.667266845703125, 10.972024917602539, 22.343338012695312, 12.614959716796875, 4.2669677734375, 12.973434448242188, 29.80376434326172, 7.323478698730469, 20.885875701904297, 19.82762908935547, 10.129081726074219, 21.570465087890625, 19.038707733154297, -2.3166656494140625, -14.597457885742188, 35.34136199951172, 12.720758438110352, 35.0899772644043, 7.754875183105469, 12.83038330078125, 3.145782470703125, 16.683448791503906, -2.492462158203125, 10.566871643066406, 25.550704956054688, 20.651947021484375, 9.26507568359375, 8.983383178710938, 9.437274932861328, -0.47883033752441406, 6.34869384765625, 9.37563705444336, 5.447887420654297, 15.70610237121582, 16.454456329345703, -2.2510833740234375, 42.410400390625, 7.12664794921875, 47.68568420410156, 25.660064697265625, 24.8372802734375, 1.7397537231445312, -2.7016448974609375, 2.9693431854248047, 16.157196044921875, 8.471702575683594, 15.76605224609375, 7.387107849121094, 7.881595611572266, 43.48564147949219, 17.08980941772461, 15.334419250488281, 18.47522735595703, 23.904922485351562, -11.235542297363281, 2.6961708068847656, 16.07973289489746, 21.770339965820312, -10.235145568847656, 3.5315475463867188, 37.531951904296875, 32.26003646850586, 18.794654846191406, -4.968315124511719, 57.06056213378906, -1.3126068115234375, 14.92437744140625, 21.787261962890625, 14.867076873779297, -7.571315765380859, 2.178558349609375, 6.874946594238281, 34.94818878173828, 34.938446044921875, -6.224430084228516, 8.324226379394531, 2.3192272186279297, 3.3636856079101562, 2.4539794921875, 6.731903076171875, 33.64408874511719, 20.1107177734375, 12.796295166015625, -6.615020751953125, -5.821573257446289], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000174.npy"}
{"epoch": 0.5117647058823529, "step": 175, "batch_size": 128, "mean": 12.645984649658203, "std": 16.200803756713867, "min": -21.814178466796875, "p10": -7.495307922363281, "median": 10.928813934326172, "p90": 36.28745040893554, "max": 66.47303771972656, "pos_frac": 0.8046875, "sample": [-1.1207504272460938, -16.36663055419922, 6.5256805419921875, 11.855663299560547, 8.912338256835938, 11.160491943359375, 15.812423706054688, -2.1050758361816406, -7.558967590332031, 0.29573822021484375, 19.57866668701172, 1.9002685546875, 22.650009155273438, 9.447025299072266, 1.7428131103515625, 5.822017669677734, -10.534957885742188, 3.0377197265625, 27.572879791259766, -12.371299743652344, -21.814178466796875, 12.290294647216797, 45.73579406738281, 17.745513916015625, 35.67828369140625, 3.769500732421875, -1.7273941040039062, 22.73046112060547, 7.678680419921875, 17.90393829345703, 12.687713623046875, 7.514778137207031, 16.647388458251953, 19.737045288085938, 14.65716552734375, 12.25030517578125, 16.338869094848633, 17.978885650634766, 10.361650466918945, -0.3185710906982422, 13.449369430541992, -4.9258575439453125, 18.13555908203125, -3.1383514404296875, 10.578031539916992, -3.7249603271484375, 53.21479034423828, 4.6545257568359375, 48.916290283203125, -20.045379638671875, -8.088462829589844, 16.67523193359375, 41.980369567871094, 64.33419036865234, 37.71539306640625, 1.1282234191894531, 9.053520202636719, 35.38337707519531, 13.601566314697266, 38.14360046386719, 18.6427001953125, 13.62347412109375, -8.035888671875, 32.043853759765625, 4.111663818359375, -13.063907623291016, 0.8513031005859375, 27.35211944580078, 29.43749237060547, 17.14202117919922, 21.380477905273438, 3.28021240234375, 66.47303771972656, 9.628623962402344, 38.47283935546875, 25.40215492248535, 35.16967010498047, 23.514263153076172, 6.745479583740234, 12.154308319091797, 9.227394104003906, 2.0404319763183594, -9.970634460449219, 2.3841171264648438, 0.7575912475585938, 22.24786376953125, -3.6549301147460938, 22.084320068359375, 15.738357543945312, 14.900367736816406, -7.468025207519531, 30.231449127197266, -8.21285629272461, 26.34461212158203, 8.525978088378906, -0.10733795166015625, -14.939033508300781, 1.4854354858398438, 8.632793426513672, 37.708839416503906, -7.791229248046875, 19.614093780517578, 21.297645568847656, 10.872238159179688, -1.4934158325195312, 29.61772918701172, 5.3287811279296875, 2.069183349609375, 14.402389526367188, 8.858123779296875, 6.681173324584961, 11.424694061279297, 7.311485290527344, 40.223350524902344, 1.497344970703125, -1.497314453125, 10.985389709472656, 37.8648681640625, 18.3662109375, 41.047271728515625, 3.9744873046875, 7.99407958984375, 11.856002807617188, 13.13454818725586, 3.938812255859375, 10.771247863769531, 12.659412384033203, 24.251678466796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000175.npy"}
{"epoch": 0.5147058823529411, "step": 176, "batch_size": 128, "mean": 14.647061347961426, "std": 16.242767333984375, "min": -21.678466796875, "p10": -4.713916778564452, "median": 13.48575496673584, "p90": 31.66858081817627, "max": 86.76627349853516, "pos_frac": 0.8359375, "sample": [6.913795471191406, 13.623592376708984, 42.396575927734375, 41.023162841796875, 30.366958618164062, 14.837112426757812, -8.086982727050781, 14.69283676147461, 14.700782775878906, 22.779769897460938, 29.397789001464844, 21.758228302001953, 1.90081787109375, 26.947784423828125, 12.732208251953125, 27.373685836791992, -10.477985382080078, -2.5773868560791016, -1.0686187744140625, 31.912261962890625, 23.099716186523438, 13.561531066894531, 28.690155029296875, 24.435314178466797, 27.531688690185547, -9.542572021484375, 20.204484939575195, 37.80558776855469, 5.606590270996094, 24.701927185058594, 36.29144287109375, 14.313217163085938, 49.46488952636719, 31.25579833984375, 21.955612182617188, 20.602981567382812, 17.159889221191406, 33.36979675292969, 44.31622314453125, 17.177722930908203, 7.4673004150390625, 9.191314697265625, 20.53683090209961, 31.564146041870117, -10.538955688476562, -2.0445709228515625, -8.631866455078125, 19.127777099609375, -21.678466796875, 17.80431365966797, 9.231117248535156, 86.76627349853516, 3.774749755859375, 2.615713119506836, 29.019784927368164, 16.223541259765625, 20.978843688964844, 57.205665588378906, 3.7795486450195312, 11.197982788085938, 20.252952575683594, 18.229354858398438, 14.074718475341797, 10.315677642822266, 27.613929748535156, 26.984905242919922, 3.2128849029541016, 20.527572631835938, -5.8152313232421875, 2.359884262084961, 22.061138153076172, 20.630035400390625, -0.9718952178955078, 57.09759521484375, 13.108987808227539, 10.96524429321289, 3.2960357666015625, 55.611297607421875, 23.373580932617188, 30.648544311523438, 12.044525146484375, -0.31639671325683594, 12.121231079101562, 8.585899353027344, -7.1038360595703125, 11.868082046508789, 9.473787307739258, 9.755048751831055, 5.298774719238281, -3.38189697265625, 14.801433563232422, 2.0286407470703125, 3.928232192993164, 5.362335205078125, 5.952980041503906, 19.155364990234375, 19.182971954345703, 16.978240966796875, 38.508148193359375, 21.141738891601562, 10.809490203857422, 10.156082153320312, -6.658777236938477, -4.54045295715332, 9.583877563476562, 12.022590637207031, 0.5156135559082031, -13.4600830078125, 16.990089416503906, 11.164661407470703, 22.576416015625, 27.152854919433594, -5.11866569519043, 6.5629119873046875, 20.38127899169922, -18.20398712158203, -15.409378051757812, 3.585052490234375, 1.77667236328125, 5.739479064941406, 8.580669403076172, -1.6769332885742188, 11.68768310546875, 13.409978866577148, 21.20880126953125, 7.907073974609375, 6.542816162109375, 15.83602523803711], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000176.npy"}
{"epoch": 0.5176470588235295, "step": 177, "batch_size": 128, "mean": 14.57111930847168, "std": 15.425582885742188, "min": -17.546005249023438, "p10": -1.5066106796264647, "median": 11.291080474853516, "p90": 35.18072509765625, "max": 58.109100341796875, "pos_frac": 0.859375, "sample": [15.18438720703125, 15.099830627441406, 3.5080204010009766, 24.62065887451172, 10.522842407226562, 9.299938201904297, -0.8083438873291016, 4.140935897827148, 2.7443084716796875, 8.165740966796875, 32.08148193359375, 25.486587524414062, 0.29322242736816406, 34.80195617675781, 18.9371395111084, 0.5809574127197266, 15.820690155029297, -2.4050674438476562, -1.7888965606689453, -17.546005249023438, 33.847206115722656, 1.5102996826171875, 39.0938720703125, 1.6386604309082031, -7.661041259765625, 34.98919677734375, 10.806419372558594, 1.4520721435546875, 35.62762451171875, 28.23693084716797, 35.964385986328125, 48.72441101074219, 34.26747131347656, 12.11813735961914, 0.72052001953125, -8.164512634277344, 4.62445068359375, 26.997421264648438, -2.2522811889648438, 31.923294067382812, -9.337150573730469, 25.870468139648438, -14.147048950195312, 10.201797485351562, -0.4115142822265625, 4.842926025390625, 1.879110336303711, -1.4318962097167969, 31.136566162109375, 27.7745361328125, 8.587688446044922, 23.161502838134766, 20.921445846557617, 24.140283584594727, 38.12323760986328, 27.68994140625, -0.613922119140625, 10.152610778808594, 10.307723999023438, -15.601459503173828, 5.314453125, 14.51040267944336, 8.481765747070312, 8.561908721923828, 31.810470581054688, 32.345481872558594, 22.46141815185547, 6.277595520019531, 33.41087341308594, 15.935096740722656, 55.3453369140625, 1.4906501770019531, 15.710159301757812, 18.33294677734375, 16.40709686279297, 15.695014953613281, 6.4605865478515625, 7.8618927001953125, 10.171283721923828, 19.671554565429688, 26.05373764038086, 26.9820556640625, 10.031232833862305, 7.8312835693359375, -16.755462646484375, 18.7600040435791, -1.450326919555664, 2.0358428955078125, 58.109100341796875, 9.637115478515625, 7.748680114746094, 37.484840393066406, 14.617660522460938, 9.133241653442383, -4.256561279296875, 45.10211181640625, 6.925262451171875, -1.637939453125, 3.5553970336914062, 2.7420883178710938, 8.97125244140625, 45.50567626953125, 17.187149047851562, 4.013179779052734, -4.1460723876953125, 17.223318099975586, 36.298423767089844, 14.765445709228516, 11.984481811523438, 4.508571624755859, 13.393524169921875, 0.18384933471679688, 12.650604248046875, 7.7195587158203125, 31.269956588745117, 1.601470947265625, 18.461097717285156, 14.158920288085938, 15.161081314086914, 51.48276901245117, 29.48700714111328, 1.132537841796875, 26.387683868408203, 0.332794189453125, 42.64308547973633, 28.46820068359375, 7.1248779296875, 11.775741577148438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000177.npy"}
{"epoch": 0.5205882352941177, "step": 178, "batch_size": 128, "mean": 14.159320831298828, "std": 15.096062660217285, "min": -11.87374496459961, "p10": -2.363162994384765, "median": 12.234861373901367, "p90": 37.263957214355464, "max": 58.31550598144531, "pos_frac": 0.859375, "sample": [15.825424194335938, 35.92914581298828, 3.9594192504882812, 52.54634094238281, 6.674072265625, 13.806922912597656, 7.409996032714844, 35.691436767578125, 9.236175537109375, 41.745914459228516, -4.0062255859375, 24.426555633544922, 19.346904754638672, 13.377143859863281, 1.1339569091796875, 8.920997619628906, 3.24700927734375, -10.881378173828125, 14.747642517089844, 4.516962051391602, 21.720664978027344, 39.794349670410156, 3.54412841796875, 51.16188049316406, 12.896095275878906, 29.22293472290039, 19.502609252929688, 33.15948486328125, 17.867111206054688, 17.010448455810547, -1.731649398803711, 16.18817901611328, -2.74578857421875, 56.04829406738281, 27.709793090820312, 41.042999267578125, -2.1991806030273438, 5.596334457397461, 17.88542938232422, 17.781982421875, 23.518478393554688, 18.79891586303711, 58.31550598144531, 9.988662719726562, 1.5989990234375, 19.511375427246094, 16.040523529052734, 16.192581176757812, 21.194900512695312, 26.798362731933594, 5.8522186279296875, 7.886383056640625, 19.77737808227539, 21.519472122192383, 17.316696166992188, 12.937042236328125, 9.7896728515625, 7.528896331787109, 15.743356704711914, -7.410539627075195, 13.96356201171875, 8.586776733398438, 20.03034210205078, 0.2808380126953125, 13.186676025390625, 8.389734268188477, 26.704345703125, 39.20240783691406, 5.270404815673828, 0.9750900268554688, -1.8326568603515625, 27.16197967529297, 2.9884109497070312, 6.46630859375, 1.7030563354492188, 2.03375244140625, -10.359437942504883, 37.016151428222656, 1.1835861206054688, 14.090728759765625, 9.786745071411133, 8.307527542114258, 21.471633911132812, 5.268516540527344, 0.9352951049804688, 37.84217071533203, -7.11199951171875, 9.788787841796875, 45.402374267578125, -10.365333557128906, 18.574630737304688, -10.020050048828125, 1.8229217529296875, 1.8910980224609375, 7.078325271606445, 9.448680877685547, 16.526039123535156, 7.1004486083984375, 6.676185607910156, 11.573627471923828, 32.48248291015625, 46.47748565673828, 8.685569763183594, 19.862689971923828, 13.960887908935547, 32.48527526855469, 5.6783294677734375, 44.19798278808594, -1.3629913330078125, -11.87374496459961, -2.010345458984375, 20.57049560546875, 0.9500045776367188, -7.452911376953125, -7.213863372802734, 40.98493194580078, 7.423835754394531, 2.7267990112304688, 8.544380187988281, -6.214408874511719, 11.164531707763672, 20.447479248046875, -6.7172088623046875, 22.588111877441406, 21.65612030029297, 16.972396850585938, 11.5552978515625, 14.77437973022461], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000178.npy"}
{"epoch": 0.5235294117647059, "step": 179, "batch_size": 128, "mean": 15.6057767868042, "std": 15.712363243103027, "min": -18.49201202392578, "p10": -0.7773353576660156, "median": 13.492225646972656, "p90": 36.95936584472656, "max": 69.89727783203125, "pos_frac": 0.8828125, "sample": [38.24024200439453, 37.9404296875, 3.46453857421875, 22.518898010253906, 15.961990356445312, 9.294075012207031, 2.6432647705078125, 18.015037536621094, 13.3028564453125, 15.4754638671875, 17.529205322265625, -0.25901031494140625, 32.91258239746094, 12.126209259033203, 15.297676086425781, -7.9001312255859375, 21.247840881347656, -0.7736892700195312, 32.30644989013672, 0.5273094177246094, 0.5908584594726562, 42.39604187011719, 23.951629638671875, 35.880550384521484, 18.757911682128906, 33.781654357910156, 7.986856460571289, 31.107032775878906, 5.706764221191406, 12.966575622558594, 24.04436492919922, 17.148338317871094, 13.73508071899414, 26.410995483398438, -18.49201202392578, 4.377128601074219, 3.325531005859375, 23.908172607421875, 11.460769653320312, -2.7309722900390625, -15.761245727539062, -3.363088607788086, 24.21905517578125, 20.876449584960938, 0.4082660675048828, 23.581558227539062, 20.758743286132812, -0.7858428955078125, 6.643001556396484, 1.6547794342041016, 54.48478698730469, 22.61163330078125, 5.958961486816406, 4.413543701171875, 8.255691528320312, 15.045074462890625, 3.430755615234375, 18.647476196289062, 31.833824157714844, 20.00274658203125, 2.783567428588867, 69.58839416503906, -1.2856101989746094, 14.5208740234375, 8.444480895996094, 40.594078063964844, 13.100540161132812, 28.079288482666016, 4.688121795654297, 7.388710021972656, 11.221046447753906, 36.0758056640625, 0.3715972900390625, 5.5527496337890625, 16.8280029296875, 0.1721935272216797, -4.652433395385742, 3.029621124267578, 2.0185012817382812, 5.741233825683594, 22.792312622070312, 0.8535060882568359, 31.940582275390625, 36.69615173339844, 17.350265502929688, 13.681594848632812, 10.003496170043945, 26.45001220703125, 10.415863037109375, 5.67962646484375, 17.346343994140625, 11.197052001953125, 7.0112762451171875, 3.9984092712402344, 31.673919677734375, 69.89727783203125, 15.069869995117188, 6.547613143920898, 9.854164123535156, 19.6109619140625, 1.951202392578125, 11.076385498046875, -1.865804672241211, 21.7828369140625, 8.420883178710938, -8.70391845703125, 24.745586395263672, -3.231657028198242, 48.57539367675781, 40.89904022216797, 14.110755920410156, -3.7760257720947266, 44.05702209472656, 19.979721069335938, 4.7547149658203125, 35.85718536376953, 31.602252960205078, -4.57861328125, 42.05760955810547, 37.57353210449219, 1.1662788391113281, 27.517990112304688, 18.570301055908203, 5.7018890380859375, 41.830894470214844, 1.2792816162109375, 17.44395637512207, 7.308935165405273], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000179.npy"}
{"epoch": 0.5264705882352941, "step": 180, "batch_size": 128, "mean": 16.069049835205078, "std": 15.97512435913086, "min": -16.119068145751953, "p10": -0.6158861160278313, "median": 12.963630676269531, "p90": 39.5346565246582, "max": 57.169342041015625, "pos_frac": 0.8828125, "sample": [1.4831085205078125, 4.6476898193359375, 4.9078521728515625, 36.37901306152344, -9.720542907714844, 57.169342041015625, -8.9461669921875, 51.46342468261719, 19.782621383666992, 6.610706329345703, 32.108211517333984, 17.114578247070312, 5.940128326416016, 17.75881576538086, 47.88972473144531, 36.950889587402344, 48.647918701171875, 6.693561553955078, 12.267982482910156, 18.823354721069336, 4.310951232910156, 31.46178436279297, 3.4210891723632812, 54.677978515625, 20.001026153564453, 37.859619140625, 3.0854110717773438, -3.3539581298828125, 10.683074951171875, 9.306556701660156, 7.507007598876953, 14.184249877929688, 9.909347534179688, -6.0582122802734375, 10.998764038085938, 11.21490478515625, 7.058612823486328, 39.36567306518555, 2.4850330352783203, 17.123153686523438, -4.123207092285156, -6.158805847167969, 8.908998489379883, 25.192337036132812, -8.592239379882812, 6.4521026611328125, 47.48394012451172, 23.18817138671875, -4.070413589477539, 18.40489959716797, 52.61042785644531, 26.721813201904297, 12.267837524414062, 12.269088745117188, 31.82506561279297, -15.988372802734375, 21.03784942626953, 21.19296646118164, 44.11986541748047, 26.940460205078125, 16.12967300415039, 17.082725524902344, 2.552431106567383, 14.596954345703125, 5.326751708984375, 15.114471435546875, 8.654010772705078, 13.907012939453125, -1.1377391815185547, 39.04291915893555, 5.0731658935546875, 23.916515350341797, 6.364248275756836, 6.33738899230957, 21.81375503540039, 26.446548461914062, 40.222740173339844, 5.38743782043457, -6.6060333251953125, 26.453907012939453, 12.65338134765625, -16.119068145751953, 7.17567253112793, 11.26003646850586, 9.296333312988281, 7.238801956176758, 34.779197692871094, 12.478843688964844, 39.928951263427734, 17.7713565826416, 1.7271575927734375, 16.748844146728516, -0.08322525024414062, 38.56121826171875, 52.85084915161133, 14.291324615478516, 4.570697784423828, 16.00177001953125, 7.5079498291015625, 7.066425323486328, 1.699432373046875, 21.762802124023438, 14.788330078125, 5.8384552001953125, 4.469306945800781, 20.641189575195312, 13.121070861816406, 25.327041625976562, 22.479496002197266, 40.877418518066406, 55.195465087890625, 16.148284912109375, 6.752321243286133, 39.205116271972656, 8.976812362670898, 20.85576629638672, 27.371307373046875, 4.838592529296875, -10.2100830078125, 8.233917236328125, -0.39223480224609375, 13.130401611328125, 23.14399528503418, 12.806190490722656, 6.663379669189453, 15.688774108886719, 1.9425430297851562, 20.20104217529297], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000180.npy"}
{"epoch": 0.5294117647058824, "step": 181, "batch_size": 128, "mean": 14.259788513183594, "std": 15.411890029907227, "min": -31.375686645507812, "p10": -2.8194816589355467, "median": 13.698159217834473, "p90": 33.771298217773435, "max": 67.5537338256836, "pos_frac": 0.84375, "sample": [12.905462265014648, 25.89629364013672, 4.462486267089844, 14.872589111328125, 8.435150146484375, 27.262332916259766, 18.119956970214844, 14.263301849365234, 10.373981475830078, 14.669815063476562, 3.4916000366210938, 13.396406173706055, 16.221755981445312, -6.489555358886719, 0.8063144683837891, 2.0474853515625, -8.937995910644531, -4.122871398925781, 39.44076156616211, 26.693939208984375, -0.06385421752929688, 21.214202880859375, 53.95879364013672, 15.62188720703125, 2.8413925170898438, -2.8174514770507812, 9.170907974243164, 4.934532165527344, 24.73682403564453, 7.47674560546875, 10.04449462890625, 19.664196014404297, 29.685073852539062, 17.52202606201172, 10.160797119140625, 14.45556640625, -31.375686645507812, 17.26983642578125, -2.82421875, -8.0257568359375, 31.130767822265625, -0.0743865966796875, 23.592086791992188, 15.524188995361328, 1.3375396728515625, 24.940837860107422, 2.414520263671875, 4.572807312011719, 21.41497802734375, 18.945419311523438, 59.56153869628906, 33.634971618652344, 2.2443389892578125, 39.87530517578125, 17.544570922851562, 6.620298385620117, 24.808372497558594, -0.7067031860351562, 15.504486083984375, 1.640542984008789, 5.542999267578125, -9.679229736328125, -0.8372039794921875, 19.907333374023438, 4.845684051513672, 33.156898498535156, 23.80644989013672, 30.081100463867188, 3.9830245971679688, -10.945728302001953, 46.623443603515625, 27.07870101928711, 11.149105072021484, -4.826381683349609, 25.357135772705078, 8.243408203125, 3.438934326171875, 15.606781005859375, 4.19915771484375, 5.139225006103516, 0.099212646484375, 3.5178794860839844, 13.918865203857422, 9.756481170654297, 67.5537338256836, -15.985160827636719, 22.181121826171875, 3.334197998046875, 2.3712158203125, 39.26927185058594, 13.054740905761719, 35.51826477050781, 2.0810623168945312, 36.55921936035156, 2.6539459228515625, 28.216964721679688, 15.856075286865234, 31.267303466796875, 28.328033447265625, 13.477453231811523, 9.058525085449219, 16.94976806640625, 17.62115478515625, 30.934249877929688, 6.5771026611328125, 0.8048515319824219, 18.53052520751953, 34.089393615722656, -1.033111572265625, 36.822906494140625, 20.092010498046875, 12.064533233642578, 16.340309143066406, -3.343791961669922, 31.341644287109375, 41.49406433105469, 5.001739501953125, 8.006168365478516, 16.564167022705078, 40.22785186767578, 23.15704345703125, -9.0374755859375, 14.886999130249023, 7.778350830078125, 25.770343780517578, 21.193084716796875, -0.4109344482421875, -3.1153564453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000181.npy"}
{"epoch": 0.5323529411764706, "step": 182, "batch_size": 128, "mean": 16.614910125732422, "std": 16.062992095947266, "min": -19.047119140625, "p10": -1.3954330444335932, "median": 15.408292770385742, "p90": 38.90051345825195, "max": 72.8317642211914, "pos_frac": 0.8828125, "sample": [-10.421852111816406, 12.008323669433594, 44.909515380859375, 16.248565673828125, -4.076923370361328, -0.10248565673828125, 24.849300384521484, 54.446380615234375, -1.259674072265625, 22.27716064453125, 15.282041549682617, 72.8317642211914, 14.48164176940918, 29.00811767578125, 33.41143798828125, 40.53746795654297, 22.049476623535156, 1.6846427917480469, 31.213409423828125, 26.86807632446289, 8.635744094848633, 23.29983901977539, 52.53168869018555, 24.215328216552734, 21.44016456604004, 39.86786651611328, 22.023773193359375, 8.17469596862793, 2.7240447998046875, 13.137222290039062, 41.41083526611328, 4.409309387207031, -5.0390167236328125, 20.09147834777832, 20.82684326171875, 4.57293701171875, 38.05750274658203, 24.482866287231445, 10.366283416748047, 30.44159698486328, 12.666255950927734, 20.932273864746094, 50.04180908203125, 36.446044921875, -7.20989990234375, 43.71258544921875, 4.481578826904297, 39.854225158691406, 48.644744873046875, 16.01512908935547, 3.9565658569335938, 36.04759216308594, 2.2372798919677734, 0.0655670166015625, 15.125864028930664, -19.047119140625, 8.305511474609375, 7.9859771728515625, 8.803321838378906, 23.725791931152344, 61.585479736328125, 15.464164733886719, 18.795372009277344, 20.539382934570312, 15.352420806884766, 18.994770050048828, -15.712825775146484, 5.118730545043945, -8.496139526367188, 3.606658935546875, -11.326160430908203, 25.963895797729492, 13.023910522460938, 9.099822998046875, 25.514183044433594, 0.13686561584472656, 6.308067321777344, 17.771507263183594, 19.21501922607422, 27.218231201171875, 13.326828002929688, 33.237709045410156, 6.2975006103515625, 2.8664779663085938, 24.654193878173828, 5.5650482177734375, 0.8094406127929688, 7.636238098144531, 23.385169982910156, 7.6025238037109375, -2.8946685791015625, 24.791748046875, 2.4829158782958984, 16.86620330810547, 22.13409423828125, 2.8237991333007812, 14.969192504882812, 39.80572509765625, 5.454517364501953, 22.204017639160156, 17.19611358642578, 15.013046264648438, 0.08258819580078125, 11.947555541992188, 9.82733154296875, 9.143217086791992, -1.7122039794921875, 18.34283447265625, 9.943233489990234, 24.802520751953125, 21.216766357421875, 28.744140625, 10.382457733154297, 38.51256561279297, 20.798660278320312, 20.803646087646484, 12.733497619628906, -10.046089172363281, 15.474014282226562, 27.98260498046875, 4.381378173828125, 21.7371826171875, -8.465011596679688, 24.958049774169922, 9.909431457519531, 7.6292724609375, 28.014366149902344, -9.545074462890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000182.npy"}
{"epoch": 0.5352941176470588, "step": 183, "batch_size": 128, "mean": 14.418588638305664, "std": 15.071099281311035, "min": -15.453132629394531, "p10": -1.9434391021728505, "median": 11.317100524902344, "p90": 36.246160125732416, "max": 56.684906005859375, "pos_frac": 0.8515625, "sample": [21.222976684570312, -15.453132629394531, 5.9051513671875, -5.871532440185547, 2.2974472045898438, 6.920419692993164, 25.099658966064453, 4.717010498046875, 24.427764892578125, 32.64190673828125, 37.170074462890625, 37.60826110839844, 17.26483154296875, -2.6526355743408203, 15.64105224609375, 1.22637939453125, 21.164958953857422, 9.11812973022461, 32.51045227050781, 35.850196838378906, 4.985561370849609, 8.20286750793457, 7.464996337890625, 1.9493255615234375, 4.9111328125, -6.6866302490234375, 13.017593383789062, 30.472267150878906, -1.6394977569580078, 19.51366424560547, 13.152263641357422, 8.170806884765625, 9.83377456665039, -3.8989944458007812, 19.534713745117188, 31.459579467773438, -6.628103256225586, 35.28642272949219, -0.003162384033203125, 0.2764434814453125, 20.016048431396484, 7.1059417724609375, 3.7647247314453125, 18.4232177734375, 45.72819137573242, -4.686943054199219, -3.6133270263671875, -6.575843811035156, 0.60101318359375, 2.8411026000976562, 33.755859375, -0.9454669952392578, 5.946990966796875, 24.686687469482422, 24.360918045043945, 0.2896461486816406, 56.684906005859375, 46.92108917236328, 8.661552429199219, 2.305622100830078, 26.903701782226562, 44.03912353515625, 5.224449157714844, 37.62247848510742, 6.78082275390625, 9.796764373779297, 23.243534088134766, -2.7370071411132812, 1.8037109375, 5.536983489990234, 5.036251068115234, 14.6485595703125, 12.362571716308594, -0.15843582153320312, -1.0095252990722656, 0.46373748779296875, 26.375167846679688, 17.981903076171875, 44.01749038696289, 2.7331085205078125, 7.130626678466797, 11.795967102050781, 24.494998931884766, 8.288841247558594, 44.049468994140625, 30.603296279907227, 23.727081298828125, 21.422027587890625, 22.2166748046875, 41.736541748046875, 27.103225708007812, 20.43700408935547, 11.351478576660156, 18.363906860351562, 23.552223205566406, 13.492286682128906, 14.854011535644531, 42.25328063964844, 4.016420364379883, -8.19964599609375, 22.852256774902344, 42.11898422241211, 8.588123321533203, 25.362083435058594, 25.729324340820312, 55.63011169433594, 25.62366485595703, 5.44158935546875, 13.3800048828125, 6.055217742919922, 14.067903518676758, 2.7779083251953125, 31.38904571533203, 3.4397621154785156, -11.093286514282227, 1.9499988555908203, 21.198226928710938, 6.122962951660156, 17.42730712890625, -12.591175079345703, -0.6927394866943359, 18.558502197265625, 1.6291427612304688, 4.8527679443359375, 4.83184814453125, 23.89814567565918, 11.282722473144531, 9.9375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000183.npy"}
{"epoch": 0.538235294117647, "step": 184, "batch_size": 128, "mean": 16.472187042236328, "std": 13.158830642700195, "min": -7.7605743408203125, "p10": 0.8239740371704104, "median": 15.828636169433594, "p90": 32.470421600341794, "max": 71.7291259765625, "pos_frac": 0.9140625, "sample": [3.458759307861328, 0.375518798828125, 31.704490661621094, 11.52164077758789, 47.038360595703125, 9.138082504272461, 13.577445983886719, 27.45652198791504, 12.894439697265625, 7.7882843017578125, 26.848552703857422, 2.195068359375, 24.301559448242188, 37.897037506103516, 14.446243286132812, 3.9537391662597656, 19.894512176513672, 20.323822021484375, 13.438934326171875, 19.391103744506836, 26.59107208251953, 23.306262969970703, 27.33397102355957, -6.1724853515625, 30.83631134033203, 11.315284729003906, 3.741241455078125, -4.920654296875, 15.947761535644531, 1.5393028259277344, 14.294464111328125, 17.437088012695312, 19.494163513183594, 2.537933349609375, 33.050018310546875, -0.969329833984375, -6.617057800292969, 33.42339324951172, 34.10607147216797, 33.84191131591797, 16.331573486328125, 18.628700256347656, 25.949981689453125, 7.793195724487305, 28.72644805908203, 22.32007598876953, 12.015983581542969, 0.68243408203125, 12.75698471069336, 11.404098510742188, 0.8846340179443359, 18.610916137695312, 17.102874755859375, -7.428672790527344, -7.7605743408203125, 14.463836669921875, 41.266502380371094, 16.897850036621094, 24.563262939453125, 2.91644287109375, 3.709789276123047, -2.531810760498047, 49.4420166015625, 12.65032958984375, 8.86146354675293, 5.158199310302734, 22.55443000793457, 7.030548095703125, 20.62982940673828, 16.541671752929688, -6.500667572021484, -1.3567352294921875, 21.592994689941406, 28.536766052246094, 6.07098388671875, 22.811691284179688, 19.348480224609375, 19.946775436401367, 28.96466064453125, 18.601776123046875, 18.36005401611328, 13.96551513671875, 5.042549133300781, 16.925338745117188, 24.9161376953125, 6.146398544311523, 11.915111541748047, 9.419731140136719, 7.164737701416016, 7.540000915527344, 16.975601196289062, 18.982391357421875, 27.98735809326172, 22.807342529296875, 23.498987197875977, 6.867206573486328, 26.64041519165039, 11.125194549560547, 12.64495849609375, 15.709510803222656, 23.737197875976562, 5.348932266235352, 19.679636001586914, 17.742618560791016, 34.04601287841797, 2.7588958740234375, 23.15709686279297, -2.25640869140625, 8.517801284790039, -3.312265396118164, 32.222023010253906, 14.797836303710938, 12.568082809448242, 48.471412658691406, 5.221931457519531, 13.690032958984375, 22.741193771362305, 19.19927215576172, 71.7291259765625, 8.096710205078125, 28.623947143554688, 50.17442321777344, 36.949913024902344, 9.293121337890625, 14.68072509765625, 14.282501220703125, 18.289291381835938, 15.4039306640625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000184.npy"}
{"epoch": 0.5411764705882353, "step": 185, "batch_size": 128, "mean": 11.8035888671875, "std": 12.625618934631348, "min": -10.64276123046875, "p10": -3.4130764007568355, "median": 10.1097412109375, "p90": 29.30776138305664, "max": 61.12513732910156, "pos_frac": 0.8046875, "sample": [13.64190673828125, 6.540494918823242, 21.553604125976562, -3.872386932373047, -1.5267257690429688, 2.9954872131347656, 4.153236389160156, 30.88092803955078, 24.338134765625, 1.8392715454101562, -7.313167572021484, 15.111007690429688, 19.793062210083008, 8.835670471191406, -4.14434814453125, 4.8603057861328125, 31.642322540283203, -9.877395629882812, 17.08575439453125, 21.729461669921875, 19.657150268554688, 19.048736572265625, 21.448776245117188, 9.296396255493164, -0.19615936279296875, 23.389892578125, 7.215791702270508, 21.015762329101562, 22.022186279296875, 4.023292541503906, 6.22332763671875, 8.20684814453125, 12.13198471069336, 30.272430419921875, 17.368019104003906, 1.7880477905273438, 3.6041336059570312, 3.6313018798828125, 20.333343505859375, 26.61309051513672, 33.46949768066406, -9.867218017578125, -1.949737548828125, -2.303955078125, 18.927284240722656, 9.843826293945312, 18.706562042236328, 11.172515869140625, 29.498435974121094, -6.1239471435546875, 29.226043701171875, 7.575428009033203, 26.892230987548828, -3.605804443359375, 23.093917846679688, 5.126251220703125, 14.648765563964844, 4.608615875244141, 31.10602569580078, 22.348220825195312, 1.096466064453125, 1.2429161071777344, 25.19947052001953, 6.673088073730469, 11.838708877563477, 13.783134460449219, -10.64276123046875, -0.135711669921875, 31.012527465820312, -2.1475830078125, 8.29620361328125, 1.3263092041015625, 11.802703857421875, 1.183746337890625, 9.575851440429688, 27.309226989746094, 18.70684051513672, 7.974578857421875, 35.831695556640625, 12.753833770751953, 5.2933197021484375, 25.524555206298828, -7.915683746337891, 23.793399810791016, 8.14019775390625, 24.427337646484375, 7.121711730957031, -0.480255126953125, 21.971603393554688, -1.3684310913085938, 4.67547607421875, -9.378036499023438, -1.8533935546875, 34.4755859375, -1.8030166625976562, 12.443397521972656, 0.41510963439941406, 17.668228149414062, 35.65214538574219, 22.48040771484375, -2.23382568359375, 11.513328552246094, 6.395381927490234, 2.78076171875, 7.464191436767578, 9.335824966430664, 15.3712158203125, -4.64666748046875, 1.553192138671875, 2.708526611328125, 17.629653930664062, 2.730804443359375, 24.184478759765625, 29.56591796875, -4.417873382568359, 11.280799865722656, 17.834075927734375, 4.577430725097656, 10.375656127929688, 17.81188201904297, 11.161155700683594, -9.102840423583984, -3.3304786682128906, 61.12513732910156, 31.575897216796875, 28.60964584350586, 23.867042541503906, 13.420242309570312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000185.npy"}
{"epoch": 0.5441176470588235, "step": 186, "batch_size": 128, "mean": 14.612378120422363, "std": 18.586530685424805, "min": -27.905838012695312, "p10": -5.97638702392578, "median": 12.416435241699219, "p90": 35.558372497558594, "max": 87.02845764160156, "pos_frac": 0.8203125, "sample": [20.407909393310547, 30.757423400878906, 14.280830383300781, -5.313194274902344, -27.905838012695312, 2.0130386352539062, 87.02845764160156, 15.840110778808594, 13.115768432617188, 13.723498344421387, 35.19598388671875, 27.688446044921875, 2.892986297607422, 6.00128173828125, -1.91827392578125, -11.035736083984375, 18.36126708984375, 28.590179443359375, -1.144287109375, -7.775642395019531, 30.3765869140625, 2.7626419067382812, 18.532447814941406, 32.92839813232422, 33.970916748046875, 10.817169189453125, -5.7034912109375, -5.7262420654296875, 27.51458168029785, 30.51220703125, 8.591434478759766, 5.519721984863281, 20.031219482421875, 20.860374450683594, -5.144157409667969, -6.56005859375, 20.818946838378906, 23.012725830078125, -10.486831665039062, 11.877761840820312, -23.822845458984375, 31.186500549316406, 68.3909912109375, 22.709686279296875, 18.42626953125, 9.753833770751953, 62.20014190673828, 12.955108642578125, -21.0899658203125, 6.9323883056640625, 24.39520263671875, 5.076284408569336, 2.958087921142578, 23.16979217529297, 11.511566162109375, 11.25754165649414, 7.652683258056641, 4.113653182983398, -2.96484375, 30.812515258789062, 19.995769500732422, 25.1556396484375, -2.4228286743164062, 5.185529708862305, 17.193511962890625, 41.100982666015625, 63.736846923828125, 21.89666748046875, 2.975250244140625, 19.117815017700195, 6.776485443115234, 11.811731338500977, 3.0133094787597656, 16.967185974121094, 22.855697631835938, -9.270057678222656, 0.278839111328125, 10.77485466003418, 36.40394592285156, 14.09417724609375, 14.577598571777344, -9.79949951171875, 33.85771942138672, -7.40644645690918, 37.27460861206055, 21.92053985595703, 6.9945831298828125, 11.583992004394531, -7.76300048828125, 1.3443756103515625, 21.809310913085938, 31.917339324951172, 19.709491729736328, -1.8708953857421875, 68.01551818847656, -2.501617431640625, 14.197418212890625, 2.5223541259765625, 41.6488037109375, 27.714197158813477, 6.287467956542969, 21.68492889404297, 4.6171112060546875, 5.17926025390625, 38.07658386230469, 16.866607666015625, 9.722282409667969, 13.735027313232422, 3.4995651245117188, 19.14720916748047, 3.9036102294921875, 10.082542419433594, 9.8411865234375, 15.042545318603516, 21.63804054260254, -10.72955322265625, 28.632282257080078, 45.700233459472656, 8.068313598632812, 2.549144744873047, 4.396717071533203, 14.31610107421875, 2.917051315307617, 50.781402587890625, -17.41189193725586, 9.74530029296875, 45.93141174316406, 1.8410110473632812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000186.npy"}
{"epoch": 0.5470588235294118, "step": 187, "batch_size": 128, "mean": 15.730317115783691, "std": 14.705440521240234, "min": -24.223846435546875, "p10": 0.037504577636719244, "median": 14.277506828308105, "p90": 37.021284103393555, "max": 61.06512451171875, "pos_frac": 0.8984375, "sample": [0.1789398193359375, -3.8148269653320312, 25.634765625, 18.204635620117188, 8.091886520385742, 43.773536682128906, 20.34589385986328, 17.689573287963867, 8.691024780273438, 12.543251037597656, 12.882686614990234, 7.108921051025391, 4.555961608886719, 8.912200927734375, 18.661598205566406, -17.316787719726562, 17.358551025390625, 3.157470703125, 39.19671630859375, 2.992715835571289, 20.474689483642578, 48.41590881347656, 5.356163024902344, 19.603363037109375, 22.16576385498047, 20.65631103515625, 5.950958251953125, 19.55792236328125, 11.64044189453125, 18.983278274536133, 8.32077407836914, 26.644527435302734, 13.735908508300781, 14.395994186401367, 16.696395874023438, 17.606313705444336, 11.719024658203125, 4.351066589355469, 28.718551635742188, 36.597198486328125, 2.3389892578125, 4.113500595092773, 22.05186653137207, 27.06313133239746, 1.7645111083984375, 2.5363998413085938, -2.738494873046875, 38.26731872558594, 42.08142852783203, 17.391260147094727, 35.00855255126953, 29.649044036865234, 36.91551208496094, 10.588623046875, 26.446826934814453, 41.87574005126953, 23.673431396484375, 12.295145034790039, 7.3660430908203125, -6.4055328369140625, 24.471107482910156, 24.529541015625, -24.223846435546875, 12.083831787109375, 12.614620208740234, 10.148122787475586, 13.87469482421875, 61.06512451171875, 6.9005889892578125, 32.95338439941406, 0.4217796325683594, 10.664932250976562, 28.444320678710938, 14.435409545898438, 32.22515869140625, 2.997152328491211, 16.520599365234375, 5.264842987060547, 44.63160705566406, 16.597702026367188, 4.624641418457031, 30.222198486328125, -13.49725341796875, 54.50562286376953, 6.786382675170898, 40.805450439453125, 2.02984619140625, 13.3953857421875, 14.588951110839844, 24.885284423828125, 4.9340362548828125, 31.27682113647461, -5.2302703857421875, 27.545677185058594, 37.26808547973633, 14.774311065673828, 11.930648803710938, 6.107568740844727, 1.9861984252929688, 3.5411834716796875, 18.444137573242188, 4.240203857421875, 16.159255981445312, 8.978218078613281, -3.602447509765625, 27.676437377929688, 20.698150634765625, -8.026229858398438, -0.292510986328125, 20.129135131835938, 25.390819549560547, 39.74159240722656, 1.3303604125976562, 11.189933776855469, 7.047861099243164, 9.560663223266602, 12.487091064453125, -5.144767761230469, 10.019668579101562, 14.159019470214844, -10.644794464111328, 23.23424530029297, 39.08253479003906, 14.613445281982422, 24.510330200195312, -4.846885681152344, 17.84601402282715, 27.674964904785156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000187.npy"}
{"epoch": 0.55, "step": 188, "batch_size": 128, "mean": 15.706097602844238, "std": 15.448244094848633, "min": -12.646345138549805, "p10": -2.644432640075682, "median": 14.551597595214844, "p90": 38.35203247070312, "max": 58.04356384277344, "pos_frac": 0.8515625, "sample": [32.37063217163086, 18.5755672454834, 3.5724563598632812, 22.93695068359375, 16.77867889404297, 5.280757904052734, 32.9052734375, 8.35866928100586, 35.023040771484375, 11.439857482910156, 21.630674362182617, 26.38306427001953, -2.222076416015625, 21.632551193237305, -7.489501953125, 22.3814697265625, 16.341583251953125, 13.650215148925781, 1.926361083984375, 15.31899642944336, 40.19829559326172, 16.599533081054688, 18.87948989868164, 15.247100830078125, 47.65385437011719, 27.280624389648438, 39.42576599121094, -12.646345138549805, 0.79248046875, 36.61321258544922, 36.39231872558594, 27.2894287109375, 2.769306182861328, 6.0293426513671875, -2.1399612426757812, 14.389144897460938, 14.557632446289062, 2.7717971801757812, -4.2989654541015625, 26.09259033203125, 14.864883422851562, 12.038612365722656, 58.04356384277344, -3.6299304962158203, 13.032562255859375, 31.001319885253906, 7.7947998046875, 5.541513442993164, 14.261909484863281, 8.660722732543945, -9.397819519042969, -6.692298889160156, 43.44183349609375, 20.690444946289062, 25.679786682128906, -1.8734493255615234, -3.9154815673828125, 39.766197204589844, 16.45165252685547, 1.6851081848144531, -3.7672958374023438, 25.231849670410156, 29.836925506591797, 1.5258502960205078, -2.0735397338867188, 23.346221923828125, 4.6331024169921875, 3.347644805908203, 48.680030822753906, 14.888458251953125, 37.89186096191406, -5.0069732666015625, 26.01996612548828, 10.861465454101562, 11.549087524414062, -11.055309295654297, 9.801773071289062, 0.7729530334472656, -5.6564483642578125, 51.994049072265625, 3.454498291015625, 24.801467895507812, 18.76224136352539, 6.255138397216797, 23.85845184326172, 28.596511840820312, 17.183387756347656, 20.35086441040039, 0.8072700500488281, 5.87921142578125, 7.546775817871094, -6.846057891845703, 44.786521911621094, 6.593902587890625, 2.9950408935546875, 28.667877197265625, 6.0067138671875, 5.294120788574219, -0.498321533203125, 1.323160171508789, 17.766616821289062, -1.1906795501708984, 1.1952095031738281, 29.45263671875, 27.926162719726562, 14.545562744140625, 29.122482299804688, 19.03929901123047, 42.65165710449219, 13.72024917602539, 29.4271240234375, 54.82353973388672, 11.659866333007812, 18.345977783203125, 6.049530029296875, 1.95074462890625, 19.67450714111328, 9.332046508789062, 39.484710693359375, 27.360145568847656, 8.40479850769043, 45.75775146484375, 14.616559982299805, -3.819469451904297, 23.394344329833984, 4.711341857910156, 25.89952850341797, 2.299957275390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000188.npy"}
{"epoch": 0.5529411764705883, "step": 189, "batch_size": 128, "mean": 14.227302551269531, "std": 14.659168243408203, "min": -13.243789672851562, "p10": -4.4091144561767575, "median": 12.981529235839844, "p90": 31.53371353149414, "max": 71.26165771484375, "pos_frac": 0.8203125, "sample": [13.506263732910156, 31.875473022460938, 13.077255249023438, 24.965316772460938, 14.409908294677734, -9.646608352661133, 31.15984344482422, 12.718826293945312, 21.586143493652344, 13.730567932128906, 20.721715927124023, 3.3282947540283203, 5.092826843261719, 34.81256103515625, 0.462188720703125, -5.727935791015625, 9.777252197265625, 17.194122314453125, 1.7951412200927734, 19.26665496826172, 22.07073974609375, 7.827934265136719, 8.138214111328125, 28.24151611328125, 8.105701446533203, 25.585548400878906, -0.7572479248046875, 20.179176330566406, -7.3102569580078125, 4.888795852661133, -0.7823104858398438, 36.32798767089844, 27.516347885131836, 9.931621551513672, 37.7203369140625, 37.695068359375, 15.13343620300293, 36.469390869140625, 2.791341781616211, 22.294647216796875, 9.708953857421875, -3.028045654296875, 25.973594665527344, -2.1892318725585938, 23.97216796875, -6.248908996582031, 20.10470199584961, 11.319599151611328, 21.898618698120117, -4.3385467529296875, -1.780120849609375, 47.595306396484375, -3.595203399658203, 9.935733795166016, 26.563568115234375, -12.673225402832031, 6.547782897949219, 22.330078125, -11.079669952392578, -3.6975841522216797, 12.567672729492188, 13.512039184570312, 18.39429473876953, -4.9784698486328125, 7.122123718261719, 11.137044906616211, 30.14415740966797, 44.40234375, 31.387245178222656, 10.050079345703125, 14.048553466796875, 16.73247528076172, 1.9312744140625, 21.21709442138672, 16.051578521728516, 11.657842636108398, 12.12816047668457, -4.573772430419922, 9.433982849121094, 16.495040893554688, 25.945648193359375, -3.67901611328125, -9.117416381835938, 50.66904830932617, 3.9335784912109375, 71.26165771484375, 12.861007690429688, -0.830841064453125, 27.548728942871094, 10.24789810180664, 13.751077651977539, 12.924888610839844, 11.04058837890625, 12.759513854980469, 23.929977416992188, 10.560897827148438, 19.00920867919922, 5.318855285644531, 16.394948959350586, 26.707672119140625, -7.9269256591796875, 11.659866333007812, 13.038169860839844, -12.854721069335938, 10.628868103027344, 20.047075271606445, 9.217317581176758, 47.17621612548828, 17.654903411865234, 14.322517395019531, 7.133201599121094, 34.40058898925781, 20.43830680847168, 20.220237731933594, 7.033439636230469, 28.368255615234375, 9.676956176757812, -4.694236755371094, 25.00433349609375, 20.01894187927246, 8.987205505371094, 19.0689697265625, 1.48724365234375, 14.315650939941406, 22.09136962890625, -13.243789672851562, 2.319498062133789, 51.91314697265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000189.npy"}
{"epoch": 0.5558823529411765, "step": 190, "batch_size": 128, "mean": 17.549861907958984, "std": 18.404584884643555, "min": -20.359846115112305, "p10": -4.364993667602539, "median": 16.111699104309082, "p90": 37.97101135253906, "max": 83.666259765625, "pos_frac": 0.8203125, "sample": [27.90313720703125, 15.126922607421875, 21.656646728515625, 12.466873168945312, -4.748847961425781, 4.603416442871094, 1.4721527099609375, 27.3760986328125, 9.96722412109375, 43.27992248535156, -0.0348968505859375, 12.371055603027344, 19.031471252441406, 27.429946899414062, 5.725166320800781, 14.792835235595703, 0.3652820587158203, 13.914567947387695, 28.242477416992188, 37.8468017578125, 7.053428649902344, 49.36690902709961, 83.666259765625, 7.701793670654297, 30.64544677734375, 1.107086181640625, -7.013702392578125, 13.12200927734375, 18.101470947265625, 10.2734375, 72.64251708984375, 3.517375946044922, 20.551239013671875, -3.1127281188964844, 30.009552001953125, 31.741844177246094, 10.453201293945312, 19.213851928710938, 22.952686309814453, 13.323844909667969, 20.61029052734375, 13.686653137207031, 13.333686828613281, 8.471616744995117, 5.908794403076172, -0.21099090576171875, 48.55577087402344, 17.366127014160156, 19.534080505371094, 37.71066665649414, 4.715095520019531, -0.09696578979492188, 13.951194763183594, -1.4255752563476562, -4.730751037597656, 13.1036376953125, 1.0703506469726562, 17.00611114501953, 10.979774475097656, 14.650848388671875, 19.198986053466797, 82.40522766113281, -4.208240509033203, -3.6571197509765625, 19.923301696777344, 30.68402862548828, -5.7701263427734375, 23.278640747070312, 17.540376663208008, 4.404376983642578, 9.970142364501953, 23.23957061767578, 15.897552490234375, 26.54443359375, 33.946083068847656, 19.2562255859375, 36.188743591308594, -9.09707260131836, 30.42096710205078, 16.32584571838379, 34.97047424316406, 22.025100708007812, 38.260833740234375, 34.78404235839844, 20.01016616821289, 11.990333557128906, 31.654592514038086, 20.414094924926758, 13.336723327636719, -15.262332916259766, 55.554359436035156, -7.33848762512207, 54.54736328125, -3.0341567993164062, 16.419876098632812, 2.171356201171875, 14.124000549316406, 5.818000793457031, 10.727127075195312, 52.02733612060547, -3.9172821044921875, 43.45625686645508, 46.1068115234375, 4.102531433105469, 11.306528091430664, 0.6031684875488281, 31.11334228515625, -9.502670288085938, 40.22654724121094, 26.816871643066406, 16.513504028320312, 34.25587463378906, 27.53564453125, -4.73101806640625, -20.359846115112305, -8.814552307128906, -16.463088989257812, 18.43721580505371, 26.834545135498047, 30.43897247314453, 31.78070831298828, 24.857290267944336, 26.197715759277344, -5.0050048828125, 17.055145263671875, 13.609817504882812, 37.119361877441406, -1.1811447143554688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000190.npy"}
{"epoch": 0.5588235294117647, "step": 191, "batch_size": 128, "mean": 16.32958984375, "std": 16.290515899658203, "min": -20.793060302734375, "p10": -1.0047069549560523, "median": 14.730823516845703, "p90": 38.19609603881835, "max": 77.8643798828125, "pos_frac": 0.8828125, "sample": [-0.3559608459472656, 0.2240447998046875, 14.732856750488281, 30.539505004882812, 43.69099044799805, 45.5030517578125, 12.34918212890625, 7.0503692626953125, 33.58256530761719, 26.451534271240234, 42.697837829589844, 18.952781677246094, 40.563995361328125, 17.254783630371094, 4.275957107543945, 26.121261596679688, 18.852428436279297, 10.182304382324219, 3.8499908447265625, -0.2794513702392578, 15.13751220703125, 5.2869415283203125, 13.460533142089844, 5.34124755859375, 55.217620849609375, 20.736953735351562, 16.82988739013672, 46.00139617919922, 2.6492080688476562, -5.620319366455078, 0.49304771423339844, -14.966598510742188, 21.369583129882812, 44.182899475097656, 13.964332580566406, 10.442459106445312, 1.9154510498046875, 22.687576293945312, 45.041748046875, -9.600608825683594, -3.505868911743164, 14.728790283203125, 29.867813110351562, 2.459959030151367, -5.212821960449219, 11.729879379272461, 3.262685775756836, 31.636905670166016, 13.052001953125, 24.003982543945312, 26.1005859375, 37.18128204345703, 19.185592651367188, 23.42188262939453, -20.793060302734375, 4.6394195556640625, 2.6922130584716797, 26.84524917602539, 30.176185607910156, 4.202709197998047, 11.744041442871094, -19.60688018798828, 12.974945068359375, 0.8251266479492188, 45.82560729980469, 9.523429870605469, 8.760217666625977, 8.56905746459961, 5.692863464355469, 32.5921630859375, 14.148902893066406, 4.776893615722656, 32.27408981323242, -2.5184478759765625, 22.074264526367188, 17.813974380493164, 13.838363647460938, 18.874828338623047, 8.914016723632812, 18.427692413330078, -2.7471446990966797, 25.868953704833984, 6.481361389160156, -4.36505126953125, 23.901153564453125, 19.418697357177734, 4.82975959777832, 19.018753051757812, 9.110641479492188, 19.049381256103516, 77.8643798828125, 58.65673828125, 34.37879943847656, 16.772817611694336, 8.002105712890625, 16.847991943359375, 33.639495849609375, 15.591316223144531, 22.461631774902344, 33.03034973144531, 21.930023193359375, 19.606098175048828, 9.022933959960938, 50.97441101074219, 34.564552307128906, 3.01959228515625, 18.389772415161133, 15.256805419921875, 2.7153587341308594, 8.274894714355469, 23.177597045898438, 0.5642356872558594, 7.304080963134766, 20.324323654174805, 19.508583068847656, -5.679203033447266, 3.014862060546875, 12.984016418457031, 34.75359344482422, 41.094970703125, 7.047721862792969, 8.488960266113281, 1.4545516967773438, 34.30015563964844, 8.870586395263672, -3.3062286376953125, -8.271713256835938, 14.978286743164062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000191.npy"}
{"epoch": 0.5617647058823529, "step": 192, "batch_size": 128, "mean": 15.130165100097656, "std": 16.01743507385254, "min": -12.752809524536133, "p10": -2.191331672668456, "median": 12.878276824951172, "p90": 34.262004852294915, "max": 78.5748519897461, "pos_frac": 0.859375, "sample": [21.24787139892578, 11.190889358520508, 22.219894409179688, -8.016014099121094, 16.197341918945312, 7.091394424438477, -8.339729309082031, 25.09522247314453, 78.5748519897461, 4.0807647705078125, 23.00785255432129, 12.702644348144531, 25.845352172851562, 14.144157409667969, 4.153776168823242, 46.829681396484375, 1.46295166015625, 15.885269165039062, 9.168342590332031, 20.466819763183594, 15.193534851074219, 13.293621063232422, 31.79694175720215, 38.488731384277344, 3.1924400329589844, 22.544029235839844, 26.375350952148438, 4.108753204345703, 4.747955322265625, 24.559234619140625, 13.480178833007812, 15.154064178466797, 19.759490966796875, -4.09013557434082, 35.26787567138672, 14.055904388427734, 13.655941009521484, -5.115436553955078, -6.11767578125, 8.564741134643555, 10.153732299804688, -10.078765869140625, 51.47087097167969, 4.881536483764648, -5.482421875, 0.9639816284179688, 31.10479736328125, 8.193784713745117, 4.9047698974609375, 5.0244140625, 19.441513061523438, 12.995513916015625, 31.127166748046875, 14.130821228027344, -10.585594177246094, 23.0390625, 33.536865234375, 36.95337677001953, 11.307662963867188, 12.026596069335938, 0.4334869384765625, -1.2163562774658203, 14.543487548828125, 29.823104858398438, 11.541803359985352, 12.667469024658203, 23.94384765625, -2.84783935546875, 1.6145172119140625, -0.48712158203125, -11.793609619140625, 4.24114990234375, 8.392333984375e-05, 3.3193321228027344, 41.76081848144531, 29.345504760742188, 15.990577697753906, -9.075355529785156, 5.181726455688477, 19.330604553222656, 7.152130126953125, 21.795108795166016, 33.83091735839844, 0.15381622314453125, 32.014190673828125, 6.383636474609375, 29.867965698242188, -1.8902587890625, 27.770233154296875, 7.4284210205078125, 28.470550537109375, 11.329360961914062, 24.06598663330078, 72.04684448242188, 14.14947509765625, -0.8454437255859375, 55.19084167480469, 3.01934814453125, 27.51340103149414, 36.202796936035156, 10.43657112121582, 8.251609802246094, 25.61139488220215, 6.433778762817383, 6.398189544677734, 8.203414916992188, 2.9257278442382812, -12.752809524536133, -4.9468994140625, 3.6566829681396484, 51.667137145996094, 8.332069396972656, 13.241386413574219, 12.798149108886719, 9.984428405761719, 13.566253662109375, 13.024574279785156, 12.958404541015625, 41.64140319824219, 17.900161743164062, 12.707290649414062, 27.713699340820312, -1.9099712371826172, 7.70648193359375, 35.575904846191406, 25.64501953125, 8.882354736328125, 30.007522583007812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000192.npy"}
{"epoch": 0.5647058823529412, "step": 193, "batch_size": 128, "mean": 15.086999893188477, "std": 13.267809867858887, "min": -15.931396484375, "p10": -2.530614471435547, "median": 14.669439315795898, "p90": 30.83922424316406, "max": 54.693824768066406, "pos_frac": 0.8671875, "sample": [10.987091064453125, 8.366691589355469, 21.12054443359375, 25.98462677001953, 24.312496185302734, 30.807716369628906, 28.811641693115234, 9.217056274414062, 32.172672271728516, 14.145408630371094, 21.02639389038086, 13.064521789550781, 26.214920043945312, 5.4152069091796875, -4.673225402832031, 24.839200973510742, 1.2442169189453125, 30.912742614746094, 19.618114471435547, 41.98564147949219, -12.704151153564453, 2.9308929443359375, 54.693824768066406, 6.7361907958984375, 28.50769805908203, 5.127342224121094, 11.752899169921875, 2.9850540161132812, 12.028923034667969, -10.294319152832031, 18.38981056213379, 9.152030944824219, 18.182209014892578, 29.240074157714844, 3.7951812744140625, -2.5297698974609375, -0.922271728515625, 47.286041259765625, 10.68368911743164, -3.080718994140625, -3.2376022338867188, 17.929107666015625, 28.78966522216797, 14.780265808105469, 0.4890899658203125, 29.097442626953125, 14.549346923828125, -3.603363037109375, -0.5692138671875, -3.2606887817382812, 9.234832763671875, 8.254219055175781, 15.011846542358398, 23.415573120117188, 11.315444946289062, 3.5979766845703125, 36.26747131347656, 23.173410415649414, 15.386194229125977, 27.979496002197266, 8.315807342529297, 19.08489990234375, -1.6807098388671875, 7.6049652099609375, 10.412324905395508, 14.310680389404297, 5.0711822509765625, 20.778797149658203, 27.14453125, 15.417892456054688, 6.9703369140625, 20.325237274169922, 21.082748413085938, 8.986682891845703, 20.331432342529297, -15.931396484375, 16.8010311126709, 23.27783966064453, 7.579154968261719, -3.5286407470703125, 24.047767639160156, 7.2031097412109375, 28.026123046875, 41.182220458984375, 21.43988800048828, 11.91253662109375, 23.874114990234375, 10.233572006225586, 19.352996826171875, 14.833473205566406, 12.25543212890625, -2.5325851440429688, 2.174579620361328, 14.558612823486328, 14.986799240112305, 16.113601684570312, 21.321311950683594, 23.030227661132812, 27.761978149414062, 42.710662841796875, -5.143768310546875, 0.7008819580078125, 17.021629333496094, 37.822113037109375, 29.24361801147461, 32.573814392089844, 11.369644165039062, 13.967714309692383, 35.074974060058594, 41.247798919677734, 7.650177001953125, 19.8039493560791, 10.777549743652344, 6.491584777832031, 0.0198974609375, 24.01162338256836, 15.59735107421875, -2.715351104736328, 0.9018478393554688, 21.56803321838379, 22.567184448242188, 18.74716567993164, -5.2195892333984375, 0.4546356201171875, 43.28473663330078, 0.5785388946533203, 29.275381088256836, 10.48687744140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000193.npy"}
{"epoch": 0.5676470588235294, "step": 194, "batch_size": 128, "mean": 14.962656021118164, "std": 15.016422271728516, "min": -18.216442108154297, "p10": -1.6802885055541987, "median": 12.32329273223877, "p90": 34.05780334472656, "max": 59.99626159667969, "pos_frac": 0.8671875, "sample": [9.959781646728516, -9.296026229858398, 35.03797912597656, 13.459716796875, 2.778034210205078, 56.47723388671875, 24.142135620117188, 3.5116500854492188, 13.1893310546875, 15.265480041503906, 5.361602783203125, 11.1302490234375, 11.573570251464844, 19.9852294921875, 16.380775451660156, 25.401321411132812, 21.21990966796875, 10.451751708984375, 48.6851806640625, 7.766387939453125, 21.6978759765625, 7.329677581787109, 14.963233947753906, 0.6824836730957031, 26.654678344726562, -10.253997802734375, 5.6171875, 31.16143798828125, 9.043357849121094, 14.056488037109375, -18.216442108154297, 29.808107376098633, 4.778175354003906, 26.239730834960938, -11.505077362060547, 38.95545959472656, -2.026887893676758, 11.783706665039062, 7.464866638183594, 27.56792449951172, 14.379432678222656, 20.435569763183594, 10.199207305908203, 19.946971893310547, -2.7417831420898438, 12.265689849853516, 6.097991943359375, 33.68013000488281, 7.095287322998047, -3.5476417541503906, 18.65349578857422, 6.918067932128906, 4.425956726074219, 25.993209838867188, -8.800827026367188, -4.787748336791992, -4.457115173339844, 0.25048828125, 12.309707641601562, 9.483474731445312, 12.754739761352539, 8.155406951904297, 15.477058410644531, 22.193458557128906, 47.724220275878906, 17.609451293945312, 7.3607635498046875, 59.99626159667969, 55.01808166503906, 42.95464324951172, 24.37449836730957, 3.6737709045410156, -1.093414306640625, 8.40701675415039, 14.056770324707031, 9.21469497680664, 34.93904113769531, 47.44679260253906, 0.32013702392578125, -1.1820144653320312, 1.5253677368164062, 8.104507446289062, 22.24350357055664, 7.9430694580078125, 7.8856658935546875, 15.864124298095703, 39.87964630126953, 11.771728515625, -1.1599578857421875, 30.254886627197266, 28.465755462646484, 57.011566162109375, -15.5445556640625, 29.25103759765625, -1.5317459106445312, 8.798049926757812, 17.16562271118164, 7.229484558105469, 15.427505493164062, 27.4736328125, 10.439781188964844, 12.407733917236328, 3.330036163330078, 7.020915985107422, 23.933197021484375, 25.55859375, 11.78668212890625, 19.878557205200195, 17.93108367919922, 4.413238525390625, -5.6309356689453125, 15.117240905761719, 5.107757568359375, 10.171529769897461, 8.460189819335938, -7.35392951965332, 5.4339599609375, 21.64352798461914, 18.61414337158203, 32.12834548950195, 13.160926818847656, 18.33374786376953, 26.5230712890625, 23.235748291015625, 36.81159973144531, 30.257057189941406, 10.62629508972168, 12.336877822875977], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000194.npy"}
{"epoch": 0.5705882352941176, "step": 195, "batch_size": 128, "mean": 15.640497207641602, "std": 16.430950164794922, "min": -17.454978942871094, "p10": -4.116810417175293, "median": 13.5986967086792, "p90": 37.72742691040039, "max": 80.42291259765625, "pos_frac": 0.859375, "sample": [10.288656234741211, 51.301658630371094, 21.601409912109375, -12.680007934570312, -4.39991569519043, 18.278533935546875, 29.707847595214844, 29.453081130981445, 16.59065055847168, 21.95148468017578, 27.35700798034668, 8.111289978027344, 7.342643737792969, 2.1507415771484375, 38.11211013793945, 1.2857398986816406, 33.00072479248047, 5.521936416625977, 6.157131195068359, 1.8154144287109375, 17.193443298339844, 7.302764892578125, 38.36542510986328, 17.580215454101562, 39.836585998535156, 9.415374755859375, 7.131069183349609, 73.41795349121094, 10.004318237304688, -3.9954795837402344, 33.50201416015625, 37.61250305175781, 10.769088745117188, 9.43072509765625, 25.584243774414062, -5.093482971191406, 24.058364868164062, -5.195648193359375, -1.6314353942871094, 20.925949096679688, 10.579402923583984, 14.076431274414062, 27.500885009765625, 4.582221984863281, 16.4993896484375, 8.328725814819336, 26.53204345703125, -8.933258056640625, 20.757980346679688, 8.56399154663086, 12.607009887695312, 13.36431884765625, 11.147735595703125, 23.448806762695312, 11.537223815917969, 8.740131378173828, 47.30755615234375, 20.832927703857422, 26.2821044921875, 5.543514251708984, 1.19805908203125, -0.7609577178955078, 15.470932006835938, 21.777984619140625, 14.275794982910156, 8.696296691894531, 37.995582580566406, 51.38909912109375, 3.6205902099609375, -0.6797504425048828, 21.676956176757812, 13.572729110717773, 4.187355041503906, 80.42291259765625, 19.625534057617188, 33.991477966308594, 26.35883331298828, 21.471160888671875, 30.092788696289062, 13.624664306640625, 0.1773834228515625, 10.390960693359375, 23.445913314819336, 23.24749755859375, 38.7067985534668, 53.30149841308594, -14.489665985107422, 21.282867431640625, 45.733551025390625, 20.27680206298828, 25.2080078125, 22.179336547851562, 7.987846374511719, 15.484413146972656, 15.602645874023438, -4.904045104980469, 27.33153533935547, 14.347175598144531, 21.739181518554688, 17.310768127441406, 24.090946197509766, 18.19646453857422, -8.170578002929688, 1.1726341247558594, -17.45277976989746, 4.882865905761719, -1.65325927734375, -6.335239410400391, 10.784523010253906, 8.810043334960938, -8.54568862915039, 13.20733642578125, 9.6458740234375, 25.143463134765625, 0.29994964599609375, 21.26584243774414, 2.3230133056640625, 11.369789123535156, 6.5842437744140625, 26.918865203857422, 2.2917633056640625, -7.855567932128906, -17.454978942871094, 41.686798095703125, 2.9840927124023438, 12.797750473022461, 24.41064453125, 9.755111694335938], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000195.npy"}
{"epoch": 0.5735294117647058, "step": 196, "batch_size": 128, "mean": 14.60409927368164, "std": 15.522587776184082, "min": -22.59831428527832, "p10": -3.670430755615234, "median": 13.540639877319336, "p90": 35.42823829650879, "max": 61.0093994140625, "pos_frac": 0.8515625, "sample": [6.493885040283203, 4.3692474365234375, 11.633777618408203, 18.137794494628906, 1.9615459442138672, 3.6575927734375, 0.7069091796875, 19.937828063964844, 6.215970993041992, 18.65384864807129, 1.9633255004882812, -7.9110107421875, 14.276775360107422, 39.695594787597656, 0.2458648681640625, 14.087118148803711, 21.65639305114746, 18.887266159057617, -12.892791748046875, 2.9790267944335938, 18.68857192993164, 15.119544982910156, 36.177005767822266, 6.3746795654296875, -12.30377197265625, 6.574617385864258, 6.4576568603515625, -3.798065185546875, -10.482177734375, 24.01180076599121, 42.837257385253906, 29.918479919433594, 7.62127685546875, 16.916046142578125, 4.826732635498047, 26.675735473632812, -11.388397216796875, 0.39908409118652344, 4.4085540771484375, 12.539417266845703, -8.522554397583008, 13.341064453125, 27.717010498046875, 37.77971649169922, 4.8369293212890625, 44.830963134765625, -6.788936614990234, 31.32422637939453, -3.6157302856445312, 0.6751174926757812, 15.212141036987305, 16.73282241821289, 61.0093994140625, -2.963531494140625, 60.83526611328125, 13.61638069152832, 27.482070922851562, 16.893814086914062, 25.059356689453125, 16.9990234375, 27.086563110351562, 32.05638122558594, 0.37226104736328125, 8.045860290527344, 31.888107299804688, 8.871047973632812, 5.01776123046875, 11.258377075195312, 5.869724273681641, 9.462966918945312, 16.933181762695312, -4.3925933837890625, 24.175323486328125, 46.556846618652344, 13.920173645019531, 16.842918395996094, 22.90851593017578, 22.859130859375, 45.45099639892578, 21.383628845214844, 24.494056701660156, 2.999553680419922, 31.9610595703125, 14.4530029296875, 12.481033325195312, 13.19583511352539, 3.962818145751953, 13.464899063110352, 7.427421569824219, -22.59831428527832, 4.282508850097656, 35.107337951660156, -3.075845718383789, -0.852508544921875, 5.600860595703125, 19.58953285217285, 12.950725555419922, 23.573020935058594, 40.85234451293945, 9.9979248046875, 19.383106231689453, 10.603500366210938, 6.452114105224609, 14.627740859985352, 0.38928985595703125, 19.005950927734375, 28.33039093017578, 21.159912109375, 44.638545989990234, 23.6732177734375, 4.956184387207031, 37.254364013671875, 10.755050659179688, 22.94237518310547, -2.7696151733398438, 6.9944305419921875, 25.063045501708984, 32.788818359375, -8.177436828613281, 14.9925537109375, 21.338790893554688, -4.915779113769531, 55.54656982421875, -3.8301620483398438, 4.97650146484375, -0.1086273193359375, 22.372299194335938, 25.66253662109375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000196.npy"}
{"epoch": 0.5764705882352941, "step": 197, "batch_size": 128, "mean": 15.062865257263184, "std": 16.960172653198242, "min": -17.89153289794922, "p10": -3.4606197357177733, "median": 12.392358779907227, "p90": 40.57098197937012, "max": 74.88973999023438, "pos_frac": 0.8046875, "sample": [9.065940856933594, 33.684486389160156, 19.147323608398438, 8.142593383789062, 7.907508850097656, 46.059814453125, -13.55364990234375, 42.42793273925781, 66.11946868896484, 50.74827575683594, 17.854461669921875, 7.401313781738281, 7.474822998046875, -1.2332496643066406, 2.804292678833008, 7.0383148193359375, 32.86440658569336, 33.77886962890625, -6.343147277832031, 6.437751770019531, 19.826553344726562, 15.241188049316406, 47.47605895996094, 10.723495483398438, 16.912879943847656, -2.3584976196289062, 21.008865356445312, 8.933349609375, 7.284938812255859, -0.9955081939697266, -3.9623870849609375, 14.942031860351562, 15.132959365844727, 22.770263671875, 17.471817016601562, 30.92544937133789, 8.037038803100586, 32.63745880126953, -1.435150146484375, 12.499961853027344, 0.7026748657226562, 19.530725479125977, 1.8153743743896484, 23.820449829101562, 17.195690155029297, 16.076885223388672, 18.04241943359375, 11.341812133789062, -3.5069732666015625, 10.919343948364258, 15.287891387939453, 8.769943237304688, 12.503463745117188, 28.353530883789062, 42.4363899230957, -6.022968292236328, 10.184928894042969, 2.85198974609375, 1.032562255859375, -1.9273815155029297, -11.668516159057617, 16.630523681640625, 62.55903625488281, -6.643407821655273, 12.28475570678711, 10.350471496582031, -13.497488021850586, 41.095733642578125, 45.291900634765625, -2.1298828125, 3.073375701904297, 2.560911178588867, -3.440753936767578, 40.34608840942383, 23.222076416015625, -0.1428375244140625, 13.329689025878906, 19.655319213867188, 22.784011840820312, 18.204490661621094, 7.722164154052734, 7.910531997680664, 23.0759334564209, 15.712326049804688, 11.000701904296875, 10.436622619628906, 11.6611328125, 34.094146728515625, 16.260234832763672, 8.66329574584961, 11.6800537109375, -2.2092514038085938, 74.88973999023438, 37.8101806640625, 6.610187530517578, 21.09729766845703, 38.72364807128906, 4.714603424072266, 27.191543579101562, -4.961826324462891, -0.2062530517578125, 16.16211700439453, -4.702846527099609, 13.736724853515625, 21.54155731201172, 19.203208923339844, 47.68413543701172, 4.112152099609375, 6.474720001220703, 35.552833557128906, 31.208595275878906, 13.58551025390625, -13.487655639648438, 4.407012939453125, -0.23343849182128906, 6.29022216796875, -17.89153289794922, 4.895050048828125, 45.75261688232422, 43.7158203125, -6.8536834716796875, 14.075889587402344, 3.67633056640625, 21.49372100830078, 20.538650512695312, -0.4183311462402344, 15.742864608764648, 25.73290252685547], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000197.npy"}
{"epoch": 0.5794117647058824, "step": 198, "batch_size": 128, "mean": 16.278064727783203, "std": 16.926231384277344, "min": -13.530336380004883, "p10": -2.9456909179687494, "median": 14.200631141662598, "p90": 34.708514022827146, "max": 73.35286712646484, "pos_frac": 0.8515625, "sample": [13.129043579101562, 11.830940246582031, -2.3711395263671875, 0.1395893096923828, 3.4829940795898438, 34.2992057800293, 13.646163940429688, 24.62468719482422, 44.44409942626953, 33.66088104248047, 23.004440307617188, -0.4378395080566406, 31.386009216308594, -2.7507591247558594, 48.42253112792969, 20.468734741210938, 32.138214111328125, 11.123807907104492, 23.14740753173828, 2.779571533203125, 6.273979187011719, 3.1661834716796875, 63.3912353515625, 28.27703857421875, 16.270549774169922, -2.41595458984375, 6.117403030395508, -10.32080078125, 0.8128204345703125, 19.40947723388672, 31.727676391601562, 26.55384063720703, -13.530336380004883, 6.371337890625, 9.338119506835938, 28.164819717407227, 17.35302734375, 17.40565299987793, 10.149444580078125, 35.66356658935547, -6.665657043457031, -9.33856201171875, 23.21582794189453, 30.383193969726562, -0.5626602172851562, 2.7220916748046875, 30.395679473876953, 42.11529541015625, 18.95269775390625, 15.990936279296875, 7.2763519287109375, 3.846038818359375, -3.400531768798828, 25.024398803710938, 1.400970458984375, 33.677040100097656, -3.7532882690429688, 20.559188842773438, 33.97441864013672, 60.597023010253906, 32.00471878051758, 3.5196056365966797, 15.51776123046875, 14.501993179321289, 17.761695861816406, 20.503189086914062, -7.742279052734375, 37.103904724121094, 12.642040252685547, 45.7165641784668, -5.768638610839844, 17.218137741088867, 32.10206985473633, 8.126228332519531, 4.267444610595703, 7.9146728515625, 7.398796081542969, 48.97318649291992, 65.2957763671875, 8.345584869384766, -7.677177429199219, 6.733161926269531, 3.8205795288085938, 20.191680908203125, 29.327667236328125, 3.2282447814941406, 2.3348846435546875, 52.77558135986328, 4.8326263427734375, 32.38627624511719, 52.467803955078125, 9.061418533325195, 7.119293212890625, -11.059700012207031, 21.73773193359375, 9.898218154907227, 7.981168746948242, 6.56324577331543, 15.565221786499023, 5.883209228515625, 23.59326934814453, 7.746002197265625, 2.656494140625, 6.670339584350586, 13.676155090332031, 22.833877563476562, 16.33684539794922, 19.221893310546875, 9.501922607421875, 22.621543884277344, 0.1640167236328125, 20.048736572265625, 24.576904296875, 13.899269104003906, 4.4509124755859375, 27.679000854492188, 16.404541015625, -9.482532501220703, 26.136962890625, 17.80559539794922, 13.750032424926758, -4.2215728759765625, -1.0097999572753906, -6.5547027587890625, 22.47673988342285, 73.35286712646484, 29.063438415527344, 18.861618041992188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000198.npy"}
{"epoch": 0.5823529411764706, "step": 199, "batch_size": 128, "mean": 17.97695541381836, "std": 18.634923934936523, "min": -18.72003173828125, "p10": -3.589223861694336, "median": 14.386411666870117, "p90": 43.13899612426758, "max": 73.83610534667969, "pos_frac": 0.8828125, "sample": [0.12204360961914062, 60.33445739746094, 1.6364517211914062, 43.393821716308594, 6.375053405761719, 9.174560546875, -3.6771697998046875, 21.32172393798828, 11.282356262207031, 13.394058227539062, 18.314857482910156, 16.608154296875, 9.380264282226562, 48.40027618408203, 19.910545349121094, 15.683551788330078, 26.16461181640625, 32.7271728515625, 33.30558776855469, 21.597915649414062, 3.332073211669922, -3.415863037109375, 12.205795288085938, 20.765634536743164, 10.096454620361328, 14.852203369140625, -6.347023010253906, 38.9141845703125, 9.465087890625, 54.931846618652344, 21.929439544677734, 62.876220703125, 4.722381591796875, 7.37451171875, 36.3525390625, 12.22119140625, 48.612152099609375, 3.4949493408203125, -3.7493515014648438, 2.39892578125, 43.02978515625, -12.225936889648438, 40.624664306640625, 58.902645111083984, -18.72003173828125, 36.98797607421875, 46.07853698730469, 8.725021362304688, 3.2234039306640625, 2.8346309661865234, 6.203886032104492, 4.80133056640625, 30.08747100830078, 4.398284912109375, -6.0191192626953125, 36.65216064453125, 10.7354736328125, -3.551532745361328, 54.748252868652344, 21.72540283203125, 11.077133178710938, 22.127490997314453, 29.372394561767578, 42.52594757080078, 0.09572601318359375, 9.649650573730469, 18.054031372070312, 20.450729370117188, 25.152435302734375, 10.00238037109375, 22.634536743164062, 3.9150314331054688, 11.938369750976562, 32.01310729980469, 31.792434692382812, 12.004022598266602, 17.856536865234375, 32.27007293701172, 14.490299224853516, 10.241451263427734, 13.199905395507812, 10.449920654296875, -9.181159973144531, 4.245765686035156, 27.789718627929688, 7.2529144287109375, 29.66436004638672, 3.9283447265625, 15.828910827636719, 21.304718017578125, 1.4283523559570312, 34.8939208984375, 52.85139465332031, 73.83610534667969, -9.647659301757812, 27.448806762695312, 3.2349624633789062, -7.434349060058594, 17.975852966308594, -5.833488464355469, 19.652664184570312, 14.282524108886719, 14.867439270019531, 70.43695831298828, 65.68919372558594, 21.846359252929688, 14.207916259765625, 0.7376937866210938, 34.33708190917969, 3.88067626953125, -8.8280029296875, 36.179649353027344, 15.942407608032227, 32.49903106689453, 23.713184356689453, -9.700435638427734, 2.4705810546875, 7.61164665222168, 10.046928405761719, 7.3995819091796875, 22.248626708984375, 13.082267761230469, 17.628433227539062, 5.943021774291992, 29.536163330078125, 29.631851196289062, 1.9342575073242188, -12.854400634765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000199.npy"}
{"epoch": 0.5852941176470589, "step": 200, "batch_size": 128, "mean": 19.817646026611328, "std": 16.165380477905273, "min": -22.18853759765625, "p10": 1.3399223327636725, "median": 19.63735294342041, "p90": 39.18050880432128, "max": 59.35315704345703, "pos_frac": 0.90625, "sample": [17.266403198242188, 17.23480987548828, 16.233356475830078, 22.924346923828125, 22.424476623535156, -6.733781814575195, 26.42896270751953, 25.066734313964844, 33.467071533203125, 2.2473411560058594, 7.405317306518555, 0.9731369018554688, 7.23504638671875, 6.321521759033203, 32.27497482299805, 18.02970314025879, 41.82876968383789, 19.315521240234375, 17.269134521484375, -8.014623641967773, 41.97239685058594, 1.4971160888671875, 54.529624938964844, 14.33148193359375, 30.703750610351562, 7.005912780761719, 15.581954956054688, 55.39335632324219, 9.736221313476562, 20.677688598632812, 9.516441345214844, 44.805641174316406, 30.917484283447266, 28.7735595703125, 20.128257751464844, 16.12763214111328, 16.050495147705078, 4.3555450439453125, 43.264488220214844, 6.839385986328125, 49.256866455078125, 27.06640625, 13.578033447265625, 32.128292083740234, -18.817138671875, 31.102270126342773, 35.49702453613281, 33.513511657714844, 10.54901123046875, 48.49365234375, 23.649818420410156, -4.5176849365234375, 2.4891204833984375, 36.597999572753906, 5.540264129638672, 17.932174682617188, 14.480545043945312, 32.775596618652344, 5.91546630859375, 36.03887939453125, 37.55812072753906, 1.5088424682617188, 12.734657287597656, 9.766342163085938, 13.822074890136719, 25.25762939453125, 23.187896728515625, 30.733932495117188, 22.282203674316406, 8.316551208496094, -0.8318328857421875, 38.04553985595703, 50.09611511230469, 9.65713119506836, 30.1968994140625, 20.508350372314453, 32.15276336669922, 30.551177978515625, 16.978477478027344, 21.37310791015625, -6.584808349609375, 20.030372619628906, 19.959184646606445, 15.794143676757812, 8.622684478759766, 53.94398498535156, 20.178070068359375, 23.854522705078125, 6.603118896484375, 51.43885803222656, 18.09844970703125, -22.18853759765625, 2.9991989135742188, 23.55340576171875, 13.873664855957031, 35.63915252685547, 9.724639892578125, 23.330848693847656, 24.086563110351562, 22.690818786621094, 12.246170043945312, 12.663360595703125, -18.891502380371094, 31.897628784179688, -0.02222442626953125, 24.894668579101562, 37.68756866455078, 21.693084716796875, 16.404685974121094, 48.792152404785156, 4.77153205871582, 7.65386962890625, -8.192039489746094, -3.2177162170410156, 32.65879821777344, 59.35315704345703, -12.629941940307617, 32.90129852294922, 32.99259948730469, 24.044191360473633, 4.4276580810546875, 15.749580383300781, 33.02484130859375, 35.82677459716797, 9.436279296875, 26.793548583984375, 18.94207763671875, 10.535736083984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000200.npy"}
{"epoch": 0.5882352941176471, "step": 201, "batch_size": 128, "mean": 18.72084617614746, "std": 17.30936622619629, "min": -28.952194213867188, "p10": -1.3841970443725562, "median": 17.373717308044434, "p90": 39.796549224853514, "max": 73.87248992919922, "pos_frac": 0.8828125, "sample": [-6.243927001953125, 17.87409210205078, 9.209781646728516, 24.67839813232422, 25.493938446044922, 20.092323303222656, -13.668386459350586, -28.952194213867188, 13.581005096435547, 31.155193328857422, -11.622306823730469, 28.106643676757812, -0.2970123291015625, -13.378097534179688, 30.701152801513672, 35.3358154296875, 11.026496887207031, 25.131107330322266, -8.49276351928711, 31.11280059814453, 30.0155029296875, 1.675140380859375, 28.40099334716797, 19.19061279296875, 45.470458984375, 15.276458740234375, 43.015071868896484, 14.038238525390625, 36.12553787231445, 14.010124206542969, 57.661956787109375, 38.055145263671875, -22.267745971679688, -4.687004089355469, 0.7612495422363281, -2.99395751953125, 24.877628326416016, 3.8527870178222656, 4.415740966796875, 7.159099578857422, 7.753620147705078, 47.356788635253906, 56.317779541015625, 20.496109008789062, 73.87248992919922, 1.5254268646240234, 8.107671737670898, 6.956573486328125, 28.112491607666016, 38.49475860595703, 17.013792037963867, -0.6942996978759766, 6.638603210449219, -4.959297180175781, 14.849090576171875, 11.660835266113281, 10.37645149230957, 22.63089942932129, 14.016860961914062, -4.302757263183594, 40.56095886230469, 31.869873046875, 6.281982421875, -12.332450866699219, 51.54839324951172, 12.586143493652344, 9.572319030761719, 43.772216796875, 17.733642578125, 19.82498550415039, 4.319908142089844, 11.2711181640625, 0.7325820922851562, 31.54285430908203, 21.321441650390625, 16.455467224121094, 6.0642242431640625, 8.050628662109375, 37.23817825317383, 2.0096435546875, 20.12343406677246, 10.451587677001953, 22.2845458984375, 30.944225311279297, 38.17134094238281, 23.092086791992188, 12.906867980957031, 31.631080627441406, 5.7461700439453125, 40.19805908203125, 34.04344177246094, 32.870086669921875, 23.779651641845703, 35.99310302734375, 11.796173095703125, 26.456777572631836, 18.47294044494629, 23.779876708984375, 28.72954559326172, 35.004539489746094, 43.35950469970703, 14.47361946105957, 3.9758682250976562, 16.067604064941406, 14.351577758789062, 39.45221710205078, 6.489597320556641, 47.371978759765625, 12.826231002807617, 27.61810302734375, 39.624473571777344, 11.879640579223633, 15.99343490600586, 2.981801986694336, 37.014793395996094, 56.15007019042969, 22.06613540649414, 8.399059295654297, 17.855567932128906, 11.525869369506836, 31.531333923339844, 9.418426513671875, 10.910018920898438, -8.352775573730469, 29.441062927246094, 9.046638488769531, 19.073772430419922, 27.702056884765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000201.npy"}
{"epoch": 0.5911764705882353, "step": 202, "batch_size": 128, "mean": 14.909369468688965, "std": 17.673768997192383, "min": -41.57196044921875, "p10": -5.7571285247802715, "median": 13.361130714416504, "p90": 38.34685516357422, "max": 56.1632080078125, "pos_frac": 0.8203125, "sample": [2.6180953979492188, 10.833526611328125, 2.8329029083251953, 19.50940704345703, 19.408554077148438, 1.521310806274414, 26.657291412353516, 15.105178833007812, 17.780506134033203, 7.439208984375, 30.767696380615234, 21.58771514892578, 23.88068389892578, 11.686508178710938, 32.19891357421875, -3.57476806640625, 54.185707092285156, 17.416873931884766, 12.018600463867188, 19.86237335205078, -1.73236083984375, 32.45933532714844, 12.288681030273438, 42.10700225830078, 5.926525115966797, 38.34760284423828, 19.6715087890625, 19.318870544433594, 27.24798583984375, -9.726051330566406, 10.354598999023438, 8.318695068359375, -2.8429393768310547, 4.806480407714844, 6.896507263183594, 0.8389892578125, -9.218772888183594, 13.624702453613281, -1.2121009826660156, 16.590927124023438, -12.304876327514648, 51.08905029296875, 38.96995544433594, 15.285377502441406, 13.097558975219727, 22.731704711914062, 0.2632427215576172, 17.799652099609375, -27.70966339111328, -3.6714649200439453, 56.1632080078125, -0.9972381591796875, 38.346534729003906, -4.777299880981445, 32.06590270996094, 33.093109130859375, 4.175834655761719, 8.676490783691406, 15.71343994140625, 30.236610412597656, 26.874475479125977, -10.7279052734375, 12.421531677246094, 26.207298278808594, 35.417945861816406, 7.920417785644531, 12.93252944946289, 49.567901611328125, 21.18079376220703, 12.61490249633789, 17.972579956054688, 53.20283889770508, 29.118316650390625, 2.3705596923828125, -10.519998550415039, 10.00234603881836, -41.57196044921875, 10.163738250732422, 3.9568023681640625, 15.362411499023438, 20.532211303710938, 16.564987182617188, 26.341949462890625, 3.889852523803711, -16.545425415039062, 16.698989868164062, 12.977142333984375, 7.819187164306641, 35.39898681640625, -12.354385375976562, 17.247325897216797, 21.469161987304688, 16.287429809570312, 10.600791931152344, 33.232200622558594, 40.27996826171875, 1.5488128662109375, 50.56748962402344, 40.93968200683594, 11.638984680175781, 2.5722808837890625, 37.64544677734375, 12.386917114257812, -16.10919189453125, 43.45466613769531, 17.629987716674805, -22.70970344543457, 31.84680938720703, 26.67675018310547, 5.845008850097656, 2.467945098876953, 4.53611946105957, -6.934055328369141, 11.702951431274414, 14.272758483886719, 37.5299072265625, -8.552928924560547, -0.34915924072265625, 9.112573623657227, 6.835609436035156, 27.84311294555664, -5.2527313232421875, 1.3016090393066406, -1.0016746520996094, 48.35242462158203, 26.779205322265625, 30.01549530029297, 24.848724365234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000202.npy"}
{"epoch": 0.5941176470588235, "step": 203, "batch_size": 128, "mean": 14.576257705688477, "std": 16.507722854614258, "min": -23.325366973876953, "p10": -3.9810647964477535, "median": 11.713708877563477, "p90": 35.86440734863281, "max": 61.12779235839844, "pos_frac": 0.8046875, "sample": [22.247543334960938, 6.9617919921875, 22.008302688598633, 10.074462890625, 12.678993225097656, 7.538909912109375, -3.6901397705078125, -0.9061203002929688, 4.384552001953125, 19.616893768310547, 31.084117889404297, -3.7343101501464844, 55.38243103027344, 6.4747467041015625, 5.91217041015625, 16.947540283203125, 18.84296417236328, 0.8662395477294922, 27.47115707397461, 4.225687026977539, -0.8896636962890625, -4.989715576171875, 5.456172943115234, 10.346214294433594, 13.690948486328125, 18.350433349609375, 22.386764526367188, 6.8771514892578125, -4.161890029907227, 44.41954040527344, 3.25201416015625, -2.9497222900390625, 16.382028579711914, -7.6269683837890625, 9.692422866821289, 35.84968566894531, 50.58863830566406, -23.325366973876953, 8.92753791809082, 31.303512573242188, 14.645530700683594, 6.9141845703125, 22.359703063964844, -1.376220703125, -6.47576904296875, -3.9035682678222656, 13.269485473632812, -1.4911842346191406, 32.060707092285156, 10.3751220703125, 34.122093200683594, 20.141948699951172, 14.185256958007812, 18.69440269470215, 52.45762634277344, 24.16326904296875, 11.067642211914062, 8.24871826171875, 18.1656494140625, 50.82782745361328, 61.12779235839844, -0.7916183471679688, 4.374542236328125, 34.24518585205078, 15.990772247314453, 7.651821136474609, 4.6555328369140625, 24.30476188659668, 17.96393585205078, 5.129615783691406, 10.05164909362793, 13.57305908203125, 11.599544525146484, 33.981903076171875, 28.742542266845703, -9.821451187133789, 35.44133758544922, 34.084835052490234, -4.1621246337890625, -17.76348876953125, 6.8199462890625, -12.469810485839844, -11.51385498046875, 41.75822448730469, 19.857666015625, 10.344673156738281, 18.395267486572266, -0.2558174133300781, 14.002487182617188, 10.716964721679688, 35.89875793457031, 31.95335578918457, 5.091350555419922, -2.1877593994140625, 0.26116943359375, -0.7945938110351562, 48.926048278808594, -8.6859130859375, 23.525726318359375, 3.068096160888672, 53.241912841796875, 27.779464721679688, 36.47264862060547, 3.9627609252929688, 18.567153930664062, 11.951370239257812, 0.4884967803955078, -6.000736236572266, 10.522674560546875, -13.806144714355469, 1.5816192626953125, 18.028717041015625, 21.33517837524414, 27.577423095703125, 22.564481735229492, 2.597566604614258, 6.880424499511719, 11.155204772949219, 27.301177978515625, 18.363985061645508, 10.133777618408203, 12.703582763671875, 48.60603332519531, 23.981220245361328, 40.07456588745117, 28.373199462890625, 11.827873229980469, 7.9832000732421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000203.npy"}
{"epoch": 0.5970588235294118, "step": 204, "batch_size": 128, "mean": 15.058691024780273, "std": 16.896942138671875, "min": -21.5489501953125, "p10": -5.411216163635253, "median": 13.534553527832031, "p90": 31.6003698348999, "max": 64.37370300292969, "pos_frac": 0.8125, "sample": [7.483154296875, 48.823822021484375, -0.2142333984375, -8.018142700195312, -10.640838623046875, 19.32050323486328, 18.0460205078125, -5.503913879394531, 16.160446166992188, 27.046096801757812, 11.743438720703125, 22.32122802734375, 6.322254180908203, 4.5768585205078125, 25.469467163085938, 22.35000991821289, 17.329925537109375, 6.732343673706055, 11.962089538574219, 7.84893798828125, 28.787940979003906, 26.648040771484375, 29.101463317871094, -21.5489501953125, 29.199655532836914, 8.34561538696289, 48.10460662841797, -2.4852066040039062, 6.170764923095703, 31.12212371826172, 5.8994293212890625, 3.8475818634033203, 25.089401245117188, -12.598983764648438, 11.31768798828125, 30.66138458251953, 27.282329559326172, 46.352500915527344, 30.70734405517578, 4.028099060058594, 18.17308807373047, 5.1701507568359375, 26.379976272583008, 13.114151000976562, 31.37615966796875, 28.933616638183594, 5.3347625732421875, 52.469146728515625, 6.008079528808594, 18.338790893554688, 11.141937255859375, 20.558929443359375, 23.75604248046875, 27.025238037109375, -4.280265808105469, 37.329315185546875, 3.878314971923828, 40.094547271728516, 12.128128051757812, 17.52935791015625, 45.53704833984375, 23.02862548828125, 3.940143585205078, 22.871726989746094, 12.518661499023438, 61.90803527832031, 61.04814910888672, 19.03089141845703, -2.417816162109375, 16.474233627319336, 9.484840393066406, 31.354820251464844, -7.798957824707031, 26.172225952148438, 19.124465942382812, 18.719980239868164, 17.808147430419922, 22.439697265625, 12.667160034179688, 31.26003646850586, 10.68853759765625, -5.371488571166992, -11.151798248291016, 24.458587646484375, 12.299705505371094, 23.439308166503906, 18.849395751953125, 42.21605682373047, 0.5107593536376953, -0.6576690673828125, 4.4055328369140625, 8.843948364257812, 0.5655975341796875, -6.7585296630859375, 14.877227783203125, 18.28449249267578, 22.175376892089844, -2.0037002563476562, -0.5931549072265625, 6.813236236572266, -4.4514007568359375, 5.1487579345703125, -15.916900634765625, -14.868278503417969, -13.964958190917969, 9.179435729980469, -4.983135223388672, 8.281005859375, 8.301834106445312, 22.208770751953125, -11.206155776977539, -2.9245529174804688, 23.796836853027344, -18.56390380859375, 3.1094436645507812, 30.96588134765625, 28.117412567138672, 1.5810508728027344, 31.518699645996094, 7.740930557250977, 5.494672775268555, 13.9549560546875, 17.45203399658203, 31.428647994995117, 64.37370300292969, 38.75978088378906, 4.471588134765625, 31.79093360900879], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000204.npy"}
{"epoch": 0.6, "step": 205, "batch_size": 128, "mean": 15.405111312866211, "std": 17.163000106811523, "min": -30.71710205078125, "p10": -5.150159263610839, "median": 12.772102355957031, "p90": 34.02593917846679, "max": 104.86888122558594, "pos_frac": 0.8125, "sample": [3.3170166015625, -13.434638977050781, 28.338146209716797, 17.114797592163086, 24.509489059448242, 28.503616333007812, 5.190093994140625, 26.086029052734375, 3.5759811401367188, 7.6468658447265625, 23.50040054321289, 32.495330810546875, 7.80810546875, -8.525100708007812, 42.024505615234375, 22.244041442871094, 47.19075012207031, -0.8924102783203125, 18.798376083374023, 64.3687744140625, -2.1142921447753906, 19.054397583007812, -1.7242813110351562, 15.929069519042969, 16.867881774902344, 9.949779510498047, -0.1378612518310547, 20.03734016418457, -6.930028915405273, 9.127452850341797, 34.52604675292969, 48.36732482910156, 26.379676818847656, 3.07550048828125, 37.50011444091797, 27.829551696777344, -11.026718139648438, 29.350509643554688, 39.942604064941406, 8.49322509765625, 8.930580139160156, 4.515037536621094, 12.578300476074219, 9.57625961303711, 15.415283203125, 7.055381774902344, 7.523193359375, 45.44380187988281, 20.446670532226562, 7.16253662109375, 11.004470825195312, 7.0653228759765625, 20.711822509765625, 6.705432891845703, 7.2077789306640625, 22.148399353027344, 33.811607360839844, 22.766357421875, 8.695396423339844, 9.112052917480469, 44.924537658691406, 3.9846649169921875, 12.541675567626953, -1.5802040100097656, 40.386436462402344, 9.406827926635742, -5.081804275512695, -1.7635726928710938, 5.4853515625, 16.732357025146484, 22.356842041015625, 37.150299072265625, 23.53790283203125, -5.309654235839844, -6.968015670776367, 11.27203369140625, 30.229778289794922, 104.86888122558594, 12.342605590820312, 26.05899429321289, 31.24626922607422, 21.905067443847656, 13.738838195800781, 4.2279510498046875, -30.71710205078125, -2.8566818237304688, -0.84234619140625, -5.34869384765625, 30.514450073242188, 15.888671875, 25.697357177734375, 18.588768005371094, 23.855091094970703, 39.67991638183594, 26.533790588378906, 28.63599395751953, 25.22407341003418, 4.19529914855957, 9.653539657592773, 12.051267623901367, 8.218528747558594, 23.246963500976562, -5.428125381469727, 7.999187469482422, 12.965904235839844, 17.585800170898438, -6.849845886230469, -8.967113494873047, 19.752037048339844, 27.785003662109375, 28.143798828125, -0.8234462738037109, 10.984420776367188, 29.66823959350586, -3.2265090942382812, 32.28153991699219, 8.590682983398438, -14.195953369140625, 9.29327392578125, 14.14263916015625, 9.702165603637695, 14.13839340209961, -11.448638916015625, 7.306671142578125, 10.457744598388672, 15.582731246948242, 29.232643127441406, 21.034961700439453], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000205.npy"}
{"epoch": 0.6029411764705882, "step": 206, "batch_size": 128, "mean": 15.952903747558594, "std": 18.492338180541992, "min": -29.606246948242188, "p10": -4.626716995239257, "median": 11.824614524841309, "p90": 40.765004730224604, "max": 77.74394226074219, "pos_frac": 0.828125, "sample": [22.877487182617188, 21.44277572631836, 13.211843490600586, 2.7306900024414062, -6.085334777832031, -3.5277252197265625, 0.8454360961914062, 5.63934326171875, 9.101083755493164, 33.187110900878906, 36.48497772216797, 22.018386840820312, 61.96116638183594, 31.82178497314453, -1.3722686767578125, -12.362165451049805, 11.287063598632812, 7.637140274047852, 21.705230712890625, 0.34221649169921875, 5.599464416503906, 30.65903091430664, 21.693832397460938, 14.017711639404297, 77.74394226074219, 6.07855224609375, 7.330955505371094, 61.465187072753906, 21.802108764648438, 49.40385437011719, 47.443023681640625, -0.058010101318359375, 26.512283325195312, 23.173583984375, 15.594381332397461, 8.11819839477539, 33.337982177734375, -5.008441925048828, 6.950885772705078, 11.877702713012695, -4.431816101074219, 7.995857238769531, 28.992141723632812, -2.4975128173828125, 14.592041015625, 42.982460021972656, 6.107994079589844, 26.593055725097656, 13.47519302368164, 37.099609375, 14.143409729003906, 12.136651992797852, 25.74124526977539, 3.2238388061523438, 19.5894775390625, -18.63671875, 2.728546142578125, 36.626930236816406, 5.708484649658203, -0.07898139953613281, 24.30229949951172, 2.953584671020508, 15.930435180664062, -4.490673065185547, -6.267877578735352, 33.02580642700195, 29.956928253173828, 15.243026733398438, 21.15363311767578, 17.525634765625, 16.69951629638672, 4.532827377319336, 25.175453186035156, -0.603057861328125, 5.5079498291015625, 12.100425720214844, 9.533369064331055, 2.5727615356445312, 35.34930419921875, 8.037250518798828, 12.736198425292969, 9.95391845703125, 26.6617431640625, 15.933822631835938, 0.9711093902587891, 9.862754821777344, -5.9845733642578125, 48.374298095703125, -29.606246948242188, 17.07202911376953, 46.942718505859375, 1.5050487518310547, -8.659236907958984, -4.94415283203125, 48.01202392578125, 49.7435302734375, 4.81732177734375, 55.164642333984375, 7.070409774780273, 7.664283752441406, 6.845634460449219, 24.07054901123047, 6.979887008666992, 36.99433898925781, 11.771526336669922, 3.093219757080078, 9.430007934570312, 9.447731018066406, -5.7597808837890625, 6.056816101074219, 3.6252307891845703, 5.713825225830078, 18.250869750976562, -7.460784912109375, 68.88392639160156, 27.047332763671875, 39.814666748046875, 8.713863372802734, 6.449699401855469, 31.629486083984375, -8.251344680786133, 26.41400146484375, 36.90540313720703, -9.998502731323242, 8.913787841796875, -0.715667724609375, 51.649932861328125, 29.1512451171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000206.npy"}
{"epoch": 0.6058823529411764, "step": 207, "batch_size": 128, "mean": 17.60502815246582, "std": 15.858807563781738, "min": -45.29681396484375, "p10": 0.03627243041992234, "median": 16.36928081512451, "p90": 37.94588088989257, "max": 65.71854400634766, "pos_frac": 0.8984375, "sample": [4.7484130859375, -2.6880569458007812, 14.166940689086914, 8.77340316772461, 31.10126495361328, -7.332099914550781, 8.355728149414062, 12.444944381713867, 15.967575073242188, -7.356380462646484, 10.4481201171875, 40.269569396972656, 0.6860885620117188, 24.699729919433594, 15.5579833984375, -45.29681396484375, 18.573440551757812, 19.328994750976562, 19.413055419921875, 29.643035888671875, 37.36931610107422, -4.79461669921875, 27.666404724121094, 31.982666015625, -0.7570037841796875, 21.374038696289062, 9.686138153076172, 15.272750854492188, 41.052490234375, 24.838897705078125, 2.3903236389160156, 11.133235931396484, 5.725563049316406, 0.36923980712890625, 36.97655487060547, 44.181968688964844, 20.947906494140625, 34.73992919921875, 6.468559265136719, -0.26706695556640625, 20.735504150390625, 16.960912704467773, 9.94677734375, 14.660652160644531, 19.872314453125, 24.069671630859375, 23.631507873535156, 18.45157814025879, 0.1662750244140625, 22.20488739013672, 14.425544738769531, 41.39405822753906, 25.073623657226562, 20.283233642578125, 31.153793334960938, -6.796272277832031, 3.467742919921875, 36.65364074707031, 8.743431091308594, 12.9503173828125, 7.943504333496094, 29.96477508544922, 8.691886901855469, 4.987998962402344, 30.00602912902832, 12.631902694702148, 18.4593505859375, -4.2764434814453125, 14.715957641601562, 34.06697082519531, 13.28165054321289, -6.520072937011719, 29.290401458740234, 20.38214111328125, 10.59437370300293, 1.4861526489257812, 16.888092041015625, 40.075172424316406, 16.3459415435791, 1.4149608612060547, -3.4614028930664062, 8.261421203613281, 39.29119873046875, 9.266807556152344, 13.868972778320312, 13.330394744873047, -14.297576904296875, 14.152841567993164, 3.644773483276367, 10.906486511230469, 30.545501708984375, 14.991096496582031, 23.18682098388672, 21.361366271972656, 4.635091781616211, 19.081159591674805, 2.629791259765625, 2.0767822265625, 20.40296173095703, 26.764419555664062, 22.99740219116211, 36.011207580566406, 6.951946258544922, 16.3837890625, 21.309707641601562, 21.561134338378906, 19.23590087890625, 16.29995346069336, 21.19451904296875, 26.738922119140625, 51.82835388183594, 13.638252258300781, 15.169754028320312, 30.956886291503906, 44.017555236816406, 9.03485107421875, 2.2938079833984375, 65.71854400634766, 24.825485229492188, 21.612409591674805, 49.185943603515625, -7.159820556640625, 48.048362731933594, 50.114013671875, 60.32255554199219, 17.48379898071289, 16.354772567749023, 34.3326416015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000207.npy"}
{"epoch": 0.6088235294117647, "step": 208, "batch_size": 128, "mean": 16.97857666015625, "std": 17.481979370117188, "min": -26.641021728515625, "p10": -3.441025924682617, "median": 16.33857822418213, "p90": 40.872216796875, "max": 73.91999816894531, "pos_frac": 0.8359375, "sample": [25.5299072265625, 6.5639801025390625, 44.05634307861328, 50.92594909667969, -0.170501708984375, 12.651504516601562, 49.58677673339844, 18.926488876342773, 21.169593811035156, 8.908035278320312, 16.599319458007812, 2.501140594482422, 4.218271255493164, 31.67095947265625, 5.67010498046875, 27.01275634765625, 14.377494812011719, -7.900787353515625, 46.7642822265625, -2.2797985076904297, -4.271320343017578, 21.8877010345459, 18.960433959960938, 28.985687255859375, 32.282875061035156, 18.43548583984375, -7.145477294921875, 50.21476745605469, 3.889911651611328, 14.612442016601562, -2.5116195678710938, -0.5522060394287109, 15.277759552001953, 29.69043731689453, 17.286808013916016, 21.865936279296875, 33.466514587402344, 37.45826721191406, 18.853622436523438, 16.453271865844727, 4.525077819824219, 36.67397689819336, 41.225250244140625, -13.38827133178711, 20.0399169921875, 23.863922119140625, 6.7807159423828125, 9.7694091796875, 3.946380615234375, 18.996803283691406, 3.4326400756835938, -15.913894653320312, 15.240243911743164, 20.601516723632812, 2.8280506134033203, 17.720947265625, -3.678924560546875, 7.523651123046875, 56.94538116455078, 7.507568359375, 50.083099365234375, 7.549337387084961, -5.62591552734375, 22.3403263092041, 21.20355224609375, 22.033546447753906, 14.89801025390625, 19.936786651611328, -3.339069366455078, 48.245670318603516, 20.36981964111328, -4.9911956787109375, 4.68548583984375, -6.625617980957031, 23.245521545410156, 0.2934837341308594, 23.83777618408203, 30.910293579101562, 3.709716796875, 13.485511779785156, 73.91999816894531, 5.497138977050781, -2.7325210571289062, -2.671764373779297, 31.00393295288086, 39.40635299682617, 13.663803100585938, 16.22388458251953, 3.6663646697998047, 10.202117919921875, -14.953895568847656, 29.288299560546875, 3.8910751342773438, 47.87315368652344, 0.8760509490966797, -26.641021728515625, 15.312477111816406, 40.720916748046875, 24.592178344726562, 6.889106750488281, 15.793607711791992, 23.31466293334961, 26.03997802734375, 25.87125015258789, 33.01301574707031, 30.848377227783203, 22.659259796142578, 62.70751953125, -6.615692138671875, 30.105178833007812, 0.26021766662597656, 18.763870239257812, 13.471153259277344, 35.5274658203125, 20.700714111328125, 12.079856872558594, 8.566078186035156, 10.300342559814453, 11.160381317138672, 25.383819580078125, 32.60149383544922, 4.0940093994140625, -2.1084938049316406, -5.1407623291015625, 0.03834724426269531, 20.927345275878906, 49.44042205810547, 24.616905212402344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000208.npy"}
{"epoch": 0.611764705882353, "step": 209, "batch_size": 128, "mean": 19.262203216552734, "std": 17.448999404907227, "min": -15.684944152832031, "p10": -1.0320518493652335, "median": 19.299732208251953, "p90": 40.75835037231444, "max": 81.04640197753906, "pos_frac": 0.875, "sample": [29.809303283691406, 19.087371826171875, 0.6683483123779297, 5.3839569091796875, 1.834646224975586, 38.724281311035156, 15.63690185546875, 10.595975875854492, -9.724178314208984, 19.249671936035156, 39.45501708984375, -1.544464111328125, 18.47827911376953, 15.66632080078125, 32.32971954345703, 24.4332275390625, -0.8124465942382812, -6.4551849365234375, 19.5625, 27.988441467285156, 19.276473999023438, 3.7542495727539062, 28.388343811035156, 6.661975860595703, 28.184768676757812, 21.88338279724121, 31.245121002197266, -3.3695545196533203, 46.39775085449219, 20.083839416503906, 6.049072265625, 6.776405334472656, 17.520105361938477, 29.18578338623047, 15.991718292236328, 23.16175079345703, 19.20074462890625, -5.3877105712890625, 20.553329467773438, 21.831172943115234, 37.18983459472656, 44.192596435546875, 2.1564369201660156, 8.908905029296875, 4.011726379394531, 19.32299041748047, 14.697311401367188, 0.9246749877929688, -3.708660125732422, 27.553707122802734, 18.131153106689453, 27.141197204589844, 3.3947067260742188, 30.818267822265625, 44.18694305419922, 7.118188858032227, 12.884441375732422, 24.189231872558594, 30.430587768554688, 51.790077209472656, 7.220672607421875, 4.06463623046875, 29.807632446289062, 10.120864868164062, 34.676475524902344, -0.5690155029296875, 30.69867706298828, 0.6505393981933594, 10.150684356689453, 46.765594482421875, -0.142333984375, 7.443096160888672, 81.04640197753906, -7.8999786376953125, 7.2474822998046875, 3.104694366455078, 1.4874191284179688, 71.37516021728516, 22.223548889160156, 1.8741302490234375, 19.274269104003906, 39.063865661621094, 10.528533935546875, 15.44563102722168, -8.651481628417969, 20.452857971191406, 14.3477783203125, 34.09638977050781, -7.403961181640625, -8.301239013671875, 22.692474365234375, 23.447172164916992, 31.497238159179688, -5.254051208496094, 37.88482666015625, 0.21198272705078125, 30.05699920654297, 24.946022033691406, 25.155853271484375, -15.684944152832031, 27.85540771484375, 21.077177047729492, 28.57049560546875, 27.46319580078125, 5.47039794921875, 20.035301208496094, 25.345970153808594, 46.57889938354492, 20.94287109375, 10.161376953125, 50.25343322753906, 14.690582275390625, 74.6729736328125, -1.8375930786132812, 24.71588897705078, 46.46437072753906, 24.144023895263672, 25.904197692871094, 7.41278076171875, 24.70482063293457, 4.503761291503906, 49.14405822753906, 6.099018096923828, 12.429924011230469, 37.09239196777344, 43.799461364746094, 27.592056274414062, 32.031341552734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000209.npy"}
{"epoch": 0.6147058823529412, "step": 210, "batch_size": 128, "mean": 17.883188247680664, "std": 15.040006637573242, "min": -12.144031524658203, "p10": -0.015636825561523286, "median": 17.32999038696289, "p90": 37.97849655151367, "max": 62.34299850463867, "pos_frac": 0.8984375, "sample": [32.740684509277344, -12.144031524658203, 9.748916625976562, 25.303321838378906, 17.77264976501465, 19.914283752441406, -6.682233810424805, -5.003576278686523, 11.116767883300781, 8.29426383972168, 24.771942138671875, 19.809782028198242, 25.34870147705078, 40.49322509765625, 0.41341400146484375, 15.8648681640625, 27.872970581054688, 15.41781997680664, -1.9969940185546875, 9.946929931640625, 9.702552795410156, 31.39453125, 10.81342887878418, 1.955535888671875, 59.935096740722656, 31.852127075195312, 30.272483825683594, 11.70294189453125, 3.8092727661132812, 62.34299850463867, 32.82096481323242, 38.0260009765625, 0.09381103515625, 44.17102813720703, 20.687271118164062, 7.0536651611328125, 26.248489379882812, 20.531631469726562, 25.610898971557617, 15.408050537109375, 15.171371459960938, -6.4539337158203125, 21.304019927978516, 3.4193496704101562, 24.47381591796875, 41.06648254394531, 40.74909973144531, 8.055747985839844, 26.493385314941406, 16.919403076171875, 27.2647705078125, 33.95649719238281, 4.407419204711914, 4.614219665527344, 0.027400970458984375, 27.429168701171875, 29.999813079833984, 19.82738494873047, 6.4689483642578125, 34.92002868652344, 9.369026184082031, 15.118240356445312, 1.4645843505859375, 15.257904052734375, 23.253692626953125, 7.1726226806640625, 18.816753387451172, 19.09292221069336, 25.816787719726562, -4.364799499511719, 16.3046875, 6.2786102294921875, 18.570079803466797, 16.255859375, 11.668338775634766, 5.534183502197266, 3.70172119140625, 21.134666442871094, -8.217483520507812, 44.69915008544922, 28.900375366210938, -3.465831756591797, 17.812698364257812, 37.95813751220703, -2.308990478515625, 13.793176651000977, 17.740577697753906, 57.73967742919922, 20.287973403930664, 16.725852966308594, 4.094839096069336, -0.8793792724609375, 7.948783874511719, 31.884288787841797, -7.555816650390625, 41.60755157470703, 25.598731994628906, 27.209381103515625, 1.3503837585449219, 12.007026672363281, 6.957218170166016, 2.0330657958984375, -0.116058349609375, 9.854110717773438, 8.882232666015625, 34.352439880371094, 36.128273010253906, 33.62187957763672, 8.428062438964844, 12.319892883300781, 22.579940795898438, 21.571189880371094, 4.80925178527832, 28.1807861328125, 41.97599792480469, 2.386371612548828, 31.128570556640625, 10.58991813659668, 50.51938247680664, 26.667030334472656, -1.25714111328125, 3.869873046875, 20.35919189453125, 25.202285766601562, 41.71723937988281, 19.363555908203125, 1.0268173217773438, 26.968917846679688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000210.npy"}
{"epoch": 0.6176470588235294, "step": 211, "batch_size": 128, "mean": 16.736446380615234, "std": 18.449857711791992, "min": -27.001708984375, "p10": -6.374881172180176, "median": 14.625556945800781, "p90": 40.54603652954101, "max": 69.55260467529297, "pos_frac": 0.8515625, "sample": [17.154069900512695, 14.039566040039062, 19.357749938964844, -9.335502624511719, 7.775951385498047, 9.62319564819336, 59.57466506958008, 1.8318710327148438, 41.408721923828125, -14.33514404296875, 21.103178024291992, 22.966720581054688, 50.53221130371094, 41.90495300292969, 39.53533935546875, 6.68391227722168, 51.899871826171875, 40.27019500732422, 10.92281723022461, -6.428913116455078, 26.743825912475586, 13.013862609863281, -21.442672729492188, 65.14173889160156, 10.286209106445312, 26.736419677734375, 7.430366516113281, 15.8731689453125, 33.745887756347656, -27.001708984375, -2.6883201599121094, 8.01959228515625, 2.745912551879883, 9.508123397827148, 27.376419067382812, 8.861743927001953, 27.362625122070312, 57.05650329589844, 21.073936462402344, 18.728652954101562, 1.8393478393554688, 7.453727722167969, 7.434600830078125, 9.548511505126953, 18.16436004638672, 27.71417236328125, 22.526580810546875, 17.549339294433594, 11.479644775390625, -6.351724624633789, 39.08473205566406, 37.91339111328125, 19.182113647460938, 12.372222900390625, 18.134450912475586, 0.5977668762207031, 16.25244903564453, -5.3886871337890625, -11.685028076171875, 14.29736328125, -2.244020462036133, -7.033784866333008, 14.953750610351562, 13.877754211425781, 19.82476043701172, 39.436737060546875, 53.94214630126953, 34.650718688964844, 2.2752227783203125, 9.46466064453125, -8.23602294921875, 49.31804656982422, 4.675762176513672, -16.095596313476562, 13.96994400024414, 44.85704040527344, 1.3710899353027344, 41.189666748046875, 20.01537322998047, -0.8476905822753906, 22.574905395507812, 7.994651794433594, 1.2792472839355469, 13.29206657409668, 52.28373718261719, 3.1241493225097656, 21.417465209960938, 0.26430511474609375, 16.714202880859375, 13.753189086914062, 29.310237884521484, 14.091705322265625, -3.122161865234375, 20.189403533935547, 1.68414306640625, 33.752044677734375, 1.8706378936767578, 69.55260467529297, 12.920982360839844, 23.708309173583984, 9.911834716796875, 3.0037078857421875, 7.928703308105469, 20.06182861328125, 27.64183807373047, 30.515975952148438, 7.933383941650391, 15.482147216796875, 11.911064147949219, 22.822555541992188, 36.100341796875, 39.26341247558594, 7.35552978515625, 26.611515045166016, 19.21637725830078, -15.798492431640625, -10.183265686035156, 34.84095764160156, 27.3978271484375, 34.21235656738281, 18.193374633789062, -8.658370971679688, 14.965629577636719, 33.718605041503906, -14.723579406738281, 3.3428115844726562, 36.50100326538086, 3.497600555419922], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000211.npy"}
{"epoch": 0.6205882352941177, "step": 212, "batch_size": 128, "mean": 17.442520141601562, "std": 19.16008758544922, "min": -26.922517776489258, "p10": -2.5758962631225586, "median": 15.042415618896484, "p90": 39.829869461059566, "max": 79.02936553955078, "pos_frac": 0.8515625, "sample": [0.26490020751953125, 15.21636962890625, -2.1904449462890625, 1.545745849609375, -11.019889831542969, -14.864734649658203, 34.77201461791992, 2.1220149993896484, 44.24549865722656, 65.73927307128906, 50.75263214111328, 27.873924255371094, 3.8014068603515625, -0.5955352783203125, 25.28875732421875, 23.33191680908203, 7.173797607421875, 42.055816650390625, 66.8630142211914, 15.561668395996094, 13.11733627319336, 34.809471130371094, -2.5238285064697266, 10.998077392578125, 35.73382568359375, 4.753875732421875, -19.54131317138672, 9.370292663574219, 31.83257293701172, 27.388099670410156, 25.11164093017578, 26.602325439453125, 27.87897300720215, 38.23657989501953, 60.38789367675781, 4.052179336547852, 27.026763916015625, 13.187578201293945, 25.105222702026367, -4.639739990234375, 29.955001831054688, 3.834270477294922, 23.799528121948242, 1.8420181274414062, -26.922517776489258, 5.418731689453125, 22.083011627197266, 8.43875503540039, 28.098419189453125, 22.70236587524414, 6.5453338623046875, 4.538066864013672, 68.79536437988281, 10.692039489746094, 10.224906921386719, 5.121297836303711, -2.3857040405273438, 31.02011489868164, 7.466835021972656, 4.72906494140625, 25.716445922851562, -23.3548583984375, 11.812980651855469, 39.46775817871094, 0.06855392456054688, 46.89019775390625, 14.868461608886719, 1.8853950500488281, 6.197925567626953, 26.79737091064453, 17.590316772460938, 39.707679748535156, 24.71654510498047, 23.771530151367188, 17.47130012512207, 20.636199951171875, 21.723114013671875, -21.21636962890625, 30.521156311035156, 8.522758483886719, 4.142364501953125, 18.566478729248047, 23.385391235351562, 15.997589111328125, 35.178123474121094, -11.548637390136719, 12.768817901611328, 12.616901397705078, 2.4084320068359375, 64.09266662597656, 18.732200622558594, -5.719886779785156, 20.662368774414062, 32.20111083984375, 32.822021484375, 12.005352020263672, 8.170328140258789, -2.6973876953125, 19.993576049804688, 6.976224899291992, 27.623865127563477, -5.534637451171875, 38.41594696044922, 47.35569763183594, 32.63310241699219, 6.596691131591797, -1.3413352966308594, 11.37509536743164, -6.207176208496094, 7.563941955566406, 9.593843460083008, 79.02936553955078, 4.860044479370117, 33.60292053222656, -8.87335205078125, 2.53338623046875, 24.241436004638672, 13.609992980957031, 11.125816345214844, -2.1913375854492188, 25.11285400390625, 7.787879943847656, 13.040477752685547, 44.93540954589844, 25.55203628540039, 40.1149787902832, 22.105499267578125, 20.6085205078125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000212.npy"}
{"epoch": 0.6235294117647059, "step": 213, "batch_size": 128, "mean": 16.2468204498291, "std": 16.438310623168945, "min": -35.91162109375, "p10": -2.2777544021606437, "median": 14.073867797851562, "p90": 38.87638168334961, "max": 62.017333984375, "pos_frac": 0.8515625, "sample": [-0.9932136535644531, -0.712010383605957, 13.192115783691406, -16.14087677001953, -10.733718872070312, 23.87331771850586, 7.657867431640625, 32.64038848876953, 2.9510498046875, 16.566452026367188, 32.304386138916016, 1.0461807250976562, 15.159744262695312, 15.164596557617188, -1.2555179595947266, 25.768577575683594, 38.75865173339844, 10.309261322021484, 45.744911193847656, 24.17926025390625, 34.62699890136719, 9.742095947265625, 10.74338150024414, 33.204254150390625, 11.309989929199219, 53.16579055786133, 17.361167907714844, 30.911033630371094, 9.180450439453125, -2.090911865234375, 10.8055419921875, 5.568206787109375, 21.941680908203125, 17.55279541015625, 5.504852294921875, 24.5107421875, -3.4512176513671875, 2.253002166748047, 10.311899185180664, 0.248748779296875, 27.485397338867188, -7.575084686279297, 18.652198791503906, -7.601676940917969, 40.204437255859375, 54.94763946533203, 6.39068603515625, 42.81382751464844, -2.7137203216552734, 51.254302978515625, 13.045761108398438, -3.954570770263672, 39.151084899902344, 28.781333923339844, 5.386497497558594, 23.18827247619629, 34.78425598144531, 40.393463134765625, -8.920341491699219, 28.043025970458984, 19.973224639892578, -15.712074279785156, 7.3081207275390625, 19.71095085144043, 29.32990264892578, 4.798225402832031, 3.5795440673828125, 13.192092895507812, 9.907302856445312, 26.948829650878906, 3.5440826416015625, 12.743074417114258, 13.288454055786133, 16.787979125976562, 21.687589645385742, 8.459884643554688, 9.94437026977539, 35.94403076171875, -0.2222900390625, 16.287132263183594, 12.252079010009766, 13.726150512695312, 21.79339599609375, 37.25616455078125, 17.964462280273438, -8.992012023925781, 9.700469970703125, 40.13105773925781, 0.1397705078125, 10.162019729614258, 20.443389892578125, 25.16686248779297, 20.77186393737793, 31.649627685546875, 8.325141906738281, 26.395729064941406, 62.017333984375, 20.72956085205078, 14.421585083007812, -0.49681854248046875, 10.009761810302734, 5.23790168762207, 29.952163696289062, 6.929187774658203, 6.5994720458984375, 1.0179576873779297, 12.932050704956055, -12.996345520019531, 47.879234313964844, 27.531593322753906, 19.099308013916016, 41.26042938232422, 18.338363647460938, 10.649002075195312, 41.67596435546875, 25.100109100341797, 19.159255981445312, 0.8512859344482422, 38.20513916015625, 33.39341735839844, -4.123943328857422, 17.998672485351562, 32.85234069824219, -35.91162109375, 4.390998840332031, 9.123870849609375, 4.7226715087890625, 24.017841339111328], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000213.npy"}
{"epoch": 0.6264705882352941, "step": 214, "batch_size": 128, "mean": 16.57670021057129, "std": 17.74347686767578, "min": -20.07465362548828, "p10": -0.9877403259277341, "median": 13.786479949951172, "p90": 38.82320098876953, "max": 85.96511840820312, "pos_frac": 0.8671875, "sample": [3.2529144287109375, 4.567695617675781, 19.373672485351562, 31.740711212158203, 36.144866943359375, 21.99152374267578, 34.35231018066406, -7.1142120361328125, 46.17265319824219, 56.285919189453125, -2.9781341552734375, -16.508468627929688, 3.2440719604492188, 3.0388946533203125, 5.0082550048828125, 3.34613037109375, 13.055160522460938, 6.857013702392578, 5.809455871582031, 28.72435760498047, 2.0282974243164062, 5.773347854614258, 27.060348510742188, -5.459377288818359, 15.519783020019531, 0.12391281127929688, 17.63703155517578, 18.68609046936035, 6.107025146484375, 14.9205322265625, 19.364540100097656, 2.891632080078125, 15.622779846191406, 32.888580322265625, 13.657867431640625, 8.720130920410156, -8.480632781982422, 5.078834533691406, 20.048171997070312, 10.229000091552734, 15.663337707519531, 12.8763427734375, 12.132865905761719, 56.32417297363281, 15.648941040039062, 3.4765281677246094, 13.579986572265625, 38.9580078125, 43.70643615722656, 7.332963943481445, 11.464668273925781, 18.68267822265625, -9.256568908691406, 25.231300354003906, 21.869110107421875, 15.728805541992188, 1.7750167846679688, 23.563003540039062, 18.570510864257812, -2.9680118560791016, 16.274459838867188, 5.3520355224609375, 10.297866821289062, 0.27635955810546875, -3.8430519104003906, -20.07465362548828, 8.283214569091797, 14.886810302734375, 85.96511840820312, 9.720510482788086, 15.792621612548828, 36.6380615234375, -19.626052856445312, 15.764564514160156, -0.895599365234375, 30.025146484375, 23.09510040283203, 13.83599853515625, 0.5555095672607422, 9.040512084960938, 38.76542663574219, 19.52534294128418, 35.52617645263672, 0.7537899017333984, 3.0977783203125, 41.37540817260742, 5.368770599365234, -0.4327259063720703, 38.6551513671875, 21.27044677734375, 8.565000534057617, 20.017263412475586, 39.97465515136719, -1.2027359008789062, 47.140525817871094, 10.9156494140625, 22.637527465820312, 65.59257507324219, 9.71478271484375, 4.829170227050781, 35.45851516723633, 14.037673950195312, 55.85652160644531, -5.0673370361328125, 23.090362548828125, 47.005218505859375, 13.736961364746094, 33.60760498046875, 9.454086303710938, 29.870807647705078, 6.563030242919922, 9.792388916015625, 6.843196868896484, 36.408599853515625, 10.917173385620117, 16.044692993164062, -2.9253463745117188, 20.90671157836914, 7.05523681640625, -0.02690887451171875, 29.92737579345703, 72.25375366210938, 27.146087646484375, 10.963691711425781, 29.07708740234375, -0.5216217041015625, 16.03106689453125, 21.713699340820312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000214.npy"}
{"epoch": 0.6294117647058823, "step": 215, "batch_size": 128, "mean": 16.453250885009766, "std": 16.868663787841797, "min": -25.9132080078125, "p10": -4.882379913330078, "median": 15.121501922607422, "p90": 41.42839050292969, "max": 58.0999755859375, "pos_frac": 0.8359375, "sample": [23.8857421875, 15.119422912597656, -1.9211959838867188, 38.373756408691406, 1.8467559814453125, 25.72711944580078, 0.20412445068359375, 12.257930755615234, 35.6707763671875, -8.00069808959961, 52.161827087402344, 10.993766784667969, 44.29241180419922, 48.428565979003906, 1.514007568359375, 7.2491455078125, 27.06744384765625, -25.9132080078125, 2.0295639038085938, -0.588775634765625, 3.0026092529296875, 7.034885406494141, 29.929006576538086, -4.9605712890625, 7.948947906494141, -13.526573181152344, 32.67877197265625, 40.26008605957031, 20.584442138671875, 58.0999755859375, -4.51414680480957, -11.630874633789062, 34.991943359375, 21.40332794189453, -6.253810882568359, 9.630119323730469, 2.7256546020507812, 4.807008743286133, -4.848869323730469, 45.636573791503906, 7.932731628417969, -3.8643836975097656, 23.652069091796875, 10.707643508911133, 23.507396697998047, 31.30145263671875, -7.586231231689453, 10.976303100585938, 18.47705841064453, 27.5843505859375, 8.622207641601562, 18.668861389160156, 46.420143127441406, 15.123580932617188, 45.28404235839844, -0.4646110534667969, -3.5631370544433594, 18.335386276245117, 36.20707702636719, 23.017105102539062, 7.581146240234375, 32.08772277832031, 43.29102325439453, 29.38372802734375, 17.8570556640625, 3.77264404296875, 18.56665802001953, -5.295307159423828, 42.43998718261719, 0.6750946044921875, 14.01873779296875, 14.935440063476562, -16.156024932861328, 20.168746948242188, 21.967269897460938, 5.108158111572266, 11.876785278320312, -16.936866760253906, 10.622577667236328, 20.731216430664062, 33.811279296875, 19.9599609375, 11.847476959228516, 17.42505645751953, 0.10558319091796875, -12.033214569091797, 12.461566925048828, 13.078598022460938, 18.090225219726562, 17.674789428710938, 31.99239158630371, 20.61343765258789, 24.235496520996094, 36.01433563232422, -0.34239864349365234, 37.823577880859375, 31.3238525390625, 11.532291412353516, 3.98651123046875, 17.829971313476562, 6.429679870605469, 10.115058898925781, 13.678962707519531, 13.074020385742188, 17.201417922973633, 9.859066009521484, 17.59796142578125, 41.23475646972656, 7.4058074951171875, 21.58843231201172, 13.080501556396484, -6.867948532104492, 26.276901245117188, 4.876434326171875, 23.33853530883789, 11.299118041992188, 15.356552124023438, 41.88020324707031, 33.963134765625, 25.105667114257812, 26.964447021484375, -6.776693344116211, 43.18353271484375, 51.83485412597656, 53.03509521484375, 9.609916687011719, 18.165573120117188, 5.6427154541015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000215.npy"}
{"epoch": 0.6323529411764706, "step": 216, "batch_size": 128, "mean": 16.67310333251953, "std": 17.4049015045166, "min": -25.228355407714844, "p10": -3.1414127349853516, "median": 15.172195434570312, "p90": 38.158140563964835, "max": 81.30604553222656, "pos_frac": 0.828125, "sample": [-3.1598243713378906, 15.299057006835938, -1.8252983093261719, 25.166828155517578, 21.837677001953125, 12.700613021850586, 32.32366943359375, 14.065713882446289, 0.45583343505859375, -14.444314956665039, 16.014739990234375, -14.198875427246094, 21.922866821289062, 9.39385986328125, 29.335731506347656, 32.14244079589844, 43.272613525390625, 15.15350341796875, 1.4235000610351562, -7.573646545410156, 56.4874267578125, 4.191093444824219, 23.83831787109375, 14.461715698242188, 8.572429656982422, 23.997825622558594, 45.76763153076172, 29.113082885742188, 10.832015991210938, 27.196823120117188, 28.500070571899414, 24.68950653076172, 9.731983184814453, 15.744958877563477, 4.71173095703125, 22.27972412109375, -0.6685771942138672, -3.2563743591308594, -3.1335220336914062, 5.9578094482421875, -1.1953449249267578, 10.669601440429688, 55.66962814331055, 20.978458404541016, 47.63081359863281, 16.997459411621094, 24.3538818359375, 15.806938171386719, 24.509292602539062, 32.10396957397461, 43.436866760253906, -7.4195709228515625, 0.48907470703125, 5.8289337158203125, 37.361083984375, 10.282075881958008, 9.026191711425781, 0.26320648193359375, 20.4591064453125, 10.156837463378906, 45.40113830566406, 17.84406280517578, 9.072647094726562, 33.07550048828125, 19.848114013671875, -5.88386344909668, 13.30352783203125, 5.998878479003906, 17.58605194091797, 2.8735580444335938, 7.5727691650390625, 7.448554992675781, 24.403594970703125, 37.399566650390625, 1.1947708129882812, 7.860387802124023, 6.206211090087891, 31.352615356445312, 40.805320739746094, -3.0719833374023438, 16.351364135742188, 27.286903381347656, -14.1212158203125, 81.30604553222656, 17.921707153320312, -25.228355407714844, 31.324440002441406, 14.126983642578125, 31.894638061523438, -2.8177547454833984, -0.41180419921875, 30.688827514648438, 9.87286376953125, 4.831642150878906, 25.382286071777344, 10.402585983276367, 11.479721069335938, 9.072113037109375, 1.7083892822265625, -4.383758544921875, -9.033859252929688, 5.950813293457031, -3.0110931396484375, 59.2706298828125, 12.669723510742188, 35.15895080566406, 29.99616241455078, 6.24530029296875, 10.560089111328125, 8.201545715332031, 21.28607177734375, -7.5513763427734375, -0.8023757934570312, -10.737701416015625, 35.53357696533203, 19.4581298828125, 26.270645141601562, 28.27557373046875, 27.357101440429688, 15.190887451171875, 54.96379089355469, 15.458284378051758, 41.239654541015625, 39.92814636230469, 15.923377990722656, 36.401214599609375, 13.452377319335938, 33.79150390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000216.npy"}
{"epoch": 0.6352941176470588, "step": 217, "batch_size": 128, "mean": 15.18509292602539, "std": 18.39366340637207, "min": -18.850008010864258, "p10": -6.167975425720215, "median": 13.725830078125, "p90": 36.59998321533203, "max": 111.67242431640625, "pos_frac": 0.8125, "sample": [3.6514892578125, 28.284740447998047, 45.757110595703125, 17.829421997070312, 28.89794921875, 3.2151718139648438, 3.208433151245117, -15.433937072753906, 20.610973358154297, 36.403499603271484, 34.18114471435547, -10.023162841796875, -18.850008010864258, -0.248199462890625, 23.444503784179688, 7.38836669921875, -4.471733093261719, 10.675140380859375, 2.0409927368164062, 39.38562774658203, 48.263145446777344, -5.938041687011719, 18.54387664794922, 2.899038314819336, 11.03665542602539, -14.960235595703125, 26.111343383789062, 4.5366363525390625, 7.374214172363281, -2.012042999267578, 13.880668640136719, 9.284095764160156, 48.46464538574219, 20.45166015625, -10.485851287841797, -6.7533416748046875, 40.20708465576172, 7.704109191894531, 9.732307434082031, 39.55713653564453, 18.387622833251953, 28.203582763671875, 11.724113464355469, 10.69992446899414, 10.152641296386719, 17.692527770996094, 0.51953125, 15.9598388671875, 39.11384582519531, 30.23754119873047, 6.134738922119141, -7.44757080078125, -9.889602661132812, 8.954727172851562, 19.871719360351562, 9.916297912597656, 31.536617279052734, 1.55511474609375, -2.8220081329345703, 57.032806396484375, 6.64227294921875, 9.447685241699219, 16.615821838378906, 7.136863708496094, 36.17823791503906, 42.17478942871094, 111.67242431640625, -2.8486480712890625, -9.30316162109375, -14.099456787109375, 25.265777587890625, 6.690589904785156, 9.95838737487793, 24.64215087890625, 9.202735900878906, 32.782073974609375, 15.803680419921875, -12.11444091796875, 19.109813690185547, -2.6341934204101562, 13.865234375, -6.443115234375, 23.471771240234375, 21.440574645996094, -6.050058364868164, 5.033929824829102, 23.56279754638672, 37.05844497680664, 17.378664016723633, 31.69178009033203, 49.59370803833008, 2.259744644165039, 27.537174224853516, 2.8637619018554688, 18.627105712890625, 9.973670959472656, -7.937808990478516, -2.7759132385253906, 16.91657257080078, 32.026268005371094, 24.652814865112305, 18.002609252929688, 13.58642578125, -4.427097320556641, 13.936737060546875, 19.01620864868164, 15.766866683959961, 16.76323127746582, 12.97507095336914, 11.953704833984375, 5.2933349609375, 5.688568115234375, 10.923141479492188, 24.027538299560547, 21.408172607421875, 0.5562896728515625, 20.57305908203125, 30.514694213867188, 24.997447967529297, 1.738800048828125, 27.841232299804688, 19.703964233398438, 21.35894775390625, 33.777313232421875, 70.2149429321289, 16.3111629486084, 5.181285858154297, -2.4410247802734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000217.npy"}
{"epoch": 0.638235294117647, "step": 218, "batch_size": 128, "mean": 17.16179847717285, "std": 16.912153244018555, "min": -24.112464904785156, "p10": -2.89584560394287, "median": 14.357495307922363, "p90": 41.86251373291015, "max": 69.7352294921875, "pos_frac": 0.875, "sample": [34.19203186035156, -5.648752212524414, 5.776233673095703, 17.00849151611328, 17.96027374267578, 4.001258850097656, 4.536151885986328, 9.07391357421875, 7.697792053222656, 8.408546447753906, 24.847511291503906, -9.246742248535156, -17.26988983154297, -3.668182373046875, 2.9376068115234375, 11.946937561035156, 11.697456359863281, 20.1534423828125, 33.421730041503906, 2.950765609741211, 33.3714599609375, -5.923088073730469, 0.515716552734375, 4.340229034423828, 17.01380157470703, -0.8284244537353516, 69.7352294921875, 5.068817138671875, 1.1601905822753906, 2.9328842163085938, 64.0890121459961, 49.02305603027344, 41.65821838378906, 14.32895278930664, 23.0863037109375, 21.346420288085938, 1.3502445220947266, 40.398414611816406, 12.19406509399414, 47.64990234375, -4.4253082275390625, 17.246522903442383, 5.974876403808594, 16.05170440673828, -24.112464904785156, -2.5648441314697266, 25.58123779296875, 6.401948928833008, 8.047065734863281, 3.0124740600585938, 53.505348205566406, 3.7763633728027344, 10.620809555053711, 45.22681427001953, 11.520263671875, 43.766998291015625, 20.368473052978516, 7.052726745605469, 22.34233856201172, 36.387996673583984, 9.729990005493164, 7.963890075683594, 18.34918975830078, 30.600265502929688, 2.6215133666992188, -6.425628662109375, -2.5546340942382812, 18.845199584960938, 44.34135055541992, 32.8947639465332, 12.748031616210938, 29.171520233154297, 10.89111328125, 13.371978759765625, 35.27177429199219, -7.416595458984375, 13.108451843261719, 18.25570297241211, 22.887672424316406, 16.42194366455078, 38.77903366088867, 13.766242980957031, 47.54123306274414, 7.260959625244141, 10.950128555297852, 16.40204620361328, 19.92780876159668, 34.207611083984375, 26.734756469726562, 10.753669738769531, 24.561424255371094, 14.386037826538086, 7.887962341308594, 20.353591918945312, 53.90630340576172, 8.53277587890625, 19.683292388916016, 4.5074310302734375, 7.5606536865234375, 35.681663513183594, 12.538810729980469, -4.96070671081543, 42.339202880859375, 25.690528869628906, 49.32432556152344, 10.96484375, 38.87835693359375, 29.45281219482422, 14.2645263671875, 32.31501007080078, 44.310211181640625, 21.42888641357422, 29.220733642578125, 6.342826843261719, 8.053153991699219, 5.435077667236328, 33.803123474121094, 12.628299713134766, 18.80229949951172, -12.327392578125, 28.63105010986328, 18.043254852294922, -8.017227172851562, 18.406784057617188, -6.262216567993164, 20.45926284790039, 16.59610939025879, 16.81683349609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000218.npy"}
{"epoch": 0.6411764705882353, "step": 219, "batch_size": 128, "mean": 16.190977096557617, "std": 15.525315284729004, "min": -20.233642578125, "p10": -1.7854461669921862, "median": 14.501121520996094, "p90": 39.33196868896484, "max": 51.4041748046875, "pos_frac": 0.875, "sample": [11.381797790527344, -3.720245361328125, 18.456687927246094, 14.369476318359375, 14.999542236328125, -9.999107360839844, 20.47882080078125, 24.941787719726562, 4.4636383056640625, -8.609382629394531, 4.535881042480469, 43.67060852050781, 10.481101989746094, 24.767364501953125, 8.531925201416016, 24.218931198120117, 31.526947021484375, 4.109233856201172, 23.736289978027344, -13.890029907226562, 15.938852310180664, 12.332582473754883, 42.86608123779297, 22.457542419433594, 5.918800354003906, 29.57293701171875, 0.6201725006103516, 6.25164794921875, 22.59039306640625, -20.233642578125, 6.610992431640625, 41.56248474121094, -2.6875762939453125, 0.7254714965820312, 0.4411506652832031, 6.7410736083984375, -0.5832443237304688, 39.24120330810547, 6.860950469970703, 19.83226776123047, 3.747039794921875, 45.92082214355469, 7.199989318847656, 2.6908721923828125, 36.41217803955078, -6.788948059082031, 51.4041748046875, 3.420440673828125, 19.971527099609375, -1.3988189697265625, 18.25752067565918, 38.99690246582031, 26.43102264404297, 3.8022232055664062, 44.69217300415039, 15.053657531738281, 21.864990234375, 24.825851440429688, 18.933448791503906, 34.497100830078125, 10.910396575927734, 4.450469970703125, 23.19207000732422, 6.867073059082031, 9.603755950927734, 16.30402946472168, 49.66710662841797, 13.191976547241211, 12.470333099365234, 3.14080810546875, 49.362884521484375, 19.160747528076172, 26.559814453125, 37.81591796875, 26.982927322387695, 11.185455322265625, 24.91895294189453, 28.936668395996094, 9.787738800048828, 7.660102844238281, 24.31725311279297, 33.83527374267578, 37.57672882080078, 7.085479736328125, 8.670513153076172, -5.712089538574219, 48.17045593261719, 31.945953369140625, 7.221748352050781, 24.357128143310547, 10.028327941894531, 42.322296142578125, 43.598793029785156, 8.755447387695312, 7.645328521728516, 15.640388488769531, 6.5037994384765625, 18.159629821777344, 8.04107666015625, 3.775177001953125, 6.151546478271484, 12.725730895996094, 31.05865478515625, 38.46356201171875, 33.20591735839844, -7.80076789855957, 18.711544036865234, 24.520156860351562, -8.214202880859375, -9.389640808105469, 7.1804962158203125, 1.6648445129394531, 26.763702392578125, 20.79358673095703, 2.8816890716552734, 16.975204467773438, -6.4608001708984375, 14.632766723632812, 24.70154571533203, 22.52521514892578, 6.997428894042969, 5.132942199707031, 18.337867736816406, 41.17179489135742, 5.5048370361328125, 39.54375457763672, -4.0453338623046875, -0.842437744140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000219.npy"}
{"epoch": 0.6441176470588236, "step": 220, "batch_size": 128, "mean": 14.894472122192383, "std": 16.12922477722168, "min": -17.71341323852539, "p10": -3.779333877563475, "median": 14.39169692993164, "p90": 36.00427398681641, "max": 61.25567626953125, "pos_frac": 0.7890625, "sample": [20.801040649414062, 38.538246154785156, 4.019969940185547, 18.716476440429688, -1.1908912658691406, -8.234966278076172, -1.761016845703125, 28.083358764648438, 15.66827392578125, 15.4859619140625, 11.502395629882812, 7.6106719970703125, 1.9963550567626953, 36.262542724609375, 22.611289978027344, -2.013519287109375, 21.31207847595215, 16.200082778930664, 61.221099853515625, 12.099830627441406, 17.50067901611328, 43.01061248779297, 18.576614379882812, 8.438919067382812, 17.875808715820312, -1.00860595703125, 19.075881958007812, 14.536998748779297, 11.841934204101562, 30.049087524414062, 12.853199005126953, 16.33519744873047, 16.787155151367188, 21.260879516601562, -3.043773651123047, 32.671730041503906, 10.906387329101562, 10.767044067382812, -11.024490356445312, 23.707443237304688, -9.843574523925781, -1.591217041015625, 18.30929183959961, 30.755062103271484, -3.1424732208251953, 2.6578903198242188, 27.054168701171875, 6.0414886474609375, -3.3905792236328125, -9.76348876953125, 20.689056396484375, 4.122285842895508, 29.40180206298828, 10.452495574951172, 28.888580322265625, 14.611078262329102, 54.926780700683594, 5.1489105224609375, 5.083202362060547, 11.85174560546875, 23.96605682373047, 21.623985290527344, -10.234359741210938, -2.428363800048828, 22.447052001953125, 14.877204895019531, 28.594863891601562, 36.91705322265625, 61.25567626953125, -1.5038909912109375, 15.491783142089844, 25.556678771972656, -2.4006271362304688, -6.5484466552734375, 4.419464111328125, 26.746673583984375, 16.794891357421875, 23.614391326904297, 27.996000289916992, 15.750732421875, 13.35198974609375, 10.822196960449219, 6.868175506591797, 0.7401809692382812, 56.91876220703125, -17.71341323852539, 15.1788330078125, 4.657108306884766, 20.316162109375, 38.48133087158203, 19.394065856933594, 54.418609619140625, 6.786460876464844, 14.246395111083984, 3.455026626586914, -0.4590911865234375, 5.626228332519531, 36.06549835205078, -3.000814437866211, 33.8088264465332, 33.32048034667969, 0.9872283935546875, 27.474029541015625, -5.8598785400390625, 23.903823852539062, 16.752777099609375, -11.336481094360352, 4.958992004394531, 7.370491027832031, 9.354211807250977, 11.593765258789062, 47.09342956542969, 27.408401489257812, 12.733528137207031, 46.82633972167969, 35.97803497314453, 14.828742980957031, -4.9952239990234375, 8.683280944824219, 5.1609954833984375, -1.7003288269042969, -8.597909927368164, 11.530265808105469, -4.686428070068359, 28.794998168945312, 32.22119903564453, -5.340843200683594, 6.824798583984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000220.npy"}
{"epoch": 0.6470588235294118, "step": 221, "batch_size": 128, "mean": 18.090559005737305, "std": 16.715408325195312, "min": -37.19880676269531, "p10": 0.33416900634765667, "median": 15.998188018798828, "p90": 39.21671829223633, "max": 84.10030364990234, "pos_frac": 0.90625, "sample": [12.585952758789062, 3.9108543395996094, -13.747222900390625, 24.593017578125, 27.892074584960938, 55.457275390625, 11.27679443359375, 19.93468475341797, 8.603673934936523, 11.440643310546875, 2.441925048828125, 13.670623779296875, 4.102449417114258, 8.298492431640625, 25.264862060546875, 51.4334716796875, 5.213844299316406, 30.7763671875, 19.53087615966797, 29.013736724853516, 6.706098556518555, 35.749717712402344, 46.50786590576172, 16.960067749023438, 22.540464401245117, 9.063926696777344, 6.036834716796875, 1.5421695709228516, 20.076263427734375, 16.623031616210938, 5.769721984863281, 16.990509033203125, 9.148918151855469, 0.727142333984375, 5.348955154418945, 15.987930297851562, 22.7142333984375, 19.881481170654297, 12.960948944091797, 29.069305419921875, 17.28870391845703, 23.67896270751953, 8.540596008300781, 17.011863708496094, 33.691978454589844, 84.10030364990234, 7.199832916259766, -6.77069091796875, 6.578105926513672, 0.4506378173828125, -4.7327728271484375, -37.19880676269531, 26.10931396484375, -2.4920120239257812, 13.730493545532227, 43.19541931152344, 18.114601135253906, 19.990005493164062, 14.26424789428711, 39.093101501464844, 36.0352783203125, -10.68343734741211, 13.921648025512695, 22.38727569580078, 7.861907958984375, -2.9811954498291016, 38.76782989501953, 16.539581298828125, 34.76295471191406, 39.505157470703125, 12.664222717285156, 16.008445739746094, 20.807510375976562, 10.95931625366211, 2.6053619384765625, 13.238338470458984, 0.062408447265625, 27.617767333984375, 12.237810134887695, 34.228515625, 10.106201171875, -2.2426376342773438, 22.76917266845703, 22.703266143798828, -0.3884162902832031, 39.96372604370117, 28.602127075195312, 38.382408142089844, 46.99616241455078, 27.706031799316406, 25.20589256286621, -7.775566101074219, -10.611534118652344, 18.82549285888672, 3.3661041259765625, 4.967071533203125, 9.86395263671875, 31.666343688964844, 18.407669067382812, 55.617897033691406, 7.540122985839844, 37.551666259765625, 5.0258636474609375, 30.775955200195312, 4.956583023071289, 15.205390930175781, 47.33991241455078, 11.7532958984375, 12.073905944824219, 37.144615173339844, 13.75172233581543, 13.033924102783203, 14.932418823242188, 10.450780868530273, 29.2110595703125, -1.970682144165039, 7.365531921386719, 42.68768310546875, 31.515884399414062, 16.209442138671875, 23.832672119140625, 12.344978332519531, 61.942691802978516, 7.220054626464844, 41.74772644042969, 14.533187866210938, 18.99847412109375, 23.794822692871094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000221.npy"}
{"epoch": 0.65, "step": 222, "batch_size": 128, "mean": 16.089292526245117, "std": 17.399826049804688, "min": -22.80182456970215, "p10": -3.9070108413696287, "median": 13.775932312011719, "p90": 37.809558868408196, "max": 67.8263931274414, "pos_frac": 0.8515625, "sample": [46.19074249267578, 7.288566589355469, 44.803401947021484, 7.46630859375, 12.949552536010742, -12.8162841796875, 19.57438850402832, 2.3626861572265625, 27.967193603515625, -8.178535461425781, 25.20580291748047, 10.722166061401367, 29.456207275390625, 61.207496643066406, 26.695716857910156, 18.41757583618164, 6.974639892578125, 3.8325881958007812, 10.17608642578125, 11.45233154296875, 13.063224792480469, 21.72216796875, 8.144668579101562, -1.44158935546875, 25.643278121948242, 32.739173889160156, 34.32408905029297, 7.707618713378906, 13.724098205566406, 32.58555603027344, 2.3116607666015625, 14.606697082519531, 13.154441833496094, 9.920772552490234, -3.890993118286133, 24.814315795898438, 13.785842895507812, 7.65521240234375, 9.278640747070312, -11.772293090820312, -10.404586791992188, 10.56732177734375, -0.3135223388671875, 6.291660308837891, 25.870256423950195, -16.739669799804688, 20.92689323425293, 31.732528686523438, -13.812149047851562, 7.232688903808594, 11.705039978027344, 7.3913421630859375, 4.699848175048828, -12.105865478515625, 16.418598175048828, 4.806365966796875, 15.649459838867188, 24.100751876831055, 20.151512145996094, 9.805736541748047, 50.54964828491211, 56.912109375, 3.5082015991210938, 22.640716552734375, 20.660011291503906, -3.86431884765625, 3.5200653076171875, 34.91477966308594, -5.10289192199707, 7.3074951171875, 14.538139343261719, 40.707427978515625, 10.084548950195312, 15.529930114746094, 20.101547241210938, 2.8130035400390625, 4.586954116821289, -3.944385528564453, 39.805625915527344, 67.8263931274414, 31.67669677734375, 1.4221038818359375, 15.377677917480469, 34.066505432128906, 21.839332580566406, 28.842636108398438, 43.7186279296875, 22.486370086669922, -2.223236083984375, 28.039058685302734, 18.048507690429688, 2.6123275756835938, 4.099433898925781, 25.059749603271484, -4.85504150390625, -7.8662109375, 19.789573669433594, -22.80182456970215, 21.082538604736328, 18.7288818359375, 19.972015380859375, 7.243366241455078, 59.096031188964844, 2.7033233642578125, 2.6022987365722656, 25.630027770996094, 20.855384826660156, 5.26953125, 7.562934875488281, 33.168426513671875, 14.371753692626953, 64.33692932128906, 14.138969421386719, 24.10533905029297, 21.450241088867188, -1.7706222534179688, 44.538307189941406, 52.53126525878906, 9.423452377319336, 26.021379470825195, 3.1679611206054688, 24.055503845214844, 10.41927719116211, 13.766021728515625, 36.9541015625, -17.345260620117188, 13.296615600585938, 35.82666015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000222.npy"}
{"epoch": 0.6529411764705882, "step": 223, "batch_size": 128, "mean": 17.061695098876953, "std": 17.33498191833496, "min": -19.13983154296875, "p10": -1.9783700942993159, "median": 13.48373794555664, "p90": 37.27981567382811, "max": 72.93136596679688, "pos_frac": 0.84375, "sample": [7.486198425292969, -3.5558929443359375, 20.206893920898438, 58.98352813720703, 8.877334594726562, 19.47699737548828, 14.352195739746094, -1.8037109375, 40.46803283691406, 40.52283477783203, -8.951927185058594, 17.07434844970703, 22.42784881591797, 12.622032165527344, 18.02652931213379, 30.32819366455078, 20.19391632080078, 28.232749938964844, 27.140365600585938, 1.6890869140625, 23.649986267089844, 25.168865203857422, -10.418540954589844, 34.88401794433594, 12.964290618896484, 12.640914916992188, 22.34416961669922, 16.755958557128906, 1.5981101989746094, 27.3382568359375, 20.928085327148438, 12.311859130859375, 12.798385620117188, 10.361570358276367, 8.601509094238281, 31.44354248046875, 5.1951904296875, -5.884296417236328, 11.39565658569336, -14.003036499023438, -1.675018310546875, 12.117103576660156, 49.628089904785156, 33.801048278808594, 7.0815277099609375, 32.72541046142578, 23.961780548095703, 13.74859619140625, 6.044715881347656, 72.93136596679688, 30.162437438964844, 5.858974456787109, 2.221406936645508, 43.87762451171875, 5.415313720703125, 13.1181640625, 1.868133544921875, 15.858283996582031, 61.908172607421875, -0.19310760498046875, 5.385795593261719, 15.41988754272461, 18.6083984375, -19.13983154296875, -4.292938232421875, 6.30224609375, 8.066986083984375, 14.168575286865234, 30.378662109375, 30.005218505859375, 21.7974853515625, 11.74444580078125, 57.24891662597656, 9.541847229003906, 10.464981079101562, 17.614070892333984, 11.712440490722656, -7.5509796142578125, 12.835624694824219, 3.3974151611328125, 30.954177856445312, 12.376792907714844, -0.7067356109619141, 13.218879699707031, 9.123504638671875, 59.753387451171875, 8.634872436523438, 54.571109771728516, 16.057113647460938, 18.934051513671875, 14.9522705078125, -8.7467041015625, -6.6484222412109375, 5.7634429931640625, -2.3859081268310547, 22.200191497802734, 12.582847595214844, 50.204994201660156, 69.65408325195312, 27.4471435546875, -0.2607879638671875, 25.317764282226562, 3.4899978637695312, 22.53280258178711, -5.7488250732421875, 30.044036865234375, 30.454296112060547, 13.040067672729492, 25.57770538330078, 32.39715576171875, 30.581588745117188, -0.6462936401367188, 26.23907470703125, -0.8188266754150391, 22.24201202392578, 35.91343688964844, 19.410537719726562, 10.68267822265625, 16.865219116210938, 19.403640747070312, 1.7880306243896484, 11.56829833984375, 1.4612579345703125, 3.107166290283203, -2.4146728515625, 54.2196044921875, 20.066909790039062, 11.400718688964844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000223.npy"}
{"epoch": 0.6558823529411765, "step": 224, "batch_size": 128, "mean": 17.46242904663086, "std": 17.734149932861328, "min": -17.163467407226562, "p10": -3.3094924926757803, "median": 16.685736656188965, "p90": 40.309897613525386, "max": 105.12631225585938, "pos_frac": 0.84375, "sample": [17.213102340698242, 31.162147521972656, 15.303932189941406, 0.7190532684326172, 5.6165618896484375, -10.520343780517578, 17.43927001953125, 6.4116058349609375, 56.885887145996094, 1.1574535369873047, 105.12631225585938, 40.598304748535156, 9.346172332763672, 46.598724365234375, 22.376251220703125, 27.10918426513672, 7.969259262084961, -17.163467407226562, 26.44610595703125, 4.9509735107421875, 15.633529663085938, 12.507339477539062, 10.679107666015625, 5.5934295654296875, 4.498268127441406, 17.40290069580078, 21.534648895263672, 23.443336486816406, 43.861595153808594, 18.372879028320312, -15.261550903320312, 26.563827514648438, 24.863868713378906, -1.9091567993164062, -1.1669940948486328, 16.158370971679688, 24.21703338623047, -6.5582427978515625, 24.89520263671875, 30.359352111816406, -10.067901611328125, 17.989120483398438, 24.815166473388672, -3.0355987548828125, 41.59394836425781, 10.092796325683594, 4.128150939941406, -6.666473388671875, 20.5787353515625, -0.5198974609375, 19.454532623291016, -0.4883899688720703, 35.42131805419922, 21.110153198242188, 11.827316284179688, 3.471902847290039, 31.37102508544922, 8.064876556396484, 25.71807861328125, 28.815933227539062, 8.553993225097656, 23.095375061035156, 18.869895935058594, -12.752838134765625, 4.1395111083984375, 14.143224716186523, 13.822105407714844, 47.91124725341797, 49.35308074951172, 4.0969696044921875, 20.0311279296875, 12.345962524414062, 18.470722198486328, 26.830446243286133, 38.986751556396484, 21.184249877929688, -9.128227233886719, 11.477092742919922, 23.796173095703125, 28.423675537109375, -5.175746917724609, 32.43914794921875, 57.17359161376953, 8.254480361938477, 14.70831298828125, 3.8700332641601562, -3.948577880859375, 19.221820831298828, 21.298721313476562, 29.590713500976562, 25.983219146728516, 2.5764236450195312, 48.21620178222656, -3.994842529296875, 40.18611145019531, 36.75079345703125, 12.858295440673828, 14.52069091796875, 13.475088119506836, 9.132743835449219, 39.099822998046875, 7.577049255371094, 17.690872192382812, -1.930816650390625, 19.566471099853516, -9.11639404296875, 6.5517120361328125, 1.55072021484375, 40.18629455566406, 46.836029052734375, 32.682716369628906, 30.28009033203125, 10.5826416015625, 43.50443649291992, 13.287433624267578, 5.689018249511719, 48.30397033691406, 24.120895385742188, -2.1761932373046875, 24.555419921875, -4.610828399658203, 5.736968994140625, 13.319112777709961, 5.885765075683594, 28.66634750366211, 9.530311584472656, 18.399559020996094, 24.523757934570312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000224.npy"}
{"epoch": 0.6588235294117647, "step": 225, "batch_size": 128, "mean": 17.27077865600586, "std": 18.607023239135742, "min": -33.44420623779297, "p10": -5.070985794067382, "median": 16.919696807861328, "p90": 41.06399154663085, "max": 99.78598022460938, "pos_frac": 0.828125, "sample": [-5.394023895263672, 8.858266830444336, 22.048873901367188, 9.705184936523438, 48.14012908935547, 28.097930908203125, -7.358421325683594, 16.524221420288086, 8.393882751464844, -7.3614654541015625, 18.03862762451172, -0.7981891632080078, 22.853424072265625, 10.013004302978516, 17.127662658691406, 25.703887939453125, 8.660751342773438, 16.71173095703125, 31.67724609375, 40.40656280517578, -4.9325408935546875, 13.408727645874023, -9.9173583984375, -2.2259521484375, 5.698007583618164, 22.508697509765625, 45.69690704345703, 44.015113830566406, 15.038543701171875, 16.6688289642334, 10.542808532714844, 18.10187530517578, 50.02998352050781, 34.07927322387695, 30.072330474853516, -10.202186584472656, -3.9139938354492188, -0.091552734375, 19.129467010498047, 31.10382080078125, 18.661598205566406, 59.880584716796875, 14.46307373046875, 6.11823844909668, 12.537612915039062, 20.7069091796875, 17.4781494140625, -13.126260757446289, 55.12861633300781, 24.67937660217285, -6.656005859375, 32.09605407714844, 43.40111541748047, 1.8581695556640625, -7.010162353515625, -33.44420623779297, 27.648422241210938, 54.79255676269531, 14.763763427734375, 31.46068572998047, 18.536104202270508, 1.3551101684570312, 17.33761215209961, 24.135087966918945, 32.53131866455078, 29.59168243408203, 21.797561645507812, 18.303466796875, 16.281829833984375, 68.06312561035156, 21.76056671142578, 1.5342864990234375, 17.77630615234375, -17.821029663085938, 24.612197875976562, 20.13170623779297, -8.53533935546875, 10.679550170898438, 14.224296569824219, 32.848907470703125, 19.557945251464844, 10.507537841796875, 15.347282409667969, 2.070039749145508, 9.246498107910156, 5.704675674438477, 14.437843322753906, 24.182952880859375, -0.1369152069091797, 5.072364807128906, 10.413192749023438, 6.325344085693359, 15.377166748046875, 42.14479064941406, 18.938796997070312, -16.072128295898438, 8.015159606933594, -1.6405868530273438, 13.191719055175781, 32.71617126464844, 27.926010131835938, 17.951576232910156, 40.600791931152344, 14.001235961914062, 24.207672119140625, 19.03476333618164, 12.355527877807617, 18.62542724609375, 18.20885467529297, 24.88818359375, 32.29920196533203, 46.82379150390625, 54.06901168823242, -4.4163055419921875, 6.78204345703125, 39.54711151123047, 5.789730072021484, -2.407684326171875, -5.84234619140625, 19.693252563476562, 25.394912719726562, 1.1817874908447266, 20.808395385742188, 24.062843322753906, 6.607330322265625, 99.78598022460938, 3.80145263671875, 6.036685943603516], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000225.npy"}
{"epoch": 0.6617647058823529, "step": 226, "batch_size": 128, "mean": 15.771088600158691, "std": 15.012306213378906, "min": -25.546417236328125, "p10": -0.17237377166747886, "median": 13.522735595703125, "p90": 36.60405235290527, "max": 69.6268310546875, "pos_frac": 0.8984375, "sample": [6.1197509765625, 20.973220825195312, 13.4598388671875, 21.37085723876953, 50.036476135253906, 7.8634185791015625, -2.8816070556640625, 39.97830581665039, 4.247005462646484, -25.546417236328125, -13.990856170654297, -1.238800048828125, 19.856369018554688, 15.114837646484375, 31.54700469970703, 40.38806915283203, 23.785980224609375, 13.189159393310547, 16.83538818359375, 13.280914306640625, 16.663494110107422, 3.787504196166992, 18.97638702392578, -12.208969116210938, 11.535856246948242, 20.077112197875977, 25.4815673828125, 1.8247489929199219, -1.626699447631836, 14.376541137695312, 36.191001892089844, 22.24362564086914, 3.596323013305664, 23.042526245117188, 25.996429443359375, 69.6268310546875, 28.585159301757812, 25.80229949951172, 9.021957397460938, 12.833328247070312, 15.660140991210938, 2.82568359375, 0.2846660614013672, 18.841514587402344, 11.195343017578125, 13.58563232421875, 15.980316162109375, 46.395362854003906, -6.487720489501953, -6.7904205322265625, 47.95160675048828, -8.763687133789062, 14.287572860717773, 12.793863296508789, 13.135354995727539, 30.341127395629883, 19.222816467285156, 5.874786376953125, 15.068401336669922, 1.846527099609375, 63.20254898071289, -7.602293014526367, 31.988731384277344, 23.340171813964844, 11.6668701171875, 3.3474960327148438, 27.568517684936523, 8.668098449707031, 6.510494232177734, 5.192464828491211, 17.646875381469727, 37.56783676147461, 23.13031578063965, 26.411457061767578, 5.73284912109375, 18.71954917907715, 26.13861846923828, 9.399919509887695, 47.149566650390625, 23.375347137451172, 2.1842117309570312, 17.933364868164062, 27.69287109375, 11.596036911010742, 9.350109100341797, 19.469528198242188, 5.4014434814453125, 11.7340087890625, 38.60346984863281, 12.231643676757812, 21.06536865234375, 15.42654037475586, 32.37115478515625, 9.779125213623047, 14.576751708984375, 8.69195556640625, 8.250228881835938, -11.4715576171875, 40.1595458984375, 11.593494415283203, 14.191871643066406, 4.626071929931641, 14.453033447265625, 10.013565063476562, 15.582389831542969, 27.2103271484375, 4.95543098449707, -4.294132232666016, 42.067413330078125, 2.1014556884765625, 8.010635375976562, 29.369110107421875, -2.882932662963867, 3.12567138671875, 9.573822021484375, 28.997390747070312, 6.673622131347656, 11.718023300170898, 29.2723388671875, 7.51934814453125, 13.3128662109375, 41.735111236572266, 5.3194580078125, 16.7373046875, 12.040046691894531, 12.472824096679688, 4.952178955078125, 20.58553123474121], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000226.npy"}
{"epoch": 0.6647058823529411, "step": 227, "batch_size": 128, "mean": 16.137550354003906, "std": 16.11313247680664, "min": -24.901824951171875, "p10": -4.45092430114746, "median": 15.532337188720703, "p90": 36.41879806518555, "max": 74.30892944335938, "pos_frac": 0.859375, "sample": [15.033241271972656, 26.57323455810547, 14.94256591796875, 0.10159492492675781, 20.28075408935547, -2.488311767578125, 14.500236511230469, 21.217819213867188, 25.54498291015625, 31.23581886291504, 6.349185943603516, 4.0164031982421875, 20.59320068359375, -11.085145950317383, 7.07135009765625, 4.698036193847656, 23.048927307128906, 51.33888244628906, 27.998291015625, 32.604835510253906, 1.7365188598632812, 8.281793594360352, 74.30892944335938, 28.75567626953125, 22.551795959472656, 36.76673889160156, -9.598960876464844, 29.555862426757812, -19.962867736816406, 20.781063079833984, 23.155487060546875, -12.637197494506836, 8.133197784423828, -6.53594970703125, 19.728487014770508, 18.934123992919922, 28.096603393554688, 20.775070190429688, 29.110107421875, 16.14104461669922, 41.93035888671875, -6.228179931640625, 1.921875, 37.38072204589844, -10.771942138671875, 43.976898193359375, 6.727277755737305, 3.3118743896484375, 1.6859588623046875, -4.2911224365234375, 38.248619079589844, 7.2042388916015625, 25.207870483398438, 10.084014892578125, 9.941207885742188, 19.40644073486328, 13.581047058105469, 9.611370086669922, 36.09689712524414, 32.28007507324219, 4.805440902709961, 27.792804718017578, -5.110801696777344, 13.80926513671875, 6.248743057250977, 17.71441650390625, 37.35260009765625, 13.814094543457031, 19.318756103515625, 7.991859436035156, -5.8090362548828125, 22.029190063476562, 12.668632507324219, 11.940921783447266, -4.823795318603516, 7.090410232543945, 21.35587501525879, 36.73094940185547, 18.115966796875, 34.320648193359375, 35.91632843017578, 11.763191223144531, 16.03143310546875, 17.720291137695312, 13.20977783203125, 1.9995880126953125, 24.783157348632812, 20.78583526611328, 24.01456642150879, 2.378864288330078, 18.87541961669922, 41.46394348144531, 18.306129455566406, 25.829132080078125, 5.310895919799805, 11.093070983886719, -2.4343223571777344, 28.151165008544922, -24.901824951171875, 21.629592895507812, 16.246383666992188, 59.72876739501953, -0.17805862426757812, 5.864051818847656, 14.991352081298828, 21.368263244628906, 31.0472412109375, 8.97900390625, 36.28501892089844, 9.763420104980469, 58.57375717163086, 3.0911026000976562, 30.319412231445312, 13.058616638183594, -0.2544097900390625, 16.52356719970703, 36.75431823730469, 32.49287796020508, 13.550933837890625, 11.56612777709961, 1.2617721557617188, -9.473739624023438, 11.047119140625, -9.026092529296875, 24.775184631347656, 4.495571136474609, 19.516021728515625, 4.996606826782227], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000227.npy"}
{"epoch": 0.6676470588235294, "step": 228, "batch_size": 128, "mean": 19.04955291748047, "std": 17.698801040649414, "min": -19.361263275146484, "p10": -1.2754022598266601, "median": 16.82970428466797, "p90": 41.05279006958008, "max": 95.9929428100586, "pos_frac": 0.875, "sample": [5.384227752685547, 11.477615356445312, 23.75537872314453, 57.79803466796875, -12.926708221435547, 6.43701171875, 21.614456176757812, -1.2334632873535156, 54.472312927246094, 27.375900268554688, 6.8618927001953125, -7.505592346191406, -4.559989929199219, 31.019119262695312, 5.192596435546875, 8.085489273071289, 15.4735107421875, 20.745391845703125, 5.85560417175293, 10.63873291015625, 31.061782836914062, 15.909156799316406, 7.771583557128906, 4.617744445800781, 25.15102767944336, 7.119476318359375, 39.260986328125, 24.348541259765625, 49.26252746582031, 32.43769073486328, 19.429412841796875, 5.790412902832031, 45.4923095703125, -11.621862411499023, 29.518524169921875, 17.75025177001953, 2.4273681640625, 7.646751403808594, 43.872039794921875, -19.361263275146484, 42.48401641845703, 36.71400451660156, 36.183982849121094, 5.6274566650390625, -5.846473693847656, 12.004226684570312, 35.37223815917969, 29.277976989746094, 8.221580505371094, 14.649497985839844, 31.879329681396484, 31.453685760498047, -1.9066543579101562, 2.774385452270508, -5.891284942626953, 37.704254150390625, 14.668262481689453, 33.9677619934082, 19.29977798461914, 36.77020263671875, 41.00566101074219, 28.39788818359375, 12.983715057373047, 48.9993896484375, 6.0456695556640625, -0.6263656616210938, 36.051025390625, 15.094987869262695, 1.3683013916015625, 22.127098083496094, 11.298088073730469, -5.245059967041016, 31.417160034179688, 21.030166625976562, 0.3727684020996094, 13.090850830078125, 6.239418029785156, 6.728965759277344, -1.5084190368652344, 29.972122192382812, 45.201637268066406, 22.483070373535156, -1.5149993896484375, -1.2645092010498047, 9.929948806762695, 25.84234619140625, 41.162757873535156, -1.3008193969726562, 39.077239990234375, -6.419837951660156, 95.9929428100586, 21.100460052490234, 52.744110107421875, 3.337860107421875, 11.088199615478516, 14.505165100097656, 11.457599639892578, 33.02821350097656, 15.406875610351562, 2.5486106872558594, 5.519020080566406, 35.03662109375, 30.61907958984375, 12.814704895019531, 25.556320190429688, 25.363983154296875, 27.13922882080078, 7.0368804931640625, 19.577251434326172, 21.9097900390625, 21.504806518554688, 54.52003479003906, 33.73329162597656, 25.535987854003906, 14.311798095703125, 9.151697158813477, 2.7288684844970703, 1.0233612060546875, 1.4741401672363281, 23.04132080078125, 43.83513641357422, 23.088340759277344, 4.6589508056640625, 2.7967071533203125, 24.745033264160156, 34.873390197753906, 32.35313415527344, 35.88949966430664], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000228.npy"}
{"epoch": 0.6705882352941176, "step": 229, "batch_size": 128, "mean": 18.527137756347656, "std": 16.532381057739258, "min": -25.366485595703125, "p10": -2.5273923873901367, "median": 17.93913459777832, "p90": 37.35498962402344, "max": 82.15021514892578, "pos_frac": 0.875, "sample": [18.80224609375, 29.66303253173828, 36.39830017089844, 19.577350616455078, 16.452651977539062, 11.490755081176758, 27.405364990234375, 19.509262084960938, 9.90460205078125, 11.857637405395508, 23.501419067382812, 37.28418731689453, 21.7034912109375, 1.28167724609375, 17.176292419433594, 18.009056091308594, 11.886054992675781, 31.626564025878906, 26.015724182128906, -4.27978515625, 9.623458862304688, 9.002655029296875, 35.69575500488281, 16.946090698242188, 17.828746795654297, 13.799274444580078, 14.278654098510742, 16.97589111328125, 5.504550933837891, 62.2109375, -15.515571594238281, 19.850830078125, 26.471519470214844, 18.287277221679688, 52.02820587158203, 40.41069793701172, 7.791404724121094, -2.542726516723633, 20.7706298828125, 13.221284866333008, 13.938407897949219, 22.355606079101562, -25.366485595703125, 6.657163619995117, 58.47865295410156, 10.191131591796875, 16.44585418701172, 8.738822937011719, 28.24071502685547, 14.431449890136719, 31.005455017089844, -8.670251846313477, 2.9122962951660156, 4.6706085205078125, 40.58662414550781, 15.313861846923828, 35.666053771972656, -3.91748046875, 6.102100372314453, -1.1161117553710938, 33.75889587402344, 17.240760803222656, 18.376102447509766, 3.5639572143554688, -2.9941177368164062, 2.179962158203125, 31.996551513671875, 33.08104705810547, 16.163414001464844, -17.296775817871094, 82.15021514892578, -2.5208206176757812, -0.7379016876220703, 0.8218002319335938, 26.26678466796875, 5.2310791015625, 24.25133514404297, 28.542495727539062, 17.900394439697266, 31.11871337890625, 28.310958862304688, 19.4486083984375, 12.067573547363281, -5.626152038574219, 40.213134765625, 9.518203735351562, 3.9915542602539062, 44.92402648925781, 16.622034072875977, 71.08126068115234, 16.681060791015625, 19.44965362548828, 7.885503768920898, 19.767303466796875, 21.586509704589844, 28.95477294921875, 17.977874755859375, 42.87767028808594, 13.507844924926758, 20.31957244873047, 43.45638656616211, 18.572288513183594, 28.571868896484375, -7.165302276611328, 13.73382568359375, 19.852134704589844, 17.702190399169922, 12.16204833984375, 28.668838500976562, 23.772232055664062, 19.70199966430664, 34.63776397705078, 25.129425048828125, 15.615234375, 4.31817626953125, -10.807662963867188, 23.81963348388672, -7.707309722900391, 14.807785034179688, 14.455825805664062, 37.52019500732422, 22.013580322265625, 27.207046508789062, 22.00475311279297, 23.425277709960938, 40.83870315551758, -4.53350830078125, 26.477340698242188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000229.npy"}
{"epoch": 0.6735294117647059, "step": 230, "batch_size": 128, "mean": 14.479290008544922, "std": 14.388839721679688, "min": -19.601905822753906, "p10": -2.8243188858032218, "median": 13.016532897949219, "p90": 32.03919334411621, "max": 58.24872589111328, "pos_frac": 0.8359375, "sample": [9.210769653320312, 3.1154212951660156, -3.6795883178710938, 1.718353271484375, 8.214225769042969, 17.772537231445312, 16.60272216796875, 11.346710205078125, 10.489212036132812, 2.833953857421875, 6.426383972167969, 51.10224914550781, -4.291412353515625, 31.95050048828125, 27.668106079101562, -3.2822399139404297, 40.62853240966797, -0.91778564453125, 31.429550170898438, -0.01464080810546875, 52.5517578125, 21.092243194580078, 1.0849113464355469, 5.0120086669921875, -1.7665424346923828, 28.188209533691406, 28.527084350585938, 1.7839393615722656, 1.5576324462890625, 29.69934844970703, 6.90130615234375, 37.20623779296875, 26.176918029785156, -7.531761169433594, 9.838897705078125, 7.2315673828125, 39.19750213623047, 17.291061401367188, -1.5124435424804688, 13.031314849853516, 16.80596160888672, 23.218854904174805, 13.315391540527344, 1.8214530944824219, 13.001750946044922, 25.74895668029785, 18.556074142456055, 18.721729278564453, 8.972679138183594, 3.4865036010742188, 10.525962829589844, 13.990371704101562, 8.186683654785156, 19.04315185546875, -0.4192352294921875, 42.234397888183594, -3.5742034912109375, 6.4413299560546875, 27.911453247070312, 36.554931640625, 31.115345001220703, 26.707794189453125, 1.6288890838623047, 12.168731689453125, 27.502403259277344, 23.448898315429688, 11.91741943359375, 27.25230598449707, 13.5965576171875, 22.02303695678711, 26.896697998046875, 23.11465835571289, 8.613960266113281, 12.955310821533203, -6.33258056640625, 21.902145385742188, 7.048320770263672, 15.580841064453125, 34.64276885986328, 3.5758438110351562, 25.854957580566406, -6.790863037109375, 5.301750183105469, 21.785701751708984, -5.226499557495117, 16.644760131835938, 23.874921798706055, 24.75946044921875, 10.212045669555664, 17.618804931640625, 8.50273323059082, 2.6262893676757812, 26.62915802001953, -1.018218994140625, -4.659782409667969, 6.358255386352539, -12.742015838623047, 13.292106628417969, 7.962718963623047, 4.384368896484375, -19.601905822753906, 0.7170562744140625, -1.896127700805664, 2.61456298828125, 33.52531433105469, 26.70206069946289, 58.24872589111328, 33.79193115234375, 32.24614334106445, 17.49090576171875, 16.677879333496094, -10.94091796875, 22.603904724121094, 28.019264221191406, 7.05091667175293, 22.430429458618164, 21.353408813476562, -2.6280670166015625, 20.696609497070312, 4.373645782470703, 6.8033447265625, 18.26068878173828, -9.0762939453125, 10.194526672363281, 21.81731414794922, 25.834476470947266, 10.687545776367188, 46.192813873291016], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000230.npy"}
{"epoch": 0.6764705882352942, "step": 231, "batch_size": 128, "mean": 13.9679536819458, "std": 16.6781063079834, "min": -20.149559020996094, "p10": -3.043635559082031, "median": 12.004749298095703, "p90": 36.3568733215332, "max": 69.774658203125, "pos_frac": 0.828125, "sample": [19.215927124023438, 1.1331634521484375, -2.9679832458496094, 13.11065673828125, 12.061416625976562, 1.020965576171875, -13.600616455078125, 28.97466468811035, 46.00660705566406, 2.5671443939208984, -1.8050003051757812, 7.345977783203125, 17.489883422851562, 20.99658203125, 40.183067321777344, 30.602813720703125, 19.429649353027344, 3.3895263671875, 2.9476547241210938, 5.8915252685546875, -6.435905456542969, 4.121761322021484, -9.844131469726562, 14.188735961914062, -12.655563354492188, -19.351226806640625, 10.918205261230469, 0.8159732818603516, 13.267385482788086, 3.4128952026367188, 7.138481140136719, -9.407516479492188, -2.00689697265625, 11.478256225585938, 7.976324081420898, -2.980623245239258, 21.819229125976562, 12.832550048828125, 69.774658203125, 21.238977432250977, 8.881011962890625, 4.466217041015625, 36.300621032714844, 61.669715881347656, 9.828432083129883, 20.848373413085938, -15.25640869140625, 24.071786880493164, 42.855369567871094, -4.35633659362793, 2.5451126098632812, 9.632465362548828, -15.454185485839844, 24.933727264404297, 6.794765472412109, 21.513704299926758, 5.484397888183594, 17.903440475463867, 13.202117919921875, 24.353073120117188, 22.620702743530273, 38.40690612792969, 10.315292358398438, 16.216270446777344, -1.22259521484375, 1.0633544921875, -1.1478042602539062, 23.549774169921875, 26.564178466796875, 35.47193908691406, 14.921749114990234, -4.22186279296875, 19.78986358642578, 37.558380126953125, -0.6407470703125, 35.497222900390625, 4.542137145996094, 52.56884765625, 21.025239944458008, 3.2305908203125, 12.335807800292969, 14.659797668457031, 11.948081970214844, 33.462554931640625, 6.463523864746094, -0.14876174926757812, 4.2562255859375, 52.45387268066406, 2.076465606689453, 31.105789184570312, -0.04216766357421875, 4.9064483642578125, 34.39750671386719, 31.784669876098633, 22.26358413696289, 1.7169723510742188, 10.097259521484375, 9.911689758300781, 15.21014404296875, 4.448200225830078, 7.329309463500977, -3.190664291381836, 21.91009521484375, 46.16545486450195, -20.149559020996094, 24.54095458984375, 13.011436462402344, 18.795989990234375, 1.3304367065429688, 5.287815093994141, 59.08251953125, 16.151451110839844, 14.594232559204102, 10.189346313476562, 14.368099212646484, 19.137187957763672, 19.70209503173828, 4.008575439453125, 36.488128662109375, 30.259239196777344, 22.262611389160156, 16.8904972076416, 13.359443664550781, 39.661590576171875, 9.492324829101562, 8.933074951171875, -14.045387268066406, 2.4020519256591797], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000231.npy"}
{"epoch": 0.6794117647058824, "step": 232, "batch_size": 128, "mean": 16.91567611694336, "std": 17.309629440307617, "min": -30.687192916870117, "p10": 0.25654869079589854, "median": 14.861255645751953, "p90": 38.692678451538086, "max": 111.04315185546875, "pos_frac": 0.90625, "sample": [4.318355560302734, 30.250701904296875, -21.68277931213379, 2.3005313873291016, 23.177627563476562, 31.709583282470703, 14.498603820800781, 8.957313537597656, 16.225160598754883, 16.919559478759766, -0.585296630859375, 15.977096557617188, 30.69877052307129, 52.02191162109375, 16.527740478515625, 2.3879776000976562, -12.272907257080078, 6.963920593261719, 31.48291778564453, -9.955257415771484, 18.450607299804688, 14.262901306152344, 6.8718719482421875, 1.3474197387695312, 38.458526611328125, 43.512603759765625, 16.217132568359375, 31.443099975585938, 23.823631286621094, 12.439849853515625, 9.622661590576172, -0.5030593872070312, 7.593435287475586, -1.122528076171875, 19.1505126953125, 13.198966979980469, 32.07731628417969, 24.98944091796875, 10.895793914794922, 14.449947357177734, 14.832725524902344, 38.29938507080078, -12.267129898071289, 28.406644821166992, 5.265663146972656, 16.660320281982422, 16.47789764404297, 16.0809326171875, 36.97636413574219, 25.504837036132812, 19.290130615234375, 11.615158081054688, 27.20201873779297, 7.273468017578125, 13.46185302734375, 8.447807312011719, 24.74322509765625, 0.17945480346679688, 29.560821533203125, 21.2015323638916, 10.5318603515625, 41.481666564941406, 39.457244873046875, 18.864364624023438, -1.77130126953125, 15.531753540039062, 29.443363189697266, 4.079460144042969, 26.26726531982422, 9.928497314453125, 18.123985290527344, 3.812286376953125, 2.357757568359375, 11.355575561523438, 5.622577667236328, 39.23903274536133, 12.715118408203125, 3.264373779296875, 21.14849090576172, -0.539093017578125, 8.164566040039062, 10.503396987915039, 4.077674865722656, 27.63250732421875, 42.12830352783203, 18.58685302734375, -5.537422180175781, 15.642425537109375, 10.236854553222656, 9.215003967285156, 29.302078247070312, 46.26513671875, 111.04315185546875, 27.698989868164062, 0.954132080078125, 27.088409423828125, -6.01092529296875, 15.446380615234375, 14.695480346679688, 46.68949890136719, 0.488922119140625, 11.748611450195312, 11.934972763061523, 12.594192504882812, 50.61688995361328, 1.6413803100585938, 9.943679809570312, 61.5863037109375, 15.2484130859375, 5.264488220214844, 10.546417236328125, 48.51716613769531, 14.889785766601562, 16.157962799072266, 50.00785827636719, 6.781454086303711, 0.28958892822265625, 27.778358459472656, -30.687192916870117, 11.469367980957031, 19.748538970947266, 0.4403533935546875, 20.56006622314453, 24.932762145996094, 20.16802978515625, 6.54656982421875, 17.454605102539062, 7.485450744628906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000232.npy"}
{"epoch": 0.6823529411764706, "step": 233, "batch_size": 128, "mean": 18.198204040527344, "std": 15.432136535644531, "min": -20.63111114501953, "p10": 0.004160308837891757, "median": 16.866823196411133, "p90": 39.379313659667964, "max": 66.03140258789062, "pos_frac": 0.8984375, "sample": [29.448883056640625, 38.46912384033203, 17.76119613647461, 5.126350402832031, 6.01087760925293, 3.3787078857421875, 25.276100158691406, 16.885765075683594, 22.48168182373047, 40.87759780883789, 7.221488952636719, 9.041526794433594, 0.7078018188476562, 30.562198638916016, 41.837493896484375, 25.54326629638672, 25.03748321533203, 18.689489364624023, 16.67986488342285, 20.389205932617188, -7.009559631347656, 22.259017944335938, 10.9718017578125, 25.230209350585938, 11.878820419311523, 36.45995330810547, 42.573692321777344, -9.45376968383789, 10.779571533203125, 4.442352294921875, 35.95466613769531, 48.785396575927734, 6.678047180175781, 7.038520812988281, 9.937255859375, 34.360748291015625, 3.8611907958984375, 5.014984130859375, 39.98887634277344, 66.03140258789062, 51.525146484375, 7.710014343261719, 31.95306396484375, 27.8848876953125, 5.776805877685547, 23.57525634765625, 20.653213500976562, 33.936912536621094, -1.7597122192382812, 7.895679473876953, 17.153053283691406, -20.63111114501953, 20.228469848632812, 0.31903076171875, 16.18262481689453, 20.050216674804688, 10.811805725097656, 28.449417114257812, 24.2548828125, -0.7305374145507812, 41.74200439453125, 41.399017333984375, 39.118072509765625, 15.087570190429688, 35.23513412475586, 19.099151611328125, 24.261474609375, 2.9376068115234375, 13.820594787597656, 12.136695861816406, 15.263628005981445, 14.87225341796875, 5.697185516357422, 23.46295928955078, 7.800014495849609, 25.855052947998047, 9.042526245117188, 28.505537033081055, 23.570743560791016, 30.483749389648438, 23.895404815673828, 51.64359664916992, -5.764060974121094, 15.391754150390625, 26.48638153076172, 12.111549377441406, 15.424400329589844, -1.0348663330078125, 5.262107849121094, 28.275650024414062, 19.94610595703125, 12.327449798583984, 32.869056701660156, 3.5490951538085938, 21.101844787597656, 43.73888397216797, 10.806365966796875, 20.756345748901367, 25.180965423583984, 34.60337829589844, 9.202642440795898, 15.87237548828125, 2.3460140228271484, 23.20101547241211, 37.95832061767578, 16.847881317138672, 19.41620635986328, 22.851409912109375, 18.711212158203125, 21.813079833984375, 16.34527587890625, 11.486553192138672, -2.7336807250976562, 9.861457824707031, -6.493721008300781, 57.433074951171875, 12.840950012207031, 35.81452178955078, 11.464521408081055, 2.6391220092773438, -17.950088500976562, -6.129878997802734, 45.26856994628906, 4.145973205566406, 7.281364440917969, -2.28466796875, 5.7171783447265625, -1.967498779296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000233.npy"}
{"epoch": 0.6852941176470588, "step": 234, "batch_size": 128, "mean": 16.718887329101562, "std": 16.32958221435547, "min": -17.012939453125, "p10": -2.0842567443847653, "median": 16.149242401123047, "p90": 36.496730422973634, "max": 74.52590942382812, "pos_frac": 0.8515625, "sample": [31.467857360839844, 42.77845001220703, 74.52590942382812, 24.41598129272461, -1.0674209594726562, -2.9749298095703125, 11.133796691894531, 22.020965576171875, 19.773208618164062, -2.0279769897460938, 62.66729736328125, 28.02850341796875, 19.430892944335938, 22.95758819580078, 16.480344772338867, 49.768798828125, 36.613616943359375, 4.79254150390625, 36.68780517578125, 12.3450927734375, 17.03631591796875, -11.041610717773438, 7.15557861328125, 23.24645233154297, 1.95208740234375, 29.508750915527344, 20.651519775390625, 21.196945190429688, 17.1602783203125, 7.070735931396484, 8.387275695800781, 19.21997833251953, 23.52448272705078, 27.641830444335938, 46.269775390625, 19.517501831054688, 27.530487060546875, 2.165250778198242, 29.94823455810547, 22.230207443237305, -1.4250640869140625, 11.035293579101562, 3.29302978515625, 4.889396667480469, 19.661354064941406, 19.019081115722656, 16.226158142089844, -2.8207359313964844, 20.981304168701172, 12.207763671875, 29.115814208984375, -10.208526611328125, 28.55817413330078, 16.833988189697266, -16.391815185546875, -11.715263366699219, 18.40850830078125, 25.040985107421875, 11.500358581542969, 28.184677124023438, -10.608482360839844, 8.813621520996094, -5.420047760009766, 10.846221923828125, 15.4439697265625, -3.762510299682617, 36.42083740234375, 1.6700668334960938, 24.617721557617188, 9.657684326171875, 43.176239013671875, 13.009552001953125, 15.481765747070312, 8.32292366027832, 12.703948974609375, 40.60121154785156, 16.07232666015625, 1.5001220703125, -1.7444839477539062, 11.457588195800781, 7.4114990234375, 15.296134948730469, 9.417938232421875, 10.208480834960938, 16.521526336669922, 10.947479248046875, 33.643165588378906, 13.479095458984375, 4.951416015625, 57.1884765625, 13.698890686035156, 30.87615966796875, 2.3910274505615234, 4.415504455566406, -2.215576171875, 22.994796752929688, 35.91143798828125, 36.44663619995117, 28.58346939086914, 2.327850341796875, 18.84027862548828, 5.032749176025391, -5.426492691040039, 18.88519287109375, 11.166460037231445, 17.984588623046875, 35.33922576904297, 38.5592041015625, 13.67068099975586, 7.31378173828125, -1.9664459228515625, 2.6002845764160156, 48.95471954345703, 19.26629638671875, -2.3624649047851562, 9.8660888671875, 20.836013793945312, 19.74730682373047, 23.49591064453125, 23.28857421875, 28.320556640625, 22.434608459472656, 67.4346923828125, -17.012939453125, -0.5019378662109375, 5.362861633300781, 4.812591552734375, 8.734642028808594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000234.npy"}
{"epoch": 0.6882352941176471, "step": 235, "batch_size": 128, "mean": 17.595291137695312, "std": 17.10732650756836, "min": -21.92236328125, "p10": -2.003954315185547, "median": 15.783626556396484, "p90": 36.231442260742185, "max": 76.42919158935547, "pos_frac": 0.8671875, "sample": [25.803977966308594, 18.499839782714844, 11.635305404663086, 21.379289627075195, 11.476615905761719, 2.932605743408203, 19.91307830810547, 21.246814727783203, 35.86561584472656, 32.244781494140625, 32.581947326660156, 33.062721252441406, 22.05620574951172, 8.3828125, 31.731590270996094, 10.338119506835938, 31.713125228881836, 3.116109848022461, 5.2576446533203125, -5.129880905151367, 5.853126525878906, -2.0440673828125, 14.115577697753906, 18.50164794921875, 36.20813751220703, -13.314754486083984, -14.789718627929688, 37.97758483886719, -5.198028564453125, 74.00523376464844, 23.26824951171875, 46.094505310058594, 6.438407897949219, -21.92236328125, 11.309913635253906, 32.145172119140625, 12.823463439941406, -9.165267944335938, 33.08030700683594, 8.530326843261719, 38.8372802734375, -1.26422119140625, 24.3416748046875, 42.070396423339844, 34.194671630859375, 7.850685119628906, 9.291463851928711, 32.354034423828125, 69.56987762451172, -7.469017028808594, 12.757957458496094, 11.926216125488281, 15.43017578125, 16.79583740234375, 9.93075180053711, 17.83880615234375, -11.843746185302734, 13.859077453613281, 11.328468322753906, 8.725040435791016, 16.13707733154297, -1.105499267578125, 25.60572052001953, 15.298751831054688, 9.20050048828125, 27.8162841796875, 16.496505737304688, 1.620208740234375, 22.8297119140625, 14.791519165039062, 28.286865234375, 16.139137268066406, 15.30949592590332, 40.58174133300781, -8.379119873046875, 18.370189666748047, 12.77603530883789, 0.6544876098632812, 15.289237976074219, -6.995023727416992, 51.437721252441406, 9.874786376953125, 2.624063491821289, 17.265823364257812, 18.978599548339844, 5.0453643798828125, 18.464744567871094, 11.413394927978516, 18.867916107177734, 36.28582000732422, -3.6466827392578125, -1.9867630004882812, 25.5875244140625, 21.710235595703125, 15.237770080566406, 31.39049530029297, 13.421394348144531, 2.6915740966796875, 21.63573455810547, 20.59283447265625, 12.889381408691406, 11.803260803222656, 23.82111358642578, 2.9032821655273438, 28.336929321289062, 7.098152160644531, 13.491535186767578, 38.657920837402344, 23.25809097290039, -18.003204345703125, 17.56201171875, 31.824129104614258, 5.7316741943359375, 22.149986267089844, 40.87097930908203, 32.911903381347656, 9.332927703857422, 76.42919158935547, 33.0784912109375, 3.2105350494384766, 10.015876770019531, 32.384803771972656, 19.65172576904297, 3.6954803466796875, 63.13753128051758, 34.730613708496094, 29.698633193969727, -0.6430740356445312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000235.npy"}
{"epoch": 0.6911764705882353, "step": 236, "batch_size": 128, "mean": 19.192806243896484, "std": 16.473766326904297, "min": -16.02081298828125, "p10": -0.5423753738403314, "median": 18.52947235107422, "p90": 40.85281753540038, "max": 73.11540222167969, "pos_frac": 0.890625, "sample": [21.54650115966797, -2.1392669677734375, 12.642753601074219, 48.465843200683594, 5.560272216796875, 14.36490249633789, 14.815010070800781, 40.11747741699219, -1.5819072723388672, -4.844329833984375, 26.000064849853516, 20.237560272216797, 46.34764099121094, 0.29559326171875, 32.080650329589844, 35.188438415527344, 17.102699279785156, 46.92718505859375, 22.208097457885742, 30.259681701660156, 14.524993896484375, -5.162727355957031, 4.4814910888671875, 17.80744743347168, 25.1055908203125, 19.791748046875, -3.7568511962890625, 18.219223022460938, 0.6582050323486328, 29.257492065429688, 14.661026000976562, 18.035888671875, 20.289749145507812, 1.2606658935546875, 21.054702758789062, 3.75115966796875, 10.978504180908203, -11.149154663085938, 41.80426788330078, 4.400167465209961, 22.720733642578125, -9.906129837036133, -13.363410949707031, 7.201702117919922, 22.734130859375, 26.58707046508789, 2.3885421752929688, 22.367904663085938, 28.038681030273438, 9.064178466796875, 19.41045570373535, 5.937957763671875, 22.832427978515625, 26.80011749267578, 15.316741943359375, 6.999427795410156, 1.1087532043457031, -0.36604881286621094, 8.834915161132812, 12.616592407226562, 21.00336456298828, 25.5849666595459, 10.502613067626953, 21.588417053222656, 19.61279296875, 0.3704547882080078, 20.7454833984375, 14.892425537109375, 2.007965087890625, 24.10626220703125, -2.7972564697265625, -16.02081298828125, 18.675277709960938, -3.6471023559570312, 13.845489501953125, 29.18244171142578, 35.05499267578125, 17.302553176879883, 31.924896240234375, 37.59088134765625, 29.064067840576172, 46.46580505371094, 73.11540222167969, 2.7505950927734375, 16.604249954223633, 45.54571533203125, 29.2333984375, 24.832870483398438, 40.44505310058594, 14.084861755371094, 54.481292724609375, 10.241645812988281, 25.353416442871094, 1.5606689453125, 15.192573547363281, 30.72293472290039, 18.01734161376953, 6.49847412109375, 25.012361526489258, 19.723779678344727, 38.99040222167969, 10.95949935913086, 15.328155517578125, 57.9869384765625, 19.69647979736328, -1.4253368377685547, 21.554046630859375, 11.589256286621094, 23.078750610351562, 57.810333251953125, 17.35223388671875, 28.3017578125, 26.22846221923828, 41.83393096923828, 15.678977966308594, 34.845916748046875, 18.3836669921875, 58.52558898925781, 64.00534057617188, 21.334121704101562, 33.94561767578125, 9.269523620605469, 1.9918804168701172, 1.7749900817871094, 17.685470581054688, 20.220993041992188, -0.9538040161132812, 31.280193328857422], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000236.npy"}
{"epoch": 0.6941176470588235, "step": 237, "batch_size": 128, "mean": 15.437126159667969, "std": 16.13946533203125, "min": -46.06878662109375, "p10": -3.614911651611328, "median": 15.262027740478516, "p90": 38.44012222290039, "max": 62.826576232910156, "pos_frac": 0.84375, "sample": [1.082489013671875, 10.650867462158203, 21.94403839111328, 25.853187561035156, 1.8036880493164062, 28.729537963867188, 7.0044708251953125, 11.166007995605469, 28.92571258544922, 1.198394775390625, 38.855079650878906, 18.45989990234375, -8.470611572265625, 42.320343017578125, 48.284263610839844, 6.130577087402344, -0.02196502685546875, 4.0919647216796875, -4.334362030029297, 8.818885803222656, 38.26228332519531, 2.6353797912597656, 20.085861206054688, 49.154144287109375, -6.606086730957031, 32.52459716796875, 44.27989959716797, 11.913406372070312, -3.4986534118652344, -7.382837295532227, 15.885963439941406, 12.811824798583984, 18.22967529296875, 1.550954818725586, 25.682144165039062, 15.249015808105469, -11.577568054199219, -1.3417587280273438, 26.697189331054688, 2.694957733154297, 15.589351654052734, 10.899843215942383, 21.220046997070312, 15.188720703125, 15.469621658325195, 19.83642578125, -2.4466209411621094, 28.994667053222656, 9.706886291503906, 14.042797088623047, 17.802757263183594, 17.03363037109375, 7.389530181884766, 23.0670108795166, -9.003219604492188, 4.176261901855469, 23.8800048828125, 12.985626220703125, 27.727279663085938, 16.24254608154297, 3.8511619567871094, 17.37933349609375, 13.761774063110352, 15.16366958618164, 6.617156982421875, 7.702663421630859, -1.843252182006836, 46.802146911621094, 42.16886901855469, 20.708282470703125, 56.95735168457031, -1.2783737182617188, 8.894134521484375, 0.2359752655029297, -46.06878662109375, 16.552967071533203, 29.6778564453125, 30.1900634765625, -11.027751922607422, 10.915443420410156, 17.27478790283203, 19.764320373535156, 30.238525390625, 27.62774658203125, 5.4622344970703125, 15.737052917480469, 12.796409606933594, 0.4167823791503906, 15.36949348449707, -3.6072921752929688, 19.91090202331543, 33.94355010986328, 15.275039672851562, 13.896148681640625, 24.666812896728516, 8.591659545898438, 28.95838165283203, 3.1853713989257812, 21.897552490234375, 7.930938720703125, 3.9542484283447266, 18.69245147705078, -8.658660888671875, 24.70709228515625, -4.194894790649414, 40.17432403564453, 21.942764282226562, 10.482048034667969, 42.19786071777344, 14.794914245605469, 0.8594131469726562, 1.6063575744628906, 12.537765502929688, 16.062538146972656, 33.0537109375, 43.68029022216797, -7.772499084472656, -3.6326904296875, 25.217247009277344, 36.091339111328125, 42.3963623046875, 18.85112762451172, 62.826576232910156, -6.1631317138671875, 18.511398315429688, 26.076637268066406, 15.053108215332031, 20.361343383789062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000237.npy"}
{"epoch": 0.6970588235294117, "step": 238, "batch_size": 128, "mean": 16.919872283935547, "std": 16.705856323242188, "min": -16.667457580566406, "p10": -1.4891725540161131, "median": 14.305899620056152, "p90": 38.15982284545898, "max": 77.96701049804688, "pos_frac": 0.8515625, "sample": [-5.8936614990234375, 1.9051322937011719, 34.375267028808594, 5.310297012329102, 19.162521362304688, 17.387107849121094, 37.38804626464844, 1.1247673034667969, 51.0762939453125, 15.964340209960938, 12.937423706054688, -3.222637176513672, 15.314655303955078, -0.9722747802734375, 13.547126770019531, 6.344144821166992, 3.862884521484375, 46.192176818847656, 8.880874633789062, 24.04456329345703, 37.34950637817383, 9.71881103515625, 31.56304168701172, 24.885711669921875, 36.25975036621094, 27.307052612304688, 5.7010498046875, 3.42779541015625, 24.031631469726562, 1.626068115234375, 12.665542602539062, 5.002281188964844, 68.28327178955078, 26.933448791503906, 15.784141540527344, 8.169441223144531, 16.280242919921875, -0.5748519897460938, 40.74586486816406, 19.42325210571289, 27.396223068237305, 20.279510498046875, 54.77960205078125, 8.282276153564453, 6.763689041137695, 20.79753875732422, 3.342630386352539, -3.5719146728515625, 26.925399780273438, 6.224376678466797, 25.940956115722656, -6.834003448486328, 27.85476303100586, 13.840106964111328, 8.835563659667969, -4.077175140380859, -1.5694198608398438, 77.96701049804688, 26.089141845703125, 54.992156982421875, -0.7319488525390625, 4.34661865234375, -5.487207412719727, 34.902442932128906, 21.5416259765625, -0.24507522583007812, -1.5556774139404297, -7.452089309692383, 25.265899658203125, 8.555419921875, 50.246063232421875, 31.26177215576172, 23.46312713623047, 5.4517669677734375, 25.046585083007812, 25.331443786621094, 37.657127380371094, 18.436416625976562, -9.314254760742188, 19.710250854492188, 3.483661651611328, 17.556943893432617, 24.73503875732422, 5.180057525634766, 8.482597351074219, 4.24766731262207, 4.853553771972656, 8.94171142578125, 54.3013916015625, 44.19087219238281, 7.653411865234375, 18.179229736328125, 21.508567810058594, 19.11690330505371, -16.667457580566406, 16.628070831298828, 3.1852378845214844, -4.788051605224609, 18.287796020507812, 24.397350311279297, 22.46532440185547, 25.965187072753906, 30.448368072509766, 29.57793426513672, 9.408025741577148, 6.9666900634765625, 7.863376617431641, 7.831398010253906, 13.908638000488281, 4.146781921386719, 4.789031982421875, -2.2762203216552734, 6.656276702880859, 8.066295623779297, 31.424762725830078, -0.31756591796875, 10.454534530639648, 14.081098556518555, 39.33277893066406, 28.554832458496094, 45.88116455078125, 15.710525512695312, 4.23759651184082, -1.4606704711914062, 42.51408386230469, 33.48283386230469, 14.53070068359375, 8.024625778198242], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000238.npy"}
{"epoch": 0.7, "step": 239, "batch_size": 128, "mean": 15.62121868133545, "std": 16.777061462402344, "min": -34.18206787109375, "p10": -4.058413505554198, "median": 16.218056678771973, "p90": 34.268677139282225, "max": 60.164512634277344, "pos_frac": 0.8125, "sample": [3.176647186279297, 27.048965454101562, 19.74996566772461, -34.18206787109375, 59.09272003173828, 27.228641510009766, 32.4197998046875, 34.5716667175293, 46.59319305419922, 21.617338180541992, 22.62061309814453, 24.088468551635742, 37.38365936279297, 23.518508911132812, 20.922286987304688, 4.595651626586914, -30.667022705078125, -10.366806030273438, 27.911392211914062, 19.365264892578125, 13.281463623046875, 22.92022705078125, 26.649417877197266, 5.899692535400391, -3.87640380859375, 11.38427734375, 42.998687744140625, -22.14674949645996, -6.0405731201171875, 26.191089630126953, 2.1451263427734375, 22.237754821777344, 9.88675308227539, 25.967864990234375, 5.848030090332031, 18.55528450012207, 24.99195098876953, 33.560523986816406, 29.669204711914062, 11.659008026123047, -5.8052978515625, 25.69762420654297, -5.127281188964844, 16.357757568359375, 8.376754760742188, 3.4657135009765625, 0.24621200561523438, 10.371208190917969, 13.97216796875, -3.8151779174804688, 16.91851806640625, 40.61962890625, -10.79083251953125, 26.221359252929688, 27.18033790588379, 26.03173065185547, 34.138824462890625, -3.3092498779296875, 30.636367797851562, 21.176620483398438, -8.284812927246094, 18.472253799438477, 29.447669982910156, 33.085906982421875, 16.32952308654785, 23.17082977294922, -3.5009098052978516, 0.24526596069335938, 15.143165588378906, 13.492874145507812, 15.506675720214844, -0.6575469970703125, 1.041961669921875, 48.936004638671875, 8.122184753417969, 13.350189208984375, -0.4648399353027344, -1.8898773193359375, -7.094429016113281, 29.85529899597168, 17.10796356201172, 57.582122802734375, 25.005756378173828, 15.250106811523438, 22.271316528320312, 15.265892028808594, 2.00732421875, 5.011436462402344, -2.32037353515625, 19.98360824584961, 50.16896057128906, 9.420013427734375, 19.262739181518555, 14.077951431274414, -2.9990921020507812, 23.68499755859375, 3.4151782989501953, 0.3450603485107422, -8.697608947753906, 17.728294372558594, -4.483102798461914, 38.48847961425781, 1.6855602264404297, 24.275924682617188, 16.914535522460938, 24.134231567382812, 9.142555236816406, 12.891471862792969, 13.239824295043945, 5.656158447265625, 60.164512634277344, 23.27446746826172, 33.93079376220703, 15.346088409423828, -0.6886367797851562, -4.6436309814453125, 28.5982666015625, -0.8765640258789062, 18.269630432128906, 36.18132019042969, 30.063514709472656, 8.936515808105469, 48.496360778808594, 16.106590270996094, 3.5391082763671875, 7.690349578857422, 22.52019500732422, 11.747970581054688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000239.npy"}
{"epoch": 0.7029411764705882, "step": 240, "batch_size": 128, "mean": 16.258371353149414, "std": 17.740018844604492, "min": -19.705078125, "p10": -3.7036581039428706, "median": 13.905906677246094, "p90": 37.463996505737306, "max": 89.97163391113281, "pos_frac": 0.84375, "sample": [2.5899581909179688, 24.188064575195312, 34.53502655029297, 5.127861022949219, 10.71240234375, 32.47969055175781, 18.222036361694336, 5.716159820556641, 30.037796020507812, 0.7474708557128906, -3.0242347717285156, -0.904937744140625, -4.024660110473633, 46.71464538574219, 14.252786636352539, 2.4837265014648438, 6.268585205078125, 1.2707672119140625, 4.002738952636719, 36.64703369140625, 28.363311767578125, 3.0104751586914062, 0.40663909912109375, 27.163532257080078, 14.617721557617188, 21.29314422607422, 16.99216079711914, 7.20770263671875, -4.432514190673828, -11.886260986328125, 5.523780822753906, 13.553268432617188, 20.032682418823242, -9.220834732055664, 22.10791778564453, 6.381141662597656, 1.8232803344726562, 36.26826477050781, 33.9677734375, 8.818443298339844, 17.557403564453125, 4.544281005859375, 7.633415222167969, 25.05356216430664, 9.391395568847656, 6.048561096191406, 1.0289154052734375, 27.785205841064453, 12.950920104980469, 36.63468551635742, 41.32600402832031, 10.625099182128906, 0.05637359619140625, -6.924102783203125, 30.196533203125, 14.216537475585938, 20.46161651611328, 8.539237976074219, 37.623714447021484, 40.94740295410156, 16.02484130859375, 32.976776123046875, -2.2806930541992188, 4.5525970458984375, 5.271430969238281, 37.395545959472656, -9.220840454101562, 3.375215530395508, 13.808914184570312, 41.004638671875, 28.66895294189453, 11.465316772460938, -0.03285980224609375, 10.896678924560547, 19.677532196044922, 27.22662353515625, -19.705078125, -0.13512420654296875, 2.7516021728515625, 22.41590118408203, 34.06879425048828, 19.449684143066406, -4.215667724609375, 18.770633697509766, 19.742820739746094, 1.3611679077148438, 14.398910522460938, 24.426612854003906, 9.456697463989258, 7.302730560302734, -2.6512985229492188, -5.993036270141602, 10.938568115234375, 27.95416259765625, 26.17646026611328, 14.460182189941406, 10.850723266601562, 44.025611877441406, 1.4926338195800781, 59.50648498535156, 2.660736083984375, 13.00775146484375, 36.022056579589844, 13.264678955078125, -3.5660858154296875, 9.414939880371094, 21.12530517578125, 12.311811447143555, 22.01580047607422, -10.065750122070312, 25.129608154296875, 40.474334716796875, -8.048873901367188, -15.47149658203125, 33.199398040771484, 24.784557342529297, 89.97163391113281, 28.389812469482422, 38.12547302246094, 44.37864685058594, 17.130569458007812, 25.227962493896484, 14.002899169921875, 44.12206268310547, 35.329803466796875, -7.937889099121094, 21.267501831054688, 81.41213989257812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000240.npy"}
{"epoch": 0.7058823529411765, "step": 241, "batch_size": 128, "mean": 16.893159866333008, "std": 16.425031661987305, "min": -19.362564086914062, "p10": -2.062421989440918, "median": 16.797351837158203, "p90": 33.75359115600585, "max": 80.06610107421875, "pos_frac": 0.84375, "sample": [11.315826416015625, 9.135772705078125, 31.73614501953125, 15.315040588378906, 25.109447479248047, -0.3812103271484375, 4.9795074462890625, 18.557342529296875, 23.437423706054688, 23.18071746826172, 48.048309326171875, 19.320524215698242, 23.081695556640625, 36.9300537109375, 28.01970672607422, 13.086925506591797, 22.862720489501953, -6.5976715087890625, 22.63311767578125, 16.44488525390625, -5.047752380371094, 26.240577697753906, 29.48444366455078, 30.399368286132812, 64.6796875, 8.603736877441406, 9.877464294433594, 22.495986938476562, -2.6800384521484375, 12.914306640625, 6.938068389892578, 13.120208740234375, 26.4461727142334, 30.35395050048828, 26.566768646240234, 3.3556137084960938, -1.75311279296875, 4.137432098388672, 4.475791931152344, 15.679767608642578, 17.08020782470703, 20.75428009033203, 25.349899291992188, 20.341873168945312, 3.5512237548828125, -2.0579147338867188, -13.073667526245117, 29.003395080566406, 23.803985595703125, 18.76293182373047, 21.72441864013672, 53.41035461425781, 4.0247955322265625, -1.1642990112304688, 44.38629150390625, 11.922439575195312, -19.362564086914062, 25.492233276367188, 17.59735107421875, 26.365951538085938, 69.92347717285156, 15.41849136352539, 15.023551940917969, 23.368717193603516, 7.4246063232421875, 9.466907501220703, 24.007020950317383, 8.832748413085938, 39.560760498046875, 12.681350708007812, 17.294906616210938, 16.61200714111328, 47.549781799316406, 28.059391021728516, 27.597204208374023, 3.9793014526367188, 2.625244140625, -2.901580810546875, 6.319854736328125, 32.392250061035156, 21.35418701171875, 22.822433471679688, 8.719581604003906, 12.772270202636719, -1.5335922241210938, 16.895355224609375, 16.610687255859375, 22.01169204711914, 80.06610107421875, 10.372413635253906, -0.6143722534179688, 18.684310913085938, 5.7625579833984375, 16.361900329589844, 23.71546173095703, 21.206958770751953, 2.2317733764648438, 2.3362808227539062, 0.2538642883300781, 6.94053840637207, -4.679759979248047, -7.3297271728515625, 16.69934844970703, 23.157421112060547, -1.5613231658935547, 27.35138702392578, -6.939613342285156, 31.938865661621094, 30.91570281982422, -2.072938919067383, 18.279155731201172, 40.34004592895508, 20.145198822021484, -3.0137252807617188, 1.4089679718017578, 0.44297027587890625, 10.212343215942383, 19.29364013671875, -6.96258544921875, 51.87339782714844, 29.834836959838867, 44.51118469238281, -9.2747802734375, 5.104118347167969, 0.5646286010742188, 19.711639404296875, 20.242889404296875, 39.50701904296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000241.npy"}
{"epoch": 0.7088235294117647, "step": 242, "batch_size": 128, "mean": 19.286924362182617, "std": 17.03950309753418, "min": -8.55844497680664, "p10": 0.37206382751464867, "median": 15.506412506103516, "p90": 38.64726638793945, "max": 81.81086730957031, "pos_frac": 0.90625, "sample": [-2.9293861389160156, 29.465499877929688, 28.575057983398438, 30.76179313659668, 10.156112670898438, 9.026023864746094, 27.106407165527344, 7.4419708251953125, 57.94459533691406, 27.159423828125, 11.280426025390625, 16.085811614990234, 3.225330352783203, 5.560031890869141, 12.447265625, 32.77897644042969, -5.556980133056641, 81.81086730957031, 9.199996948242188, 3.7673873901367188, 9.688201904296875, 27.377410888671875, 22.544082641601562, 32.790138244628906, 31.08909034729004, 12.260719299316406, 16.04175567626953, -1.6583099365234375, 8.002593994140625, 38.72355651855469, 16.459320068359375, 37.94085693359375, 28.73175811767578, 28.948837280273438, 31.900177001953125, 33.406646728515625, 15.282981872558594, -2.6609878540039062, 17.43860626220703, 27.388290405273438, 3.2645492553710938, 33.448150634765625, 32.782203674316406, 14.746574401855469, 9.153003692626953, 48.52601623535156, 2.5886268615722656, 36.18569564819336, 11.580116271972656, 6.738864898681641, 3.0936508178710938, 11.21234130859375, 21.640262603759766, 13.926498413085938, 19.76811981201172, 5.976036071777344, 27.039817810058594, 16.243362426757812, 45.87577819824219, 9.516670227050781, 12.958763122558594, 25.109344482421875, 29.40288543701172, -8.55844497680664, 16.76641082763672, 11.313560485839844, -1.5139541625976562, 18.275236129760742, 5.723213195800781, 1.1558799743652344, 10.209617614746094, 54.766326904296875, 58.76483154296875, -7.384254455566406, 18.899085998535156, 7.3986053466796875, 15.729843139648438, 20.337608337402344, 15.095695495605469, 38.61457061767578, 35.98902893066406, 28.241371154785156, 10.942207336425781, 11.339202880859375, 12.460342407226562, 12.587944030761719, 0.4408302307128906, 5.743556976318359, 9.999296188354492, -8.168380737304688, 4.010124206542969, 14.648666381835938, 44.728187561035156, 31.908065795898438, 10.575233459472656, 10.166648864746094, 38.44422912597656, 18.70888900756836, 13.854515075683594, 19.991586685180664, 22.146442413330078, 39.62054443359375, 12.347179412841797, 9.086395263671875, 81.71282958984375, -1.1024627685546875, 27.678199768066406, 64.3185806274414, -3.6866989135742188, -2.7056427001953125, 2.4490928649902344, 8.94442367553711, 1.5535774230957031, 1.7642669677734375, 17.02521514892578, 21.14873504638672, 51.178436279296875, 0.21160888671875, 14.778125762939453, 24.66668701171875, 38.119873046875, 44.59172058105469, 32.866455078125, -0.7849311828613281, 27.298011779785156, 23.914138793945312, 9.901226043701172, 29.6992130279541], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000242.npy"}
{"epoch": 0.711764705882353, "step": 243, "batch_size": 128, "mean": 20.77956771850586, "std": 17.507761001586914, "min": -15.091205596923828, "p10": -0.9514316558837889, "median": 19.4725399017334, "p90": 42.8874942779541, "max": 72.44296264648438, "pos_frac": 0.890625, "sample": [48.31025695800781, 22.230056762695312, 10.267227172851562, 2.247589111328125, 46.50634765625, 3.0318603515625, 30.612884521484375, 13.383079528808594, -1.1058731079101562, 10.144001007080078, 34.235321044921875, 41.87684631347656, 24.612701416015625, 38.0052490234375, 1.1060562133789062, 52.498260498046875, 25.58106231689453, 9.999748229980469, 16.792831420898438, 6.246185302734375, 53.14201354980469, 32.17928695678711, 7.666069030761719, 20.420608520507812, 13.374034881591797, -14.446701049804688, 42.6259765625, 32.27173614501953, 11.984100341796875, 30.332626342773438, 25.676284790039062, 13.250564575195312, 13.869598388671875, 16.07196044921875, 12.391281127929688, 22.974388122558594, 26.520214080810547, 13.763511657714844, 2.9094619750976562, 56.34808349609375, 21.566650390625, 51.87164306640625, -9.164802551269531, 3.3717899322509766, 27.830650329589844, -4.945335388183594, 16.45281982421875, 17.726533889770508, 24.982009887695312, 29.053443908691406, 44.863006591796875, 14.14007568359375, 10.0911865234375, 15.829708099365234, 6.998996734619141, 42.54804992675781, 72.44296264648438, -0.8852424621582031, -9.397159576416016, 6.8184814453125, 22.437305450439453, 28.84160614013672, 69.57037353515625, 12.696304321289062, 40.51300811767578, 20.101547241210938, 23.701644897460938, 15.012130737304688, 43.3861198425293, 20.78467559814453, 27.441192626953125, 1.8965034484863281, 30.589630126953125, 10.745765686035156, 4.001319885253906, 32.66480255126953, 39.972015380859375, 1.6493301391601562, 3.8133697509765625, -1.9070892333984375, -7.784210205078125, -1.5601043701171875, 32.977783203125, 16.332223892211914, 1.5524406433105469, 32.34063720703125, 41.13493347167969, 39.05280303955078, 29.564380645751953, 34.31243896484375, 15.719413757324219, 13.348756790161133, 8.63273811340332, 30.197845458984375, 8.300056457519531, 15.029596328735352, 21.23044776916504, 16.464492797851562, 24.659469604492188, 30.993927001953125, 31.047958374023438, 12.40130615234375, 9.800010681152344, 42.673797607421875, -11.773946762084961, 18.84353256225586, -2.0448837280273438, 23.943988800048828, 29.269699096679688, 34.266197204589844, 27.3902587890625, 18.809494018554688, -2.2639007568359375, 9.271400451660156, 26.421356201171875, -15.091205596923828, 36.38610076904297, 27.800079345703125, 44.505699157714844, 30.62425994873047, 25.21642303466797, 16.768753051757812, -4.7189483642578125, 2.292682647705078, 52.78826141357422, 7.631649017333984, 1.968902587890625, 63.041603088378906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000243.npy"}
{"epoch": 0.7147058823529412, "step": 244, "batch_size": 128, "mean": 16.80020523071289, "std": 15.967530250549316, "min": -26.515304565429688, "p10": 1.3798217773437509, "median": 14.293794631958008, "p90": 35.55280685424805, "max": 75.28648376464844, "pos_frac": 0.9140625, "sample": [32.01094055175781, 75.28648376464844, 9.703132629394531, 3.155712127685547, 45.21221923828125, 8.319625854492188, 11.638351440429688, 1.612091064453125, 38.214195251464844, 21.44051742553711, 19.43193817138672, 22.48907470703125, 26.711761474609375, 44.992942810058594, 4.316230773925781, 70.12239837646484, 8.64177131652832, 5.2882843017578125, 13.717536926269531, 6.758031845092773, 39.528541564941406, -12.326332092285156, 46.40177917480469, 16.50250244140625, 22.645870208740234, 5.460323333740234, 14.059356689453125, 5.7935333251953125, 27.595287322998047, 32.09735107421875, 15.248918533325195, -5.55718994140625, 33.152748107910156, 3.2630386352539062, 11.446273803710938, 18.415420532226562, 35.405372619628906, 7.289806365966797, 3.81524658203125, 6.16949462890625, 10.085561752319336, 32.734092712402344, -18.429458618164062, 5.258216857910156, 27.430641174316406, 4.208251953125, 27.967979431152344, -0.6027565002441406, 9.841060638427734, 13.692840576171875, 7.877971649169922, -1.9164390563964844, 2.1995697021484375, 25.085098266601562, -1.8937606811523438, 1.6892547607421875, 62.5911865234375, 45.95159912109375, 40.54481887817383, 21.136444091796875, 4.099941253662109, 7.824134826660156, 8.676116943359375, 9.487380981445312, -1.5626144409179688, 23.714719772338867, 23.056121826171875, 16.7176513671875, 44.656036376953125, -3.3358993530273438, 25.397872924804688, 6.021093368530273, 0.837860107421875, 1.8193950653076172, 9.104120254516602, 22.6953125, 9.55211067199707, 2.6587600708007812, 15.373756408691406, 13.552030563354492, 4.4216766357421875, 23.17426300048828, 60.23701477050781, 26.604875564575195, 13.57391357421875, 20.129379272460938, 9.076702117919922, 14.979776382446289, 7.673301696777344, 13.135597229003906, 12.621768951416016, 15.863815307617188, 34.994361877441406, 16.83881950378418, 16.164588928222656, 9.781063079833984, 13.607879638671875, 14.73223876953125, 26.483543395996094, -26.515304565429688, 22.35828399658203, 19.404754638671875, 18.415985107421875, 17.923282623291016, 14.172569274902344, 31.478515625, 19.063199996948242, 32.5120849609375, 16.26013946533203, 10.987794876098633, 3.5488052368164062, 25.64832305908203, 21.8320255279541, 31.32977294921875, 10.901718139648438, 14.415019989013672, 26.14497947692871, -3.771657943725586, 0.4969329833984375, 25.684722900390625, 2.1466598510742188, 25.06146812438965, 3.947824478149414, -4.66351318359375, 9.106246948242188, 23.735015869140625, 17.54071044921875, 35.896820068359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000244.npy"}
{"epoch": 0.7176470588235294, "step": 245, "batch_size": 128, "mean": 17.685142517089844, "std": 19.0830078125, "min": -21.480255126953125, "p10": -2.4746086120605466, "median": 13.937444686889648, "p90": 42.3397834777832, "max": 74.6453857421875, "pos_frac": 0.8515625, "sample": [19.749839782714844, -0.882659912109375, 25.3087158203125, 8.9356689453125, -19.601499557495117, 8.826438903808594, 2.813138961791992, 39.81758117675781, 8.672449111938477, 38.294761657714844, 12.28548812866211, 43.389549255371094, 30.853824615478516, 8.667707443237305, 5.464900970458984, 15.740249633789062, 4.816427230834961, 14.789445877075195, 41.91999816894531, 22.5400390625, 19.19881248474121, 6.196990966796875, 2.586101531982422, 9.891036987304688, 21.474796295166016, -19.437896728515625, 72.4156723022461, 25.919097900390625, 29.577892303466797, 52.70185089111328, 7.542964935302734, 13.496532440185547, 15.370887756347656, 9.933584213256836, -1.1169891357421875, 2.0381927490234375, 4.216796875, 4.954317092895508, 21.247211456298828, 18.087722778320312, 26.793663024902344, -7.651374816894531, 21.65587615966797, 11.997310638427734, 8.465690612792969, -6.374687194824219, 7.079561233520508, 2.354949951171875, 30.353363037109375, 1.7411003112792969, 36.970298767089844, 2.579050064086914, 53.35002136230469, 27.31043243408203, 9.6278076171875, 23.785003662109375, 70.91708374023438, 22.872222900390625, 35.50663757324219, -11.239433288574219, 7.43560791015625, 30.429832458496094, 24.1279296875, -8.215438842773438, 33.435791015625, -4.840797424316406, 39.876953125, -5.9705810546875, 31.977279663085938, -2.3922271728515625, 9.028541564941406, -21.480255126953125, -12.284698486328125, 21.790699005126953, 3.8704071044921875, 1.3077926635742188, 4.134317398071289, 8.394638061523438, -0.0571441650390625, 2.6555423736572266, 37.951438903808594, 53.497039794921875, 19.80596160888672, 20.469722747802734, 39.44702911376953, 27.489688873291016, 74.6453857421875, 0.09072113037109375, 5.586273193359375, 43.90641784667969, 19.045440673828125, -1.678823471069336, 15.14105224609375, 11.921852111816406, 38.555877685546875, 43.31928253173828, 33.609466552734375, -0.9952316284179688, -10.06781005859375, -2.6668319702148438, 24.628028869628906, 10.86701774597168, 7.998504638671875, 10.946739196777344, 3.6090011596679688, 39.474205017089844, 10.633575439453125, 10.67990493774414, 3.07501220703125, 58.238922119140625, 16.656021118164062, 33.90723419189453, 14.37835693359375, 56.496456146240234, 39.339134216308594, 26.682144165039062, 9.728557586669922, 48.1456298828125, -5.714443206787109, 17.853534698486328, 10.508453369140625, 5.898687362670898, 20.52050018310547, 48.31085205078125, 16.43471908569336, 35.2474365234375, 3.385976791381836, 36.6778564453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000245.npy"}
{"epoch": 0.7205882352941176, "step": 246, "batch_size": 128, "mean": 14.712345123291016, "std": 19.447336196899414, "min": -36.50019073486328, "p10": -7.669821166992187, "median": 14.001457214355469, "p90": 39.61369094848633, "max": 83.15858459472656, "pos_frac": 0.7890625, "sample": [27.042739868164062, -11.064373016357422, 24.865005493164062, 16.508270263671875, 47.400245666503906, 1.6541194915771484, 3.1354217529296875, 0.9305477142333984, 9.09881591796875, 23.247941970825195, 14.782333374023438, 11.555511474609375, 16.89154052734375, 11.383993148803711, 21.55221176147461, 13.055885314941406, -8.809288024902344, 7.75067138671875, 7.8361663818359375, 38.869415283203125, 0.05242156982421875, 58.8348388671875, 27.0700740814209, 35.3057861328125, 7.7038116455078125, 51.72397994995117, 5.764715194702148, -8.938186645507812, 48.470611572265625, 21.08441162109375, -1.293975830078125, 25.033676147460938, 10.350494384765625, 17.047683715820312, 8.903518676757812, 18.665496826171875, 31.712432861328125, 13.481330871582031, 19.96661376953125, 17.701236724853516, 10.85539436340332, 10.303300857543945, 17.649307250976562, -9.146902084350586, 12.944953918457031, 7.906106948852539, 17.97290802001953, 29.206558227539062, 14.16754150390625, -7.831298828125, 39.54045104980469, -3.443805694580078, 1.0435333251953125, 32.74620819091797, 8.04736328125, 30.088584899902344, 28.458648681640625, 14.948585510253906, 16.724822998046875, 19.94817352294922, 64.56212615966797, -0.8187713623046875, 10.817138671875, -27.01702117919922, 11.02435302734375, 20.625381469726562, 18.87939453125, -5.189117431640625, 18.504392623901367, -7.534666061401367, 18.9620361328125, 25.093502044677734, -12.04324722290039, 15.65228271484375, 7.263082504272461, 52.841468811035156, -3.1779022216796875, 22.292829513549805, 22.507789611816406, 26.62054443359375, 4.103307723999023, -29.47723388671875, -2.372526168823242, 40.80281066894531, 29.008468627929688, 9.826492309570312, -36.50019073486328, 39.784584045410156, 35.60986328125, 17.488418579101562, 11.898788452148438, 10.098846435546875, 9.18328857421875, -4.242164611816406, -0.13260650634765625, 37.75747299194336, -1.1052131652832031, 21.22270965576172, 22.3958740234375, 25.27972412109375, -18.582687377929688, 24.92196273803711, -26.73956298828125, 83.15858459472656, 43.05838394165039, 13.835372924804688, 48.04095458984375, -21.012924194335938, 8.563499450683594, 8.268558502197266, -5.000244140625, 0.9352951049804688, 25.506240844726562, -7.600616455078125, 16.420516967773438, 8.711942672729492, -4.85540771484375, 30.442779541015625, 10.509132385253906, 19.847618103027344, 27.35387420654297, 8.729087829589844, -3.7704238891601562, 42.823883056640625, -28.604576110839844, 46.81174087524414, 1.9585494995117188, 30.493743896484375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000246.npy"}
{"epoch": 0.7235294117647059, "step": 247, "batch_size": 128, "mean": 16.09423828125, "std": 16.45962905883789, "min": -15.8236083984375, "p10": -5.249950790405272, "median": 17.143545150756836, "p90": 35.04369049072265, "max": 68.01132202148438, "pos_frac": 0.8203125, "sample": [20.099267959594727, 13.034337997436523, 28.588043212890625, 18.102630615234375, -2.919464111328125, 36.599273681640625, 19.529312133789062, -0.1168060302734375, 1.4933929443359375, 19.05401611328125, 17.35759735107422, 4.178955078125, 20.378707885742188, 18.636150360107422, 32.75249481201172, 29.69995880126953, 22.984024047851562, 17.75122833251953, 31.91480255126953, 25.34964942932129, 29.044944763183594, 48.587890625, 30.546669006347656, 3.8953475952148438, 17.365509033203125, 14.644309997558594, 21.605173110961914, 15.139205932617188, -1.8826713562011719, 27.283105850219727, -9.070449829101562, -6.5821075439453125, -11.595413208007812, -15.8236083984375, 19.822425842285156, 15.288414001464844, 33.554222106933594, 0.5281982421875, 11.661615371704102, 22.401491165161133, 7.458534240722656, 8.032737731933594, 28.508377075195312, -3.6240081787109375, 13.363845825195312, -2.314037322998047, -2.045989990234375, 3.0642356872558594, -12.941146850585938, 5.9254913330078125, 22.992469787597656, 6.879608154296875, 19.360511779785156, 57.02693176269531, 10.471921920776367, 34.726776123046875, 9.332738876342773, 23.163057327270508, 21.298851013183594, 18.250715255737305, 2.2892398834228516, 20.707733154296875, 22.114044189453125, -10.6070556640625, -1.4827384948730469, 16.929492950439453, 12.166275024414062, 37.71641159057617, 44.483055114746094, 41.456024169921875, 0.10169029235839844, 36.47877502441406, 28.644393920898438, -6.3459625244140625, 23.042945861816406, 44.84088134765625, 7.490333557128906, -13.71359634399414, 7.004127502441406, 19.55108642578125, 18.52161979675293, 16.896305084228516, 32.968727111816406, 32.24974060058594, 32.247161865234375, 13.716346740722656, 6.221305847167969, 55.5799560546875, 21.851669311523438, 9.380897521972656, 7.871440887451172, -7.105354309082031, 35.78315734863281, 11.766891479492188, 23.951675415039062, 5.706012725830078, 23.770954132080078, 6.661476135253906, 20.341947555541992, 25.486347198486328, -7.894784927368164, 20.80145263671875, -4.780231475830078, 68.01132202148438, 34.28919982910156, 8.85997200012207, -0.40740966796875, 45.97742462158203, 11.70263671875, 11.816034317016602, -7.542915344238281, -2.7819137573242188, 26.04157257080078, 2.5365753173828125, -6.479640960693359, -8.239551544189453, 4.41143798828125, 11.126426696777344, 16.5621337890625, 22.444202423095703, 67.63368225097656, 19.072038650512695, 31.00525665283203, 25.241531372070312, 3.16729736328125, 23.661582946777344, 3.4011001586914062, 3.8771209716796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000247.npy"}
{"epoch": 0.7264705882352941, "step": 248, "batch_size": 128, "mean": 15.693302154541016, "std": 18.002286911010742, "min": -23.0745849609375, "p10": -3.6422599792480463, "median": 13.23492431640625, "p90": 38.503821182250974, "max": 72.60635375976562, "pos_frac": 0.8359375, "sample": [9.399177551269531, 0.8845348358154297, 16.59619903564453, 25.403289794921875, 9.307548522949219, 9.973770141601562, -0.7191505432128906, 8.409896850585938, -6.232645034790039, 36.696311950683594, 26.007415771484375, 31.9422607421875, 19.945152282714844, 8.318588256835938, 15.760944366455078, -0.41321563720703125, 11.417022705078125, 0.9543609619140625, 35.94811248779297, 61.62214660644531, 40.55792236328125, -4.1019287109375, 48.3221435546875, 29.555633544921875, 9.001317977905273, 0.5894260406494141, 6.062705993652344, 10.033588409423828, 26.706771850585938, 24.027666091918945, 54.99774932861328, -8.266937255859375, 8.555313110351562, 6.3731536865234375, 10.325843811035156, 17.590240478515625, 8.706634521484375, -23.0745849609375, 23.12591552734375, 12.406255722045898, 11.875564575195312, 45.75770568847656, 50.91627502441406, -11.8720703125, 37.83374786376953, 8.065422058105469, 23.504180908203125, 19.36865234375, 29.557933807373047, 26.12183380126953, 7.342887878417969, 8.837287902832031, 8.170040130615234, 17.686904907226562, -14.115966796875, 13.238937377929688, 19.405502319335938, 21.795860290527344, -8.268394470214844, 30.961700439453125, 33.430877685546875, 49.45256042480469, 72.60635375976562, -0.816864013671875, 4.111480712890625, -0.19537734985351562, 52.0712890625, 22.484588623046875, 15.014137268066406, 31.767913818359375, 14.593460083007812, 21.55101776123047, -3.15313720703125, 17.117584228515625, 18.49493408203125, 7.126071929931641, 15.984710693359375, 20.097640991210938, 37.950592041015625, 18.43608283996582, 11.4256591796875, 15.127410888671875, 0.10829925537109375, -16.864398956298828, -3.4452590942382812, 7.9202423095703125, 7.447471618652344, 10.240543365478516, 56.0672607421875, 33.91929626464844, 2.7420272827148438, -20.61083984375, 22.222015380859375, 18.28079605102539, 9.213638305664062, 6.52398681640625, 32.192771911621094, -20.916168212890625, 19.298418045043945, 5.842235565185547, 16.83526611328125, 14.646987915039062, 30.957351684570312, 1.6568145751953125, 57.86577606201172, 24.975921630859375, 2.6952438354492188, 16.01457977294922, 12.402153015136719, 2.1496124267578125, 16.85662841796875, 5.715812683105469, -8.291351318359375, -15.090095520019531, 31.18648338317871, -9.804061889648438, 13.230911254882812, 42.62156677246094, 30.667144775390625, 0.88232421875, 33.467655181884766, 39.7946891784668, -2.0844802856445312, 2.2720565795898438, 30.685882568359375, -2.9760589599609375, 3.8372116088867188, 1.8087921142578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000248.npy"}
{"epoch": 0.7294117647058823, "step": 249, "batch_size": 128, "mean": 14.44882869720459, "std": 18.635433197021484, "min": -21.513778686523438, "p10": -8.401188659667968, "median": 12.972932815551758, "p90": 41.21381530761719, "max": 71.85580444335938, "pos_frac": 0.7578125, "sample": [-7.571794509887695, 50.389801025390625, 20.92523956298828, 41.165626525878906, 1.7806110382080078, 5.011131286621094, 49.55369567871094, -8.837532043457031, 51.560020446777344, 41.54832458496094, 16.21178436279297, 10.44598388671875, 7.8358612060546875, 1.4380950927734375, 23.193767547607422, -6.9567413330078125, 7.744071960449219, 22.79454803466797, 5.674917221069336, 54.77853012084961, 8.426895141601562, 57.289520263671875, 15.008119583129883, 4.71875, 27.60289764404297, 13.105575561523438, 16.2188720703125, 3.1021347045898438, 24.488136291503906, 22.380985260009766, 6.990970611572266, -4.169921875, 11.15846061706543, -1.495382308959961, 4.7432098388671875, 17.082630157470703, 40.04899597167969, 19.54144287109375, 3.4928855895996094, -13.034435272216797, -5.519439697265625, 9.118082046508789, -1.4863471984863281, -7.906341552734375, 15.976165771484375, -15.466083526611328, 52.61283874511719, 18.089462280273438, 16.64507293701172, 28.77300262451172, -0.7053298950195312, 71.85580444335938, -2.325481414794922, 23.586748123168945, 29.04079246520996, -8.321784973144531, -4.573638916015625, 27.32050323486328, -16.91063690185547, 4.260124206542969, 35.09435272216797, -14.2349853515625, 25.780349731445312, -8.586463928222656, -2.037027359008789, 18.29673957824707, 15.92805290222168, -8.942253112792969, 19.336883544921875, 8.673721313476562, 23.03192138671875, 37.77896499633789, 16.021682739257812, 33.53145980834961, 20.58514404296875, 49.93633270263672, 27.600465774536133, 24.030059814453125, -21.513778686523438, -8.640115737915039, -9.822021484375, 50.86986541748047, 5.874580383300781, -2.9599761962890625, 3.7761154174804688, 14.77691650390625, 12.738723754882812, 37.24324035644531, 26.77486801147461, 32.616886138916016, 8.383411407470703, 8.132209777832031, 18.667556762695312, 9.943214416503906, -17.77490234375, 3.462677001953125, 13.968280792236328, 10.845039367675781, 18.00973892211914, 20.18218994140625, 24.576684951782227, 13.077800750732422, -15.99422836303711, 40.08759689331055, -7.9961700439453125, 41.326255798339844, -0.7135734558105469, -1.9507217407226562, 0.3552513122558594, 8.068399429321289, 8.731264114379883, 14.420551300048828, 23.822418212890625, 9.906692504882812, 23.6566162109375, 1.7052879333496094, 57.610652923583984, 14.941184997558594, -4.594696044921875, 21.774776458740234, 33.91326904296875, 9.065399169921875, 44.15068817138672, 12.868064880371094, -8.798639297485352, 8.71392822265625, 22.153900146484375, -2.258930206298828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000249.npy"}
{"epoch": 0.7323529411764705, "step": 250, "batch_size": 128, "mean": 16.12694549560547, "std": 19.285907745361328, "min": -38.40741729736328, "p10": -2.9587200164794916, "median": 15.089258193969727, "p90": 42.23368606567382, "max": 88.7288818359375, "pos_frac": 0.8359375, "sample": [49.369056701660156, 13.482025146484375, 8.020147323608398, -20.62584114074707, 10.981925964355469, 18.536514282226562, 16.948501586914062, 16.171058654785156, 37.51824188232422, 7.3217315673828125, 58.21397399902344, 11.288055419921875, 14.806268692016602, 2.2021484375, 23.241737365722656, 11.160118103027344, 47.477806091308594, 17.191070556640625, 14.659469604492188, 13.640884399414062, 30.2581787109375, 21.286407470703125, 25.028427124023438, -38.40741729736328, 5.071380615234375, 11.657947540283203, 21.61904525756836, 44.77508544921875, 11.829887390136719, 31.75617218017578, 3.6686344146728516, 15.31441879272461, -1.9632186889648438, -36.072479248046875, 3.0461654663085938, 28.038169860839844, 0.13916778564453125, 21.8465576171875, -4.260002136230469, 14.864097595214844, 17.70294189453125, 17.829849243164062, 18.59734535217285, 12.639656066894531, 7.111948013305664, 18.530723571777344, 11.195699691772461, -2.3209800720214844, -0.6359424591064453, 25.098297119140625, 12.870979309082031, 40.47508239746094, 18.754196166992188, 24.918045043945312, 25.2445068359375, 17.62995147705078, 29.148391723632812, 21.307968139648438, -23.633773803710938, -20.78607940673828, -3.509716033935547, 19.615806579589844, -1.3659019470214844, 88.7288818359375, 6.388587951660156, 2.9085159301757812, -11.945556640625, 65.22299194335938, 6.45220947265625, 36.64173126220703, 44.821075439453125, 27.649879455566406, 17.766815185546875, -1.4700698852539062, 3.922210693359375, 13.812820434570312, 14.26156997680664, -0.853546142578125, 27.08934783935547, 41.73859405517578, 9.013710021972656, 18.568050384521484, -30.71680450439453, 34.15772247314453, 17.65298080444336, 28.887367248535156, 29.648456573486328, -12.370994567871094, -1.3687210083007812, 37.0506591796875, 44.05353546142578, 3.8585586547851562, 28.232986450195312, 5.87187385559082, 19.810897827148438, 30.318740844726562, 15.829641342163086, 26.28271484375, 27.5706787109375, 26.75537109375, 31.408485412597656, 10.007440567016602, -3.4231338500976562, 0.7401199340820312, 2.8733978271484375, 48.43225860595703, 17.53289031982422, 47.94523620605469, 19.560623168945312, -11.875856399536133, 8.026557922363281, 46.739837646484375, 1.4040546417236328, 20.433624267578125, 12.3377685546875, 52.67182159423828, 6.12823486328125, -7.968315124511719, 43.38890075683594, 10.902191162109375, 13.835800170898438, 7.751708984375, 12.138481140136719, 3.6486968994140625, -2.759685516357422, 40.757530212402344, 6.047050476074219, 15.799568176269531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000250.npy"}
{"epoch": 0.7352941176470589, "step": 251, "batch_size": 128, "mean": 18.680957794189453, "std": 16.407846450805664, "min": -14.519866943359375, "p10": 1.1391086578369143, "median": 16.834213256835938, "p90": 34.13709564208984, "max": 82.99658203125, "pos_frac": 0.921875, "sample": [12.61184310913086, 24.861351013183594, 30.662086486816406, -14.519866943359375, 25.032466888427734, 21.544532775878906, 5.125240325927734, 35.89910888671875, 32.30548095703125, 11.817798614501953, 24.93471336364746, 34.640167236328125, 26.282135009765625, 30.845932006835938, 9.603004455566406, 19.913488388061523, 13.777629852294922, 11.107215881347656, 9.209136962890625, 82.99658203125, 9.84951400756836, 29.584487915039062, 26.476295471191406, 22.201568603515625, 14.612531661987305, 0.738800048828125, 17.841304779052734, 13.441658020019531, 10.846590042114258, -8.312034606933594, 5.727668762207031, 8.895500183105469, 5.742073059082031, -4.843544006347656, 23.910133361816406, 9.446470260620117, 19.37822151184082, 11.677789688110352, 49.86439514160156, 24.25342559814453, 29.754682540893555, 16.761337280273438, 1.4341812133789062, 16.907089233398438, 29.75531768798828, 21.30876350402832, -9.17889404296875, 4.297981262207031, 52.020301818847656, 28.702072143554688, 10.163185119628906, 5.502105712890625, 39.01029968261719, 10.675155639648438, -5.301723480224609, 19.619178771972656, 23.543596267700195, 26.80652618408203, 23.635116577148438, 16.549503326416016, 33.92149353027344, 43.75318908691406, 33.4527587890625, 11.553871154785156, 2.0066471099853516, -6.709909439086914, 7.791948318481445, 42.18235778808594, 4.05645751953125, 14.428901672363281, 0.7270488739013672, -9.378021240234375, 14.813407897949219, 1.1734809875488281, 9.600570678710938, 9.776321411132812, 9.797531127929688, 8.436601638793945, 23.46661376953125, 33.152740478515625, 14.720260620117188, 1.0589065551757812, 52.350013732910156, 22.274913787841797, 10.742095947265625, 21.401458740234375, -8.716171264648438, 13.220081329345703, 31.518409729003906, 9.384033203125, 28.627365112304688, 12.746528625488281, 16.259998321533203, 3.34930419921875, 65.16853332519531, 1.7327117919921875, 49.15773391723633, 22.180355072021484, 33.54051208496094, 26.635345458984375, 24.30597686767578, 21.26462745666504, 51.79482650756836, -1.6606903076171875, 7.747589111328125, 19.521133422851562, 9.203208923339844, 2.714731216430664, 30.906875610351562, 31.35435676574707, 10.230514526367188, 80.38932037353516, 25.934337615966797, 4.464317321777344, 8.019367218017578, 22.0902099609375, 30.642772674560547, 22.942840576171875, 30.608169555664062, 7.229923248291016, 25.323333740234375, 4.6093597412109375, 32.817840576171875, 6.823301315307617, 23.524505615234375, 25.675024032592773, -3.211456298828125, 18.591232299804688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000251.npy"}
{"epoch": 0.7382352941176471, "step": 252, "batch_size": 128, "mean": 18.97698974609375, "std": 16.24271583557129, "min": -12.963724136352539, "p10": -1.6676216125488277, "median": 19.41049575805664, "p90": 39.568932342529294, "max": 73.8720474243164, "pos_frac": 0.875, "sample": [9.603721618652344, -6.029228210449219, 21.121200561523438, 5.82684326171875, 13.36163330078125, -10.433927536010742, 26.44611358642578, 1.4373531341552734, 19.180850982666016, 19.219497680664062, 18.928421020507812, 38.374107360839844, 21.772964477539062, 30.297119140625, 3.1514358520507812, 14.465190887451172, -1.5373764038085938, 25.101539611816406, -0.009365081787109375, 29.42443084716797, 22.964157104492188, 43.45271301269531, 26.44769287109375, 35.16314697265625, 27.377685546875, 41.3199577331543, 13.071910858154297, 10.60146713256836, 24.8260498046875, 50.9639892578125, 28.87091064453125, 13.466667175292969, -4.151304244995117, 21.922006607055664, 2.483612060546875, 26.527854919433594, 17.914962768554688, 20.244441986083984, 27.91216278076172, 32.47856140136719, 20.2503662109375, 27.4298095703125, -12.963724136352539, 27.15688705444336, 7.546112060546875, 17.117523193359375, 21.584129333496094, -8.238235473632812, 13.196487426757812, 13.930099487304688, 44.854278564453125, 39.27141189575195, -10.108749389648438, 13.730087280273438, 17.822490692138672, 3.773468017578125, 8.699825286865234, -0.516876220703125, 19.993614196777344, 0.5765228271484375, -11.280160903930664, 27.226659774780273, 19.008697509765625, 7.860694885253906, 24.214942932128906, 20.086524963378906, 20.877601623535156, 13.096439361572266, -1.971527099609375, 44.218910217285156, 25.00865936279297, -3.6778335571289062, 57.8986701965332, 21.653732299804688, -4.514225006103516, 17.0269775390625, 19.60149383544922, 12.768714904785156, 11.009864807128906, 26.35906982421875, 4.2344512939453125, 32.391441345214844, 35.16143798828125, 12.578697204589844, 56.731597900390625, 34.921142578125, -2.1124649047851562, 21.236358642578125, 16.34126091003418, 4.3052520751953125, 23.362380981445312, 12.808052062988281, 48.75537872314453, 32.53806686401367, 6.0335693359375, 31.497268676757812, 39.459442138671875, 51.984893798828125, 73.8720474243164, 4.226692199707031, 18.63043975830078, 20.408960342407227, -3.8494873046875, 16.42547607421875, 13.108617782592773, 2.0619277954101562, 6.0553741455078125, 5.8941650390625, 20.692703247070312, 8.283737182617188, -12.928852081298828, 23.430015563964844, 24.783729553222656, 55.93260955810547, 13.084526062011719, 0.7195205688476562, 50.98088836669922, 33.125885009765625, 14.934030532836914, 20.654632568359375, 22.143081665039062, 24.469261169433594, 28.12958526611328, 9.206382751464844, 34.79173278808594, 21.513404846191406, 39.82440948486328, 1.08062744140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000252.npy"}
{"epoch": 0.7411764705882353, "step": 253, "batch_size": 128, "mean": 15.225152969360352, "std": 17.24178695678711, "min": -16.99620819091797, "p10": -4.7354587554931635, "median": 12.964988708496094, "p90": 39.640911102294915, "max": 68.81841278076172, "pos_frac": 0.8515625, "sample": [0.7343978881835938, 32.053741455078125, 4.6860198974609375, 35.62908935546875, -4.6449737548828125, 56.686424255371094, -5.311271667480469, 13.797164916992188, 11.421112060546875, 18.231311798095703, 18.99980926513672, 4.349128723144531, 23.448776245117188, 16.87824249267578, 28.878082275390625, 12.638290405273438, 9.889362335205078, -13.835365295410156, 25.376523971557617, 22.622817993164062, 24.03589630126953, 9.222557067871094, 5.549415588378906, 6.487480163574219, 5.087745666503906, 68.81841278076172, 19.328536987304688, 4.528938293457031, -11.5145263671875, 51.84088134765625, 16.16808319091797, -13.953773498535156, 47.38407897949219, 12.941329956054688, 21.125835418701172, 14.830215454101562, 38.55583953857422, 3.050586700439453, 8.953788757324219, 28.646270751953125, -14.4217529296875, 5.641168594360352, 1.8513355255126953, 42.589630126953125, 21.023231506347656, 57.467010498046875, 0.9514579772949219, 11.112907409667969, 17.623207092285156, -16.99620819091797, 3.0402679443359375, 10.022228240966797, 10.4254150390625, 10.095664978027344, 43.256927490234375, 35.169189453125, 24.53668212890625, -14.125, 8.964553833007812, 8.118206024169922, 6.198577880859375, -4.946590423583984, 10.721923828125, -2.9547691345214844, -13.690902709960938, 15.505905151367188, 14.326171875, 4.1214141845703125, 2.674863815307617, 42.17274475097656, -7.431331634521484, 34.302947998046875, 20.799861907958984, 15.831268310546875, 21.056076049804688, 14.732311248779297, 13.977920532226562, 12.9886474609375, 28.681568145751953, 7.468681335449219, 5.7952423095703125, 0.7132587432861328, 9.125679016113281, -8.701461791992188, 42.44253921508789, 14.420478820800781, 16.202850341796875, 49.22752380371094, 42.79547119140625, 19.808517456054688, 10.097206115722656, 1.4444904327392578, 0.7458114624023438, 29.513896942138672, 55.454498291015625, 11.759246826171875, 28.325653076171875, 15.695014953613281, 31.777076721191406, 12.27899169921875, 9.327812194824219, 4.447509765625, -1.4845600128173828, 27.726539611816406, 60.95043182373047, -4.056783676147461, -2.147247314453125, 32.862640380859375, 13.868751525878906, -10.68603515625, 37.678062438964844, 9.73484992980957, 31.631498336791992, 28.028701782226562, 17.787124633789062, 7.316486358642578, -4.081636428833008, 16.347129821777344, 12.687980651855469, 13.89828872680664, 6.217212677001953, 0.22743988037109375, 27.236572265625, 4.4406585693359375, 14.784660339355469, 25.413658142089844, 15.02163314819336, -11.781490325927734], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000253.npy"}
{"epoch": 0.7441176470588236, "step": 254, "batch_size": 128, "mean": 16.885009765625, "std": 16.615039825439453, "min": -30.354690551757812, "p10": -2.2976531982421866, "median": 15.647769927978516, "p90": 34.47418212890625, "max": 72.77855682373047, "pos_frac": 0.8828125, "sample": [14.745397567749023, 3.668975830078125, -2.0737838745117188, 7.7340240478515625, 49.249481201171875, 9.864822387695312, 22.983184814453125, 32.750640869140625, 7.9731903076171875, -20.406005859375, 11.257713317871094, 50.86299133300781, 15.563461303710938, 24.186492919921875, 0.8109359741210938, 31.576942443847656, -2.8200149536132812, -10.443693161010742, 20.631500244140625, 43.33815002441406, 3.4168319702148438, 54.66340637207031, 12.226314544677734, 6.19798469543457, 24.9156494140625, 14.87481689453125, 24.94357681274414, 10.09210205078125, 0.5841598510742188, 10.853233337402344, 11.414474487304688, 35.2957763671875, 1.3579254150390625, 35.75224304199219, 30.543548583984375, 23.653175354003906, 20.870315551757812, 28.006528854370117, 35.72430419921875, 14.826858520507812, 18.19091796875, 31.928787231445312, 21.39105987548828, 23.459259033203125, -3.3174896240234375, 5.0779266357421875, 26.934646606445312, 14.203857421875, 15.22840690612793, 8.412254333496094, 28.883255004882812, 3.0145416259765625, 30.430191040039062, 21.09527587890625, 19.4464111328125, 16.449813842773438, 25.12084197998047, 24.421730041503906, 1.1070556640625, 4.449705123901367, 9.012100219726562, 0.36769866943359375, 15.245996475219727, 7.225275039672852, 12.925994873046875, 24.32568359375, -30.354690551757812, 16.49842071533203, 10.613052368164062, -6.571990966796875, 0.7623329162597656, 43.453094482421875, 19.121753692626953, 20.311752319335938, 36.161231994628906, 26.970458984375, 3.9707908630371094, 7.7805328369140625, 10.643142700195312, 16.814075469970703, 1.6619720458984375, -1.8551559448242188, 49.72996520996094, 3.125396728515625, 8.136489868164062, 31.469518661499023, 12.37346076965332, 15.732078552246094, 28.329627990722656, 31.219940185546875, 72.77855682373047, 23.110153198242188, -15.668266296386719, 61.46078872680664, 23.727733612060547, 10.29677963256836, 22.927978515625, 29.917041778564453, 33.239044189453125, 23.240005493164062, 5.57989501953125, 50.22099304199219, 33.274234771728516, 26.306808471679688, 27.764205932617188, 15.751388549804688, 7.485157012939453, -3.9056625366210938, -3.4700851440429688, 30.1148681640625, -7.7870941162109375, 23.251914978027344, 33.28704071044922, 24.790077209472656, -10.046234130859375, 1.9282379150390625, 34.1220703125, 13.965644836425781, 13.452753067016602, 13.667770385742188, 0.202239990234375, 9.331741333007812, 15.887367248535156, 33.44208908081055, -19.267333984375, 4.1278228759765625, -4.043342590332031, 34.020843505859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000254.npy"}
{"epoch": 0.7470588235294118, "step": 255, "batch_size": 128, "mean": 19.29434585571289, "std": 20.720367431640625, "min": -31.082931518554688, "p10": -4.481972122192381, "median": 15.685585021972656, "p90": 47.32562141418457, "max": 85.7239761352539, "pos_frac": 0.8203125, "sample": [-7.422664642333984, 21.989421844482422, -1.56243896484375, 24.5169677734375, -3.8658905029296875, 30.48504638671875, 23.802764892578125, 19.455657958984375, 56.33978271484375, 2.6778030395507812, -28.01055908203125, 4.964191436767578, 61.394004821777344, 23.32349395751953, 20.4417724609375, 13.805667877197266, 7.928993225097656, -5.474723815917969, -13.990921020507812, 36.79383850097656, 16.858535766601562, 21.668350219726562, 13.223312377929688, -1.3450241088867188, 15.804367065429688, 28.31341552734375, -1.0480117797851562, 2.0060882568359375, 29.853538513183594, -10.000450134277344, 28.63823699951172, 31.767499923706055, 72.52883911132812, 24.624340057373047, 29.457962036132812, -0.09674835205078125, 15.544708251953125, 30.549081802368164, 7.7475128173828125, 4.587005615234375, 22.77975082397461, 5.497888565063477, 11.459197998046875, 26.783374786376953, 14.768363952636719, 27.454345703125, 13.234603881835938, -7.724945068359375, -5.793689727783203, 14.558601379394531, 10.611114501953125, 10.098678588867188, 23.990615844726562, -31.082931518554688, 52.82311248779297, 61.69532012939453, 11.647544860839844, 15.566802978515625, 12.459503173828125, 8.780288696289062, 36.046051025390625, 34.5552978515625, 26.992313385009766, 35.31029510498047, -13.386333465576172, 16.435394287109375, 85.7239761352539, 40.084190368652344, 4.236289978027344, 11.553627014160156, 56.091522216796875, 10.998916625976562, 34.728782653808594, -18.367950439453125, -17.62459373474121, 35.308692932128906, 18.560569763183594, 19.933115005493164, 9.23968505859375, -9.888275146484375, -4.056507110595703, 29.34668731689453, -0.9159393310546875, 37.54925537109375, 38.650787353515625, -1.7150344848632812, 1.4732589721679688, 24.68128204345703, 46.70649719238281, 28.796401977539062, 45.477813720703125, 5.437257766723633, 34.79998779296875, 29.87126922607422, 10.328346252441406, 62.87706756591797, 31.219390869140625, 27.631799697875977, 48.23961639404297, 9.045623779296875, -7.013246536254883, 15.498138427734375, 37.46862030029297, 9.534059524536133, -1.2639999389648438, 11.563730239868164, 54.344581604003906, 40.599098205566406, 47.21106719970703, 20.580535888671875, 61.09471130371094, 10.644935607910156, 13.424674987792969, 15.332683563232422, 36.823028564453125, 34.79950714111328, -4.022346496582031, 0.7020187377929688, 28.685455322265625, 56.933189392089844, 8.05984878540039, 7.3654022216796875, 1.7067852020263672, 11.884834289550781, 22.46484375, 47.59291458129883, 8.520469665527344, 9.282096862792969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000255.npy"}
{"epoch": 0.75, "step": 256, "batch_size": 128, "mean": 18.5269718170166, "std": 18.28528594970703, "min": -17.150917053222656, "p10": -2.5126657485961914, "median": 15.910869598388672, "p90": 42.69220962524413, "max": 74.76005554199219, "pos_frac": 0.828125, "sample": [6.007362365722656, 70.1729507446289, -8.410697937011719, 21.71946144104004, 10.576349258422852, 29.158397674560547, 17.171188354492188, 61.54651641845703, -8.21844482421875, 36.21642303466797, 7.38812255859375, 34.41258239746094, 46.71757125854492, 24.033409118652344, -1.6902008056640625, 26.373119354248047, -3.135831832885742, 31.168167114257812, 23.36302947998047, 30.763687133789062, 26.569725036621094, 41.06126403808594, 6.773902893066406, 13.967279434204102, -3.7957420349121094, 22.91246795654297, 2.0619964599609375, 13.160308837890625, 33.99755096435547, 22.35213851928711, 18.23680877685547, 16.131500244140625, 7.9690399169921875, 18.391408920288086, 32.660621643066406, 9.334869384765625, 53.643394470214844, 30.522262573242188, 6.962396621704102, 46.38124084472656, 11.738819122314453, 11.687362670898438, -10.528305053710938, 30.106124877929688, 3.8340835571289062, 39.19396209716797, 20.445480346679688, 8.951202392578125, 27.52799415588379, 11.467056274414062, 13.895946502685547, -2.854583740234375, -5.543283462524414, 14.657821655273438, -2.5076217651367188, -7.004634857177734, 15.949092864990234, 11.784317016601562, 65.22097778320312, 19.078994750976562, 5.035984039306641, 22.2352294921875, 20.121076583862305, 44.75421142578125, 32.1962890625, 15.090412139892578, 6.056632995605469, -9.15203857421875, 3.9669227600097656, -17.150917053222656, 16.632843017578125, 2.6310653686523438, 10.366012573242188, 26.35186004638672, 17.103286743164062, -0.24628639221191406, -1.9122390747070312, 14.843368530273438, -2.111621856689453, 51.22166442871094, 19.143341064453125, 39.6612548828125, -8.830085754394531, 25.530052185058594, 6.606285095214844, 11.2086181640625, 10.434555053710938, 48.1678466796875, -2.524435043334961, 47.00575256347656, 10.935043334960938, 9.462520599365234, 19.04851531982422, 8.457008361816406, 19.389877319335938, 15.87264633178711, 7.975927352905273, 2.310832977294922, 13.10498046875, 32.80207061767578, -0.6572742462158203, 21.66063117980957, -13.053787231445312, 11.696941375732422, 55.21897888183594, 13.782119750976562, 10.955841064453125, 18.446792602539062, -1.4783401489257812, 31.840225219726562, 28.211490631103516, 41.518157958984375, 16.802597045898438, 59.20947265625, 40.065574645996094, 40.26167297363281, 4.465087890625, 23.002761840820312, 8.771415710449219, 6.574348449707031, 32.752037048339844, -0.4622802734375, 30.975738525390625, 3.3651962280273438, -1.5302238464355469, 74.76005554199219, 41.808494567871094, 26.964019775390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000256.npy"}
{"epoch": 0.7529411764705882, "step": 257, "batch_size": 128, "mean": 15.16891860961914, "std": 16.51055335998535, "min": -35.12504577636719, "p10": -6.628954505920409, "median": 13.670890808105469, "p90": 37.62231178283691, "max": 57.74974822998047, "pos_frac": 0.8125, "sample": [13.692878723144531, 2.6469955444335938, 37.22935485839844, -0.6789913177490234, -9.349189758300781, 28.815322875976562, 1.9850749969482422, 38.80400848388672, 10.002317428588867, 6.609222412109375, 24.03900146484375, 24.675926208496094, 4.9656219482421875, 23.297882080078125, 43.255462646484375, 27.665618896484375, 39.33396911621094, 11.787307739257812, 16.107223510742188, 0.421844482421875, 30.331459045410156, -8.576614379882812, 13.648902893066406, 35.509124755859375, 35.75836944580078, 29.662582397460938, 10.593040466308594, -1.7750396728515625, 26.599517822265625, 8.896533966064453, 5.281990051269531, 20.504104614257812, -16.891769409179688, 21.16889190673828, 17.205795288085938, 12.178550720214844, 26.754371643066406, 24.388996124267578, 5.967172622680664, 39.716392517089844, 31.653894424438477, 9.831968307495117, -4.558309555053711, 18.072402954101562, 27.908645629882812, 17.344951629638672, 24.616897583007812, -2.3318443298339844, 0.9685077667236328, 6.2645416259765625, 32.212921142578125, -12.3857421875, 40.51020050048828, 1.888458251953125, 9.944160461425781, 8.330665588378906, 6.129913330078125, 21.95452117919922, 4.6480560302734375, 22.80457305908203, -35.12504577636719, 20.559364318847656, 12.159675598144531, 30.796180725097656, -7.782611846923828, 6.094139099121094, 1.689239501953125, -7.292936325073242, 12.537628173828125, 10.781051635742188, 34.19179916381836, 14.824981689453125, 9.131423950195312, 27.50936508178711, 38.68536376953125, 41.92628479003906, -9.104211807250977, 18.962860107421875, 17.656307220458984, -1.3484249114990234, 21.956039428710938, 27.621009826660156, 5.698368072509766, -0.5961837768554688, 34.80804443359375, -11.274017333984375, 42.742591857910156, -13.337018966674805, 17.337732315063477, 44.913299560546875, 14.476448059082031, 22.450027465820312, 17.27916717529297, 36.12822723388672, 57.74974822998047, -0.0054721832275390625, 57.514251708984375, 38.53921127319336, 2.8879947662353516, 23.0173397064209, -7.9614105224609375, 28.714599609375, 24.70355224609375, 7.2249298095703125, 6.626426696777344, -6.344390869140625, 13.475196838378906, -5.49310302734375, 22.297882080078125, 4.30174446105957, -1.2145957946777344, 32.98900604248047, 25.769256591796875, -1.8100357055664062, -9.165576934814453, 5.490997314453125, 23.0546875, -7.568672180175781, 7.61077880859375, 3.5319747924804688, 10.316207885742188, 14.861274719238281, 8.992790222167969, 10.06787109375, 47.37273406982422, 17.51218032836914, 6.451351165771484, 35.01612854003906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000257.npy"}
{"epoch": 0.7558823529411764, "step": 258, "batch_size": 128, "mean": 15.991632461547852, "std": 16.1535587310791, "min": -14.348793029785156, "p10": -4.023241233825684, "median": 13.17032241821289, "p90": 41.354104614257814, "max": 61.963714599609375, "pos_frac": 0.84375, "sample": [11.512920379638672, 19.696582794189453, 13.419719696044922, 61.963714599609375, -8.720741271972656, -6.009765625, 30.77880096435547, -0.6019363403320312, 24.66999053955078, 24.85674285888672, 36.42637634277344, -5.382335662841797, 44.62322998046875, -2.5904884338378906, 30.63214874267578, 41.36993408203125, 1.976694107055664, 12.797910690307617, 24.0584716796875, 51.58842086791992, 5.0639190673828125, 1.2876968383789062, 37.30937194824219, 5.354270935058594, 5.146785736083984, -0.09656524658203125, 21.12790298461914, 48.759429931640625, -12.289981842041016, -0.11450576782226562, 9.298900604248047, 22.57762908935547, 4.300605773925781, 4.792266845703125, 12.320747375488281, 13.631828308105469, -4.0786590576171875, 33.10809326171875, 43.35124969482422, 10.862205505371094, 17.85137939453125, 6.924278259277344, 33.9755859375, 8.564117431640625, 12.576377868652344, 8.075811386108398, 24.30931854248047, 1.1449470520019531, 18.246017456054688, 19.733619689941406, 5.1438140869140625, 2.1486663818359375, 15.175811767578125, 14.171745300292969, 0.9711837768554688, 52.34312438964844, 8.470008850097656, 11.127157211303711, 27.11663818359375, 12.968196868896484, 12.867012023925781, 23.62317657470703, 8.132255554199219, 17.955631256103516, 25.03630828857422, 28.67364501953125, 22.984344482421875, 8.154533386230469, 38.92304229736328, 15.465065002441406, 15.329654693603516, -6.2598724365234375, 45.806312561035156, -14.348793029785156, -7.31646728515625, 43.86915588378906, -2.0464324951171875, -0.4258880615234375, -3.999490737915039, 9.6868896484375, 41.347320556640625, 10.082138061523438, 33.494049072265625, 5.962928771972656, 12.48406982421875, 1.150339126586914, 19.590749740600586, 12.110126495361328, 13.372447967529297, 28.121410369873047, 32.39842987060547, 17.81848907470703, 8.958587646484375, 8.5391845703125, 24.326080322265625, 6.750938415527344, 8.91845703125, 15.278030395507812, 46.350242614746094, 24.07379913330078, 17.510169982910156, 20.257225036621094, -11.785873413085938, 0.9813919067382812, 5.617919921875, -9.191162109375, -4.9025726318359375, 45.025146484375, 55.775779724121094, -4.3411102294921875, 10.63909912109375, 41.63081359863281, 18.511512756347656, 0.7395000457763672, 11.985481262207031, 4.803798675537109, 22.427024841308594, 12.95114517211914, 18.191871643066406, 20.07707977294922, -11.684097290039062, 29.857162475585938, 20.63134765625, 26.75464630126953, 39.992271423339844, 23.274932861328125, 17.184356689453125, 0.9587059020996094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000258.npy"}
{"epoch": 0.7588235294117647, "step": 259, "batch_size": 128, "mean": 18.00747299194336, "std": 15.177475929260254, "min": -16.92072105407715, "p10": -1.1086914062499995, "median": 18.615604400634766, "p90": 37.275455474853516, "max": 61.70306396484375, "pos_frac": 0.8828125, "sample": [21.603290557861328, 21.11865234375, 15.201545715332031, 7.406036376953125, 22.635887145996094, 28.054367065429688, 30.739776611328125, 39.08363723754883, 31.90670394897461, 13.357803344726562, 18.529584884643555, 37.16322326660156, 28.179454803466797, 16.65606689453125, 30.26346778869629, 10.678539276123047, 0.18255615234375, 41.522743225097656, 0.9333038330078125, 29.353286743164062, 0.34812164306640625, 18.64051055908203, -3.0319881439208984, -3.7009201049804688, 2.2959632873535156, 37.03521728515625, 26.266189575195312, 49.76685333251953, 12.47159194946289, 36.87898254394531, 9.373260498046875, 40.29597473144531, 18.929962158203125, -4.200000762939453, 25.504398345947266, 35.78886413574219, 16.195716857910156, 27.731863021850586, 48.669288635253906, 25.777809143066406, 19.149948120117188, 1.5087833404541016, 15.759368896484375, 18.18498992919922, 1.60595703125, 33.10407257080078, 7.517024993896484, 8.7305908203125, 33.590797424316406, 12.657352447509766, 1.027191162109375, 14.559463500976562, 27.504547119140625, 33.3944206237793, 40.66236114501953, 20.076810836791992, 19.485397338867188, 11.233291625976562, 2.8600692749023438, 20.064029693603516, 9.657119750976562, 29.93860626220703, -3.4522705078125, 23.849464416503906, 6.365032196044922, 49.72856140136719, 19.487625122070312, 19.56934356689453, 28.041046142578125, 2.0589447021484375, -0.32071685791015625, 3.4000244140625, 18.992156982421875, 7.254402160644531, 6.041446685791016, 23.936080932617188, 29.604324340820312, -1.3967514038085938, -12.238975524902344, 20.824567794799805, 26.140296936035156, 23.25847625732422, 10.455986022949219, 23.258193969726562, 12.884086608886719, 22.01898193359375, 39.17597198486328, 29.93265151977539, 13.423858642578125, 14.46405029296875, -0.9852371215820312, 3.055980682373047, -1.4608955383300781, 33.12812805175781, -16.92072105407715, 17.884201049804688, 13.774686813354492, 7.200141906738281, 23.603771209716797, 18.5906982421875, -6.354156494140625, -2.6919708251953125, 23.246028900146484, 24.077356338500977, -11.793800354003906, 48.51786804199219, 34.08141326904297, 26.34368896484375, 8.054405212402344, 48.43537139892578, 51.29560089111328, 3.554107666015625, 5.4747161865234375, 19.383270263671875, 14.803352355957031, 11.84100341796875, -2.185169219970703, 9.658363342285156, 1.9127578735351562, -8.631526947021484, 19.15142822265625, 24.897666931152344, 3.258413314819336, 61.70306396484375, 30.554149627685547, 9.379852294921875, 16.944442749023438, 37.537330627441406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000259.npy"}
{"epoch": 0.7617647058823529, "step": 260, "batch_size": 128, "mean": 16.079952239990234, "std": 17.074853897094727, "min": -26.244186401367188, "p10": -2.683413887023925, "median": 13.836995124816895, "p90": 35.354210662841794, "max": 72.75263977050781, "pos_frac": 0.859375, "sample": [6.9320526123046875, 33.119895935058594, -0.5175399780273438, -4.361141204833984, 56.73199462890625, -6.405435562133789, 23.0185546875, 4.4520111083984375, 54.67445373535156, 23.347305297851562, 14.053260803222656, 18.266769409179688, 32.19087219238281, 18.259353637695312, 6.955600738525391, 13.566619873046875, 28.7904052734375, 2.18707275390625, 22.56633186340332, 11.068260192871094, 18.259017944335938, 6.373569488525391, 10.203857421875, 5.2812042236328125, 20.704177856445312, 20.992843627929688, 13.567039489746094, 72.75263977050781, 4.3245697021484375, 29.574386596679688, -5.2483062744140625, 37.05402374267578, 35.42015075683594, 15.01237678527832, 27.388031005859375, 18.346176147460938, 7.281337738037109, 35.22320556640625, 32.49651336669922, 14.123313903808594, 6.924835205078125, 18.23974609375, 19.341617584228516, 13.037490844726562, 9.03000259399414, -5.57713508605957, -2.396087646484375, 50.83202362060547, 20.092384338378906, 4.455162048339844, 28.941532135009766, -1.8739547729492188, 0.3114128112792969, 34.55989074707031, 33.818756103515625, 0.7381057739257812, 0.09305572509765625, 5.736137390136719, 3.263031005859375, 11.477569580078125, 0.06895828247070312, 9.38994026184082, 22.450336456298828, -11.309532165527344, 18.58740234375, 9.823295593261719, 72.02313232421875, -4.60614013671875, 9.454952239990234, 14.255138397216797, 21.732818603515625, 10.239164352416992, 38.86530303955078, 67.541748046875, 3.1015625, 12.789752960205078, 44.015079498291016, 39.4715576171875, -17.608322143554688, 1.803558349609375, 21.39141082763672, 10.39962387084961, 10.078201293945312, 26.158788681030273, 38.970069885253906, 15.681724548339844, 4.680019378662109, 16.707857131958008, 31.686416625976562, -3.353841781616211, 30.645999908447266, 14.659957885742188, -9.202285766601562, 13.584053039550781, 0.988128662109375, 43.523712158203125, 12.363349914550781, 5.2931060791015625, -10.48455810546875, -26.244186401367188, 34.068603515625, -7.128143310546875, -0.007923126220703125, 17.993629455566406, 17.044830322265625, 19.070777893066406, 32.52467727661133, 22.498207092285156, 35.325950622558594, 15.972244262695312, 13.620729446411133, 2.6866016387939453, 16.022056579589844, 9.618860244750977, -1.9445343017578125, 23.5325927734375, 21.825340270996094, 32.45030975341797, 3.9752578735351562, 34.443206787109375, -9.791946411132812, 9.033893585205078, 5.8774261474609375, 25.957887649536133, 19.151870727539062, 2.5815658569335938, 12.07470703125, 11.065628051757812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000260.npy"}
{"epoch": 0.7647058823529411, "step": 261, "batch_size": 128, "mean": 15.724516868591309, "std": 16.114805221557617, "min": -16.546630859375, "p10": -1.7328578948974604, "median": 12.69940185546875, "p90": 38.5436538696289, "max": 63.5281982421875, "pos_frac": 0.8671875, "sample": [15.501998901367188, 41.914024353027344, 20.98011016845703, 9.47906494140625, -6.191658020019531, 3.97625732421875, 6.950977325439453, 12.714797973632812, 19.304718017578125, -6.529943466186523, 2.348358154296875, 37.767364501953125, 0.22917938232421875, 0.4628715515136719, 9.90316390991211, 3.466421127319336, 9.931724548339844, 13.288894653320312, -6.1477203369140625, 37.30818176269531, 3.2871246337890625, 20.827598571777344, 27.22735595703125, 2.8870277404785156, 4.6744842529296875, 4.059852600097656, 21.0771484375, 7.080745697021484, 43.713294982910156, 16.789199829101562, -1.3815460205078125, 50.9541015625, 1.29901123046875, 13.416290283203125, 9.608625411987305, -16.546630859375, -7.167560577392578, 14.030197143554688, 33.457733154296875, 52.94087219238281, 37.208648681640625, 13.469711303710938, 25.103965759277344, 18.05585479736328, 6.50935173034668, 4.502462387084961, 23.062734603881836, 32.33216094970703, 8.076152801513672, 29.511489868164062, 11.222694396972656, 9.520706176757812, -1.0448036193847656, 31.46490478515625, -2.035430908203125, 24.170692443847656, 20.36908531188965, 61.780792236328125, 6.464696884155273, 49.346282958984375, 34.67387008666992, 45.67754364013672, 20.773727416992188, 29.56206512451172, 10.897857666015625, 20.198715209960938, 23.60425567626953, 11.535301208496094, 27.404359817504883, 5.305633544921875, 4.099517822265625, -1.6031837463378906, 21.935497283935547, 11.910602569580078, 17.568828582763672, 17.526336669921875, 30.48882293701172, 33.172462463378906, 29.289073944091797, 11.42401123046875, 17.956626892089844, 3.4304027557373047, 13.523651123046875, -5.560150146484375, 8.97463607788086, 17.312931060791016, 22.09203338623047, -5.446746826171875, -1.5465011596679688, 47.74555206298828, 22.981441497802734, 11.232030868530273, 46.2979736328125, 7.578025817871094, 4.540616989135742, 6.587547302246094, 5.135898590087891, -9.732696533203125, 8.776844024658203, 15.564029693603516, 63.5281982421875, -6.68853759765625, 42.03655242919922, 2.0321311950683594, 1.1646575927734375, 34.37726593017578, 40.35499572753906, 15.816967010498047, 19.77099609375, 13.260711669921875, 26.576553344726562, 7.686103820800781, -5.941829681396484, 12.684005737304688, 10.418045043945312, -4.4010009765625, 14.86074447631836, 15.517906188964844, 4.662452697753906, 4.078277587890625, -12.463768005371094, 3.3601455688476562, 24.194839477539062, 6.205451965332031, 54.8975830078125, 8.511560440063477, 23.603500366210938, 11.7584228515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000261.npy"}
{"epoch": 0.7676470588235295, "step": 262, "batch_size": 128, "mean": 15.360321044921875, "std": 16.14191246032715, "min": -19.023849487304688, "p10": -3.681422424316406, "median": 13.398978233337402, "p90": 36.906558990478516, "max": 57.08042907714844, "pos_frac": 0.8203125, "sample": [5.5472412109375, 21.335960388183594, 10.608551025390625, 24.312042236328125, 35.39521789550781, 1.6374359130859375, 17.061477661132812, 31.275115966796875, 35.600372314453125, 47.976715087890625, 1.3017120361328125, 10.672462463378906, 6.19488525390625, 11.272857666015625, 17.604930877685547, -1.396820068359375, 3.2803802490234375, -18.225805282592773, 30.141395568847656, 15.864501953125, 5.588701248168945, -8.260293960571289, 50.478515625, 1.454782485961914, 54.91148376464844, 1.1299667358398438, 22.90148162841797, 21.464712142944336, -2.0671463012695312, -3.4014358520507812, 31.5404052734375, 19.721519470214844, 22.95361328125, 29.7705078125, -4.364246368408203, 28.360668182373047, 0.7883739471435547, 24.509151458740234, -8.928176879882812, -6.2240142822265625, 24.748085021972656, 34.871917724609375, 4.79539680480957, 7.086212158203125, -1.88531494140625, 38.48582458496094, 28.506006240844727, 7.182392120361328, 20.877620697021484, 6.399419784545898, 2.49957275390625, 2.8756484985351562, 20.232763290405273, -3.5543060302734375, 31.82636260986328, 20.764019012451172, 30.50974464416504, -11.214859008789062, 30.96210479736328, 37.079994201660156, 31.416778564453125, 11.132759094238281, -0.12014007568359375, 12.299430847167969, -5.055084228515625, 55.51250457763672, 34.5654296875, 16.0119571685791, 4.542205810546875, 15.883987426757812, 17.8052978515625, 2.469308853149414, 21.674880981445312, 14.372421264648438, 40.80973815917969, 12.579181671142578, -19.023849487304688, 9.965660095214844, 17.452545166015625, 41.752166748046875, 18.503372192382812, 13.381818771362305, 57.08042907714844, -1.9080543518066406, 23.278709411621094, 26.833084106445312, 32.84956359863281, 36.83222961425781, 27.75627326965332, 10.878519058227539, 2.1803817749023438, 13.001235961914062, 11.109901428222656, 13.4161376953125, -0.203277587890625, 8.620803833007812, 8.776756286621094, 1.3349609375, 15.320732116699219, 3.305511474609375, 4.02490234375, 23.59302520751953, -3.97802734375, 24.164871215820312, 12.358924865722656, 4.77178955078125, 13.859100341796875, 43.365745544433594, 37.58749771118164, -10.755544662475586, 3.9109420776367188, -3.5000762939453125, 10.140609741210938, -2.8538341522216797, -6.853214263916016, -7.5263671875, 18.67136001586914, 25.094482421875, 8.84619140625, 11.539688110351562, 6.141029357910156, 32.285213470458984, 20.15143394470215, 47.4512939453125, 37.71644592285156, -6.028778076171875, 27.03343391418457, 13.674896240234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000262.npy"}
{"epoch": 0.7705882352941177, "step": 263, "batch_size": 128, "mean": 17.637216567993164, "std": 17.253618240356445, "min": -22.84369659423828, "p10": -3.84030590057373, "median": 16.741355895996094, "p90": 39.755290222167964, "max": 61.106842041015625, "pos_frac": 0.8203125, "sample": [20.53988265991211, 16.434127807617188, 19.721481323242188, 18.19280242919922, 0.6591110229492188, 31.343135833740234, 27.011627197265625, 31.41912078857422, 9.626358032226562, 8.640182495117188, 29.250823974609375, 15.832313537597656, 34.00880432128906, 12.816389083862305, 6.9333648681640625, 1.2638168334960938, 39.28123474121094, 17.085094451904297, 30.676025390625, -5.20556640625, 13.36602783203125, 29.14288330078125, 4.1990509033203125, 20.612770080566406, 17.924911499023438, 18.028064727783203, 52.920074462890625, 46.31848907470703, 7.658443450927734, -4.131662368774414, 58.994285583496094, 11.461288452148438, 44.11466979980469, -2.5800323486328125, 17.4329833984375, -0.11307907104492188, -0.10468292236328125, 16.28234100341797, 39.091033935546875, -2.1963653564453125, 10.797309875488281, 9.958503723144531, 11.020927429199219, 33.45997619628906, 10.22422981262207, 37.1148681640625, 9.735931396484375, -7.727607727050781, 52.945709228515625, 17.531219482421875, 9.620086669921875, -2.7909374237060547, 7.011085510253906, 12.660358428955078, 28.2528076171875, 25.682910919189453, 2.702728271484375, 11.133838653564453, -6.394054412841797, 19.954803466796875, 8.854278564453125, 37.02006530761719, 0.42949485778808594, 46.607994079589844, 39.5604248046875, 40.20997619628906, -0.8984527587890625, 15.288715362548828, 7.437446594238281, -7.197357177734375, -16.074848175048828, -0.5850296020507812, 10.679483413696289, 22.323776245117188, 18.57476043701172, 34.878170013427734, 3.049182891845703, -3.7154388427734375, 14.17791748046875, 28.48938751220703, 4.1700286865234375, 22.16179656982422, -5.0560302734375, 30.8052978515625, 37.74153137207031, 13.47894287109375, 16.71503448486328, 1.4040813446044922, -0.4057884216308594, 18.572021484375, 22.515426635742188, 27.034568786621094, -5.169677734375, 37.13441467285156, 31.64920425415039, 27.133148193359375, 46.20249938964844, -22.84369659423828, 46.006290435791016, 16.767677307128906, 23.65239715576172, -1.9427776336669922, 16.089248657226562, 16.36639404296875, 61.106842041015625, 34.119544982910156, 38.99021911621094, 23.039527893066406, 28.3853759765625, 18.820676803588867, -10.332321166992188, 50.59112548828125, 16.061264038085938, 18.8946533203125, 18.389862060546875, 19.037628173828125, 16.50957489013672, 9.614120483398438, -5.027351379394531, 58.958465576171875, 41.76251983642578, 15.501459121704102, 21.882644653320312, -21.115463256835938, -11.181884765625, 29.605518341064453, 0.702911376953125, 17.10857391357422], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000263.npy"}
{"epoch": 0.7735294117647059, "step": 264, "batch_size": 128, "mean": 15.547611236572266, "std": 14.518284797668457, "min": -13.100379943847656, "p10": -1.7607135772705074, "median": 14.373546600341797, "p90": 34.66754150390625, "max": 52.949134826660156, "pos_frac": 0.859375, "sample": [-3.028848648071289, 43.12196350097656, 24.215007781982422, 25.103302001953125, 20.152862548828125, 14.193267822265625, 34.538978576660156, -1.0516681671142578, 37.63066101074219, 9.3592529296875, 5.9979095458984375, 17.48828125, 0.87738037109375, 16.35375213623047, 18.79486083984375, 2.204732894897461, 28.205474853515625, 33.00444030761719, 34.3880615234375, 24.317237854003906, 52.64842224121094, 6.682518005371094, 15.109909057617188, 5.5667877197265625, 9.994270324707031, -7.188121795654297, 42.49494934082031, 7.150810241699219, 14.908077239990234, 19.05590057373047, 16.810455322265625, 9.619659423828125, 7.0173492431640625, 16.19580841064453, 29.212432861328125, 2.9286041259765625, 44.977943420410156, 11.149084091186523, -0.5571460723876953, -13.100379943847656, 12.613737106323242, 30.395965576171875, -10.759803771972656, 15.465789794921875, 30.196014404296875, 28.645767211914062, 11.973678588867188, 16.976364135742188, 27.79372215270996, 9.040443420410156, 48.266845703125, 24.597007751464844, 6.469108581542969, 13.503883361816406, 4.5025787353515625, 13.738746643066406, 52.949134826660156, 19.21131134033203, 18.053062438964844, 9.177932739257812, 10.384788513183594, -12.564739227294922, -1.6171989440917969, 40.87245178222656, -1.3230934143066406, 42.980445861816406, 13.834220886230469, 10.589588165283203, 8.076446533203125, -2.0955810546875, -2.143585205078125, 38.47460174560547, 1.7010574340820312, 8.8677978515625, 30.540069580078125, 17.201263427734375, -6.312095642089844, 25.423891067504883, 30.82379150390625, 14.165435791015625, 14.553825378417969, -7.694803237915039, 11.5712890625, 7.754447937011719, -4.9098358154296875, 17.34576416015625, 33.9144287109375, 7.8017730712890625, -0.45590972900390625, 42.695335388183594, 1.978790283203125, 32.14141845703125, 7.491737365722656, 34.96752166748047, 16.229446411132812, 26.297592163085938, -7.169258117675781, 15.338329315185547, 2.991241455078125, 4.101409912109375, 27.610580444335938, 2.747621536254883, 25.41875648498535, 19.99419403076172, 7.032495498657227, 31.200790405273438, 19.615346908569336, 40.782012939453125, 1.5486564636230469, 9.961156845092773, 3.0096092224121094, -5.289924621582031, 15.003305435180664, 14.742076873779297, 13.24277114868164, 33.504722595214844, 15.74038314819336, 16.49523162841797, 3.1213760375976562, 30.419639587402344, 15.057075500488281, 5.148902893066406, -3.7046756744384766, 4.7972564697265625, 13.17388916015625, 0.060749053955078125, 27.233749389648438, 18.24695587158203], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000264.npy"}
{"epoch": 0.7764705882352941, "step": 265, "batch_size": 128, "mean": 16.23978042602539, "std": 16.99869728088379, "min": -22.53887176513672, "p10": -2.6557188034057613, "median": 14.264354705810547, "p90": 37.46994705200194, "max": 68.707763671875, "pos_frac": 0.8515625, "sample": [10.763595581054688, -2.5564117431640625, -1.6423568725585938, 2.935546875, 21.785053253173828, 13.471931457519531, 10.961021423339844, 12.090011596679688, 24.165916442871094, 19.413349151611328, 7.693359375, 6.108207702636719, 3.663778305053711, 17.2607421875, 52.87602996826172, 4.414726257324219, 10.80262565612793, 21.679542541503906, 9.15054702758789, -11.876422882080078, 41.319580078125, 8.195180892944336, 27.998313903808594, 51.529327392578125, 17.825035095214844, 11.999580383300781, 25.961469650268555, 4.8188018798828125, 3.0780258178710938, 19.8067626953125, 4.57867431640625, 1.5608062744140625, 12.235382080078125, 32.987579345703125, 20.033689498901367, 11.207756042480469, 9.366104125976562, 19.648239135742188, 17.983562469482422, -0.1584453582763672, 28.76184844970703, -22.53887176513672, 25.751312255859375, 2.47259521484375, 8.437149047851562, 18.165199279785156, -5.2471160888671875, 13.929214477539062, -4.905681610107422, 26.198749542236328, -3.1162109375, -11.925201416015625, 35.16559600830078, -2.5566043853759766, 7.313011169433594, 30.314300537109375, -7.024726867675781, 36.441978454589844, 43.68900680541992, 13.602325439453125, 14.228767395019531, -13.61260986328125, -2.8869857788085938, 39.8338623046875, 32.67207336425781, 35.94367218017578, 26.309635162353516, 14.408088684082031, 29.100006103515625, 1.0936317443847656, 40.86124038696289, 16.242149353027344, -2.543529510498047, 10.905696868896484, 17.45519256591797, 27.262847900390625, 19.42205810546875, 8.370712280273438, 21.18553924560547, 17.66969871520996, 41.051612854003906, 21.521869659423828, 31.7308349609375, -20.779273986816406, 23.689300537109375, 11.937881469726562, 12.811347961425781, -9.564506530761719, 68.707763671875, 3.0727691650390625, 40.90220642089844, 11.67098617553711, 30.14562225341797, 2.1999588012695312, 32.80462646484375, 0.5663528442382812, 14.299942016601562, 1.356689453125, 34.9832763671875, 2.8907012939453125, 16.046607971191406, 11.3575439453125, 45.68641662597656, 36.45684051513672, 16.438385009765625, 14.666728973388672, -4.8594970703125, 33.18308639526367, 28.964994430541992, 20.507049560546875, 1.5887107849121094, 32.23070526123047, 5.297176361083984, 40.34697723388672, 16.216148376464844, 67.68728637695312, -0.022579193115234375, 1.0737991333007812, 62.93000030517578, -16.246826171875, 8.839004516601562, 24.237594604492188, 30.596391677856445, 12.171974182128906, 11.318729400634766, 19.781845092773438, 22.93651580810547, 1.2745399475097656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000265.npy"}
{"epoch": 0.7794117647058824, "step": 266, "batch_size": 128, "mean": 17.28414535522461, "std": 16.50705337524414, "min": -21.69167709350586, "p10": -4.378470611572265, "median": 18.252803802490234, "p90": 34.75402145385742, "max": 63.87309265136719, "pos_frac": 0.8515625, "sample": [17.91309356689453, -5.508516311645508, 17.17559814453125, 34.14459991455078, 18.192138671875, -10.127357482910156, 4.827413558959961, 20.233238220214844, 27.850210189819336, 23.673479080200195, 6.5367889404296875, 14.694698333740234, 28.720413208007812, 26.953601837158203, 14.767776489257812, 7.128486633300781, 36.169952392578125, -11.633453369140625, 1.7150840759277344, 21.309471130371094, 2.0624237060546875, 12.88519287109375, -3.799724578857422, 21.86859130859375, 20.254547119140625, 23.375534057617188, 12.817306518554688, 40.26972961425781, 32.829193115234375, 19.463085174560547, 30.279020309448242, 0.597076416015625, 50.565704345703125, 8.898681640625, 5.122522354125977, 54.37458038330078, -7.169397354125977, 5.2440185546875, 20.039474487304688, 9.004493713378906, 18.980234146118164, 39.96171569824219, 7.141326904296875, 8.678749084472656, 27.516136169433594, 5.740776062011719, 29.19903564453125, 25.0426025390625, 27.672752380371094, 32.064292907714844, 16.130882263183594, 8.742698669433594, 63.87309265136719, -21.69167709350586, 28.35334014892578, 28.360153198242188, 16.909080505371094, 10.562980651855469, -14.698871612548828, -3.2773513793945312, -1.8539352416992188, 24.072479248046875, 19.82080078125, -8.366657257080078, 27.500526428222656, 31.35356903076172, 42.54570770263672, 28.989356994628906, 59.73335266113281, 31.454071044921875, 29.566421508789062, 24.22455596923828, 61.471343994140625, -9.401847839355469, 19.691482543945312, 18.31346893310547, 19.401622772216797, 19.303062438964844, 38.04560852050781, 34.147193908691406, -4.7779083251953125, 43.42193603515625, 5.5634002685546875, 2.734027862548828, 17.72967529296875, 9.287338256835938, 21.941650390625, 25.969650268554688, 1.9060497283935547, -5.988245010375977, 11.38482666015625, 2.6978492736816406, -12.409149169921875, 54.45672607421875, 21.8216552734375, 4.603843688964844, -11.889999389648438, 22.547626495361328, 12.111686706542969, 32.16374206542969, 21.19359588623047, 28.60901641845703, 13.813003540039062, 12.861976623535156, 38.447410583496094, 11.1331787109375, -19.087074279785156, 17.693580627441406, 29.530364990234375, -4.207283020019531, 31.25164031982422, 8.373245239257812, 4.87257194519043, 32.981170654296875, -1.740966796875, -2.4085235595703125, 21.115760803222656, 9.174331665039062, 18.123260498046875, 26.34593963623047, 25.645015716552734, 10.9630126953125, 13.459884643554688, 23.294233322143555, 5.9571990966796875, 27.33769989013672, 14.67779541015625, 18.690271377563477], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000266.npy"}
{"epoch": 0.7823529411764706, "step": 267, "batch_size": 128, "mean": 14.426403999328613, "std": 15.952117919921875, "min": -36.5418586730957, "p10": -2.814236259460449, "median": 13.381534576416016, "p90": 34.93913192749023, "max": 66.34852600097656, "pos_frac": 0.859375, "sample": [-7.368793487548828, -3.066232681274414, 25.676361083984375, 3.5420265197753906, 2.2809982299804688, 16.986778259277344, 27.220962524414062, 34.114166259765625, 36.18988037109375, 14.984085083007812, -2.5551958084106445, 2.0293807983398438, 18.901947021484375, 20.0009765625, 10.620126724243164, 32.532161712646484, 22.706039428710938, 9.588180541992188, 38.113853454589844, 4.609165191650391, 12.19677734375, 30.21928596496582, 8.440315246582031, 1.8269157409667969, 7.5746002197265625, 7.6721954345703125, 3.49786376953125, 12.59695053100586, 43.48871612548828, 19.41999053955078, 11.438884735107422, 0.13153076171875, 0.27716064453125, 6.918403625488281, 31.474937438964844, 21.319847106933594, 20.333656311035156, 25.36658477783203, -6.234901428222656, 0.49684906005859375, 26.43183135986328, 31.060779571533203, 16.08736228942871, -36.5418586730957, 18.340436935424805, 39.58094787597656, 2.3066253662109375, 21.711227416992188, 1.4885902404785156, 2.395872116088867, 22.84235382080078, 66.34852600097656, -12.810089111328125, 4.714469909667969, 7.793720245361328, -3.7305908203125, -4.178688049316406, 18.465377807617188, 22.6253662109375, 20.122589111328125, 14.52513313293457, -2.2324752807617188, 36.17933654785156, 18.944786071777344, 30.812713623046875, 29.117774963378906, -2.70623779296875, 39.534828186035156, 8.740228652954102, -6.226600646972656, 2.42791748046875, 30.861129760742188, 28.291851043701172, 14.564788818359375, 15.706581115722656, 56.901611328125, 3.7955245971679688, -4.266334533691406, -6.83184814453125, 13.698600769042969, 3.3679466247558594, 7.680206298828125, 3.348125457763672, -19.84355926513672, 5.063323974609375, 2.2408676147460938, 32.838417053222656, 39.920013427734375, 2.0412673950195312, 34.00183868408203, 9.94171142578125, -0.5783100128173828, 22.608253479003906, 13.237503051757812, 15.692581176757812, 31.252090454101562, 19.236961364746094, 22.40558624267578, 0.9343328475952148, 35.258514404296875, -5.8652496337890625, 4.24029541015625, 29.190261840820312, 55.047821044921875, 17.30913543701172, 34.80225372314453, 45.5609130859375, 2.2807960510253906, -0.09905242919921875, 9.374488830566406, 6.226640701293945, 47.07283020019531, 0.367828369140625, 7.35302734375, 20.5511474609375, 13.87864875793457, 23.824325561523438, 16.183517456054688, 16.682676315307617, 2.7925262451171875, 13.525566101074219, 2.920726776123047, 21.42577362060547, -4.258892059326172, 6.849822998046875, 5.917015075683594, 5.530429840087891, 14.789127349853516], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000267.npy"}
{"epoch": 0.7852941176470588, "step": 268, "batch_size": 128, "mean": 20.000228881835938, "std": 17.53892707824707, "min": -19.688880920410156, "p10": 0.3256565093994159, "median": 19.04402732849121, "p90": 46.86316986083984, "max": 63.671844482421875, "pos_frac": 0.8984375, "sample": [3.626699447631836, 26.998870849609375, -3.9301223754882812, 3.1272735595703125, 20.468246459960938, 8.578155517578125, 14.622337341308594, 28.404212951660156, -3.2413864135742188, 36.07734680175781, 46.06657409667969, 26.562835693359375, 44.758644104003906, 36.90093994140625, 11.566566467285156, 28.966442108154297, 36.02253723144531, 30.784652709960938, 22.799510955810547, -10.529579162597656, 26.56298065185547, 21.533584594726562, 15.031242370605469, 20.06078338623047, -0.9057388305664062, -7.92919921875, 12.100549697875977, 9.091426849365234, 25.120803833007812, 1.9295902252197266, 5.828277587890625, -2.6341819763183594, 35.70928955078125, -3.748790740966797, 2.974903106689453, 26.298912048339844, 32.595428466796875, 37.181793212890625, 10.007713317871094, 4.380182266235352, 36.944129943847656, 49.74928283691406, 11.739456176757812, 19.791072845458984, 9.589679718017578, 46.574256896972656, 53.39142990112305, 20.234458923339844, 19.540863037109375, 34.10662841796875, 18.81208610534668, 47.92228317260742, 19.08791732788086, 19.000137329101562, 5.338529586791992, 27.188194274902344, 33.08252716064453, 5.112377166748047, 48.13102722167969, 4.713865280151367, 5.691469192504883, 13.884223937988281, -1.9630126953125, 7.723304748535156, 13.810882568359375, -1.8269805908203125, 32.35614013671875, 34.86799621582031, 42.253501892089844, 9.367916107177734, 25.48975372314453, 0.8533973693847656, 57.176902770996094, 2.4582462310791016, 36.958290100097656, 25.18891143798828, 13.980796813964844, 8.995735168457031, 21.78723907470703, 6.087200164794922, 23.070846557617188, 19.46588134765625, 5.21929931640625, 17.52198028564453, 11.133697509765625, 49.855712890625, 4.3368377685546875, 13.103382110595703, 22.24819564819336, 10.389923095703125, 14.395742416381836, 9.617835998535156, 27.82155990600586, 22.9268798828125, 21.607933044433594, 47.53730010986328, -12.354423522949219, 15.976730346679688, 4.437263488769531, 25.87196922302246, 10.081016540527344, 60.82218933105469, 15.068313598632812, 61.5379753112793, 30.773117065429688, 62.84752655029297, 12.749156951904297, 20.834457397460938, 41.634300231933594, -4.31805419921875, 63.671844482421875, 23.54766845703125, 18.502471923828125, -19.688880920410156, 47.90235137939453, 6.869682312011719, 30.38058853149414, 2.0779953002929688, 7.515327453613281, 20.76461410522461, 3.145038604736328, 14.121620178222656, 6.02202033996582, -16.403167724609375, 32.647560119628906, 47.68064880371094, 6.472869873046875, 33.57203674316406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000268.npy"}
{"epoch": 0.788235294117647, "step": 269, "batch_size": 128, "mean": 18.2183837890625, "std": 18.785316467285156, "min": -21.89740753173828, "p10": -1.6678842544555663, "median": 15.570466995239258, "p90": 42.73626937866211, "max": 101.20446014404297, "pos_frac": 0.8671875, "sample": [-7.298377990722656, 28.46582794189453, -0.9038791656494141, 1.8927497863769531, -12.611328125, 19.15182876586914, 0.6100692749023438, 43.66859817504883, 27.321014404296875, 5.8594970703125, 30.4127197265625, 2.645824432373047, 8.792037963867188, 40.58826446533203, 11.069847106933594, 17.30478286743164, 56.75782012939453, 16.787376403808594, 45.004554748535156, 28.829498291015625, 9.18497085571289, 48.986663818359375, 28.807815551757812, 50.80101013183594, 11.3475341796875, 37.812255859375, 17.531803131103516, 39.76148986816406, -1.6451282501220703, 9.277816772460938, 35.412208557128906, 13.437210083007812, 13.232574462890625, 29.47463607788086, 27.055648803710938, 8.125503540039062, 8.691520690917969, 7.055444717407227, 2.751384735107422, 44.34930419921875, 13.290779113769531, 5.554634094238281, 26.923797607421875, 20.795921325683594, 16.216278076171875, 24.591896057128906, 55.740684509277344, 19.54425048828125, -21.89740753173828, 89.51652526855469, 37.62175750732422, -4.94097900390625, 33.4010009765625, 16.32714080810547, 12.651992797851562, 13.666528701782227, 45.11396789550781, 12.200677871704102, 26.11168670654297, 8.670650482177734, 24.999435424804688, 27.09650421142578, -0.6715774536132812, 25.95842742919922, 39.5081787109375, 14.131378173828125, 13.857917785644531, 101.20446014404297, 30.20903778076172, -6.84893798828125, 17.83870506286621, -20.594375610351562, 17.12347412109375, 11.944221496582031, 4.054664611816406, 33.480384826660156, 14.110321044921875, -4.336021423339844, 7.503303527832031, -7.9263916015625, 10.590965270996094, -21.59026336669922, 22.757537841796875, 9.304637908935547, -1.7209815979003906, 42.51184844970703, 19.523488998413086, 18.338775634765625, -5.404918670654297, -0.33736419677734375, 19.889379501342773, 2.8364639282226562, 27.164627075195312, 0.5921306610107422, 8.037721633911133, 21.205821990966797, 14.600997924804688, 10.138553619384766, 1.0783538818359375, 7.1924285888671875, 39.09979248046875, 19.055496215820312, -1.8770065307617188, 8.964263916015625, 21.72821807861328, 19.439613342285156, 10.977615356445312, 26.51396942138672, 9.32403564453125, -5.866926193237305, 18.166641235351562, 55.44752502441406, 5.812602996826172, 8.486198425292969, 13.676446914672852, 24.793067932128906, 15.0732421875, 52.53997802734375, 16.067691802978516, 17.052520751953125, 14.640090942382812, 24.104354858398438, 43.259918212890625, 12.989273071289062, 17.183792114257812, 10.826702117919922, 22.23400115966797, 5.984586715698242], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000269.npy"}
{"epoch": 0.7911764705882353, "step": 270, "batch_size": 128, "mean": 18.23770523071289, "std": 18.269010543823242, "min": -17.528823852539062, "p10": -0.9699264526367183, "median": 15.226493835449219, "p90": 43.52907333374023, "max": 76.9435806274414, "pos_frac": 0.8671875, "sample": [19.24181365966797, 21.59740447998047, -0.0340423583984375, 13.030754089355469, 7.146003723144531, 59.24384307861328, 2.9299755096435547, 15.511226654052734, 18.317058563232422, 28.15662384033203, 12.855926513671875, 34.229705810546875, 26.818161010742188, 3.4640045166015625, 23.191757202148438, 41.65271759033203, 2.5611419677734375, 47.75872802734375, 22.487716674804688, 5.9077911376953125, -0.2633247375488281, 36.984046936035156, 12.55305290222168, 11.287250518798828, 19.18299102783203, -8.880325317382812, 26.14059066772461, -1.2708358764648438, 36.393524169921875, 15.163063049316406, 21.077529907226562, 17.679977416992188, 11.190704345703125, 26.80274200439453, 0.24703598022460938, -0.2041015625, 0.470672607421875, 38.254234313964844, 45.624412536621094, 23.941619873046875, 20.956600189208984, -12.544561386108398, 12.469657897949219, 26.213844299316406, 35.26339340209961, 7.120086669921875, 9.633758544921875, 11.56967544555664, -5.896026611328125, 53.04130554199219, 1.2319107055664062, 3.984691619873047, 32.115135192871094, 0.3655967712402344, 29.478775024414062, -3.50433349609375, 44.08149719238281, 6.898773193359375, 22.705047607421875, 28.822738647460938, 17.996274948120117, 12.51824951171875, 30.259963989257812, 2.8295822143554688, 11.916526794433594, 23.213470458984375, 47.72226333618164, 10.09442138671875, 28.518447875976562, 13.148765563964844, 3.4817962646484375, -14.086761474609375, 16.9791259765625, 2.4725875854492188, 24.83193588256836, 60.138694763183594, 26.329322814941406, 7.386741638183594, 11.142791748046875, 46.213592529296875, 18.33367919921875, 10.147293090820312, 6.672168731689453, 19.390785217285156, -2.7027359008789062, 10.725051879882812, 76.9435806274414, 15.289924621582031, 12.816200256347656, 1.2162628173828125, 0.47066497802734375, 39.544830322265625, 18.476158142089844, 27.070602416992188, -3.00360107421875, 12.923591613769531, -17.528823852539062, -0.8409652709960938, -15.060600280761719, 39.35911560058594, 9.403167724609375, 9.437820434570312, 57.756797790527344, 28.36212921142578, 7.9718170166015625, 18.23968505859375, 26.308124542236328, 61.74683380126953, 24.41640853881836, 14.45468521118164, 9.0386962890625, 31.125694274902344, 6.406578063964844, 70.10653686523438, 12.045608520507812, 52.813629150390625, -10.853729248046875, 28.60467529296875, 40.946868896484375, 24.314218521118164, 31.1785888671875, 10.251594543457031, 43.292320251464844, 6.287151336669922, -13.21978759765625, 4.243865966796875, 17.023136138916016, -3.0786056518554688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000270.npy"}
{"epoch": 0.7941176470588235, "step": 271, "batch_size": 128, "mean": 18.095252990722656, "std": 16.522714614868164, "min": -13.799179077148438, "p10": 0.9569351196289063, "median": 14.708137512207031, "p90": 42.929025650024414, "max": 90.10009765625, "pos_frac": 0.9140625, "sample": [31.54654312133789, 11.489143371582031, 17.418289184570312, 13.53460693359375, 6.81549072265625, 7.245613098144531, 0.5274028778076172, 7.7605133056640625, 22.708858489990234, 90.10009765625, 1.2886199951171875, 4.9828338623046875, 45.145118713378906, 42.914405822753906, 31.212509155273438, 9.410446166992188, 24.174232482910156, 21.142074584960938, -13.799179077148438, 1.1578998565673828, 5.7213287353515625, 23.458805084228516, 47.8154296875, 7.417266845703125, 20.793190002441406, 24.769878387451172, -3.088693618774414, 44.270896911621094, 10.3709716796875, 21.801876068115234, 36.23644256591797, -5.523345947265625, 12.778060913085938, 5.96026611328125, 5.015960693359375, 54.88116455078125, -4.5371856689453125, 26.64925765991211, 25.8177490234375, 25.2861328125, 7.928264617919922, 3.374664306640625, -4.8783721923828125, 11.866817474365234, 2.7437210083007812, 52.103729248046875, 10.8563232421875, 14.785263061523438, 5.945545196533203, 7.674043655395508, 12.761192321777344, 29.003509521484375, 8.931953430175781, 34.650146484375, 9.400375366210938, 21.58849334716797, 6.97772216796875, 8.791046142578125, 9.805702209472656, 3.9761123657226562, 11.979255676269531, 13.719169616699219, 28.356050491333008, 22.2403564453125, 26.70162582397461, 22.60999298095703, 34.565399169921875, 11.783164978027344, -7.732107162475586, 18.425445556640625, 3.6732940673828125, 45.186859130859375, 44.413787841796875, 58.593170166015625, 5.6930694580078125, 23.283981323242188, 7.795440673828125, 4.750003814697266, 13.380928039550781, 11.097564697265625, 20.302459716796875, -8.517723083496094, 17.270294189453125, -2.6154747009277344, 36.21349334716797, 8.424549102783203, 2.623687744140625, 25.311500549316406, 52.74653625488281, 3.929685592651367, 25.615137100219727, 15.228302001953125, 14.631011962890625, 35.07734680175781, 23.565711975097656, 30.62701416015625, 44.95838928222656, 38.0655517578125, 17.465423583984375, 45.98252868652344, 27.773780822753906, -2.0082168579101562, 36.02785110473633, 22.424230575561523, 0.9768524169921875, 9.977333068847656, 9.187942504882812, 21.49859619140625, 0.91046142578125, 36.089271545410156, 3.7271652221679688, 34.040016174316406, 21.85329818725586, 21.814538955688477, 18.258930206298828, -3.4811477661132812, 22.4329833984375, 32.21473693847656, 1.1250228881835938, 26.647964477539062, 2.296092987060547, -7.234582901000977, 42.963138580322266, 7.391927719116211, 7.237342834472656, 23.266090393066406, 10.547962188720703, 19.853912353515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000271.npy"}
{"epoch": 0.7970588235294118, "step": 272, "batch_size": 128, "mean": 15.644951820373535, "std": 17.59017562866211, "min": -16.551292419433594, "p10": -2.022857856750488, "median": 13.388511657714844, "p90": 32.95509147644043, "max": 103.33743286132812, "pos_frac": 0.8515625, "sample": [6.597862243652344, -16.551292419433594, 2.4244003295898438, -4.441612243652344, 11.727596282958984, 39.182952880859375, 0.24022674560546875, 13.192306518554688, 23.565765380859375, 69.15341186523438, 6.293243408203125, -0.052402496337890625, 6.1678009033203125, 34.59112548828125, 14.144424438476562, 19.590187072753906, 0.11704254150390625, 54.10221862792969, 19.675186157226562, -1.3026504516601562, 80.40167236328125, 23.18060302734375, 29.679569244384766, 11.484458923339844, -16.25487518310547, -0.9954147338867188, 9.845664978027344, 10.743171691894531, 20.108795166015625, 15.41693115234375, 14.76629638671875, 18.1693115234375, -8.31365966796875, 16.40375518798828, 8.119319915771484, -11.25335693359375, -0.11115074157714844, -2.4127769470214844, 17.835311889648438, 13.239234924316406, -13.084129333496094, 6.736381530761719, 3.9931640625, 33.2550048828125, 1.5569686889648438, 24.138641357421875, 31.208656311035156, 8.121856689453125, 13.640823364257812, 32.002891540527344, 19.533370971679688, 15.326881408691406, 11.455989837646484, 2.7891921997070312, 10.969963073730469, 42.290199279785156, 14.7471923828125, 8.106124877929688, -2.749725341796875, 9.933486938476562, 9.831499099731445, 11.4693603515625, 11.86073112487793, 29.298622131347656, 31.641311645507812, 13.001113891601562, 13.803520202636719, 19.280960083007812, 103.33743286132812, -11.947555541992188, 15.299522399902344, 14.718833923339844, 27.074140548706055, 2.4790191650390625, 4.548919677734375, 12.24032211303711, 10.068065643310547, 12.198417663574219, -5.0837860107421875, 10.741928100585938, 14.49331283569336, 25.017711639404297, 3.9550819396972656, 29.69305419921875, -2.20184326171875, 3.505889892578125, 19.90972137451172, 0.36309051513671875, 6.037353515625, 15.028961181640625, -1.9461498260498047, 23.26385498046875, 0.7704429626464844, 54.7081298828125, 21.980140686035156, 3.2357406616210938, 29.689138412475586, 8.959875106811523, 30.30474853515625, 12.935920715332031, 9.023859024047852, 19.472801208496094, 7.834877014160156, 24.91527557373047, 17.55638313293457, 15.256294250488281, 20.982261657714844, 30.395606994628906, 12.07640266418457, 20.674182891845703, 26.578216552734375, 10.751319885253906, 18.52301788330078, 16.982295989990234, 26.09487533569336, 43.32122802734375, 36.331260681152344, 15.357940673828125, 41.30354309082031, 29.03717041015625, 13.537788391113281, 2.160980224609375, 26.221771240234375, -1.9036788940429688, 32.82655715942383, 48.82991027832031, -10.65475082397461, -8.943693161010742], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000272.npy"}
{"epoch": 0.8, "step": 273, "batch_size": 128, "mean": 17.430044174194336, "std": 15.813995361328125, "min": -16.65484619140625, "p10": -0.6032827377319316, "median": 15.415132522583008, "p90": 38.6757625579834, "max": 60.255767822265625, "pos_frac": 0.890625, "sample": [9.117645263671875, 3.56109619140625, -7.20550537109375, -11.517738342285156, 3.12908935546875, 45.8532600402832, 10.596588134765625, 8.752254486083984, 19.577163696289062, 40.1588134765625, 15.005905151367188, 19.36728286743164, 57.129547119140625, 19.29163360595703, 10.420036315917969, -6.216880798339844, 16.208600997924805, 19.380393981933594, 24.478240966796875, 36.47401809692383, 25.03545379638672, 14.332984924316406, 20.12792205810547, 28.469253540039062, 7.39532470703125, 26.241165161132812, -1.8769207000732422, 6.26934814453125, -5.20947265625, 30.490196228027344, 24.765609741210938, 15.302215576171875, 42.031280517578125, 2.306060791015625, 19.64932632446289, 32.068504333496094, -10.891265869140625, 58.897308349609375, 39.66058349609375, -4.9690704345703125, 8.897872924804688, 2.5059814453125, 5.903564453125, 20.34998321533203, 27.84691619873047, 1.0753860473632812, 25.219741821289062, 11.922679901123047, 8.529077529907227, 16.697608947753906, 40.11964416503906, 14.843881607055664, 6.005199432373047, 22.471527099609375, 14.47967529296875, 48.37073516845703, 38.25369644165039, -9.073837280273438, 25.73841094970703, 15.949066162109375, 11.300743103027344, 2.8510818481445312, 2.367015838623047, 46.8734130859375, -3.760040283203125, 9.600624084472656, 2.7196807861328125, 12.894258499145508, 20.231285095214844, 24.546218872070312, 13.220329284667969, 17.564531326293945, 23.446659088134766, 35.050498962402344, 30.31146240234375, 4.5915069580078125, 14.417755126953125, 51.805503845214844, -16.295989990234375, 32.37428283691406, 6.121576309204102, 28.89871597290039, 7.317829132080078, -5.602256774902344, 46.74800491333008, 18.564910888671875, 34.91216278076172, 60.255767822265625, 27.928367614746094, 11.821138381958008, 9.821273803710938, 35.79004669189453, 6.055061340332031, 18.890905380249023, 7.075897216796875, 37.584022521972656, 23.353370666503906, 9.724273681640625, 1.8627243041992188, 12.86994743347168, 15.369827270507812, 7.8978424072265625, 27.690872192382812, 10.021163940429688, 11.946319580078125, 27.132408142089844, 19.398780822753906, 27.200942993164062, 45.134117126464844, 22.578048706054688, 5.022796630859375, 1.284881591796875, -16.65484619140625, -0.057437896728515625, 34.60386657714844, 8.046875, 8.670623779296875, 38.00487518310547, -3.282693862915039, 33.52635955810547, 9.734848022460938, 22.751625061035156, 2.4433364868164062, 15.460437774658203, 8.3594970703125, 22.043411254882812, 23.79605484008789, 17.052047729492188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000273.npy"}
{"epoch": 0.8029411764705883, "step": 274, "batch_size": 128, "mean": 16.539297103881836, "std": 17.213932037353516, "min": -24.836578369140625, "p10": -4.695704460144043, "median": 16.26846408843994, "p90": 34.606586074829096, "max": 78.77171325683594, "pos_frac": 0.8203125, "sample": [-8.266420364379883, 19.726318359375, -24.836578369140625, 73.20091247558594, 4.790779113769531, 1.3680839538574219, 17.050724029541016, -13.74261474609375, 24.06817626953125, 24.527076721191406, -3.8202133178710938, 22.98816680908203, -0.5213241577148438, 9.405899047851562, 28.57099151611328, 30.451446533203125, 11.971076965332031, 26.363243103027344, 8.863410949707031, 2.4962024688720703, 3.0303783416748047, 4.675994873046875, 26.409719467163086, 31.12615966796875, 5.975440979003906, 30.489730834960938, 30.14077377319336, -1.215728759765625, 38.183135986328125, 29.85125732421875, -3.390909194946289, 8.731613159179688, 8.541839599609375, 36.42247009277344, -5.9211273193359375, 33.380706787109375, 17.521183013916016, 78.77171325683594, 22.27075958251953, -9.063774108886719, 17.059659957885742, 4.516407012939453, 41.93244552612305, 16.344146728515625, 24.40912628173828, 18.261518478393555, 28.520904541015625, 0.4957427978515625, 28.298599243164062, 29.7613525390625, 22.49386215209961, 26.14996337890625, 12.481056213378906, 27.935760498046875, 7.3398284912109375, 32.596519470214844, 24.290664672851562, 10.055755615234375, 43.19196319580078, -6.777679443359375, 26.74749755859375, 25.908172607421875, 9.721906661987305, 24.684385299682617, 27.832122802734375, 11.529884338378906, 17.04461669921875, 16.20745086669922, 29.111793518066406, 1.3406143188476562, -17.525054931640625, 14.270011901855469, 34.243736267089844, 43.75401306152344, 29.262073516845703, 14.058036804199219, 33.495758056640625, 40.10199737548828, 30.110366821289062, 13.556846618652344, 22.479263305664062, 2.3363189697265625, 10.871932983398438, -4.900629043579102, 10.636581420898438, 10.99493408203125, -0.0330657958984375, 8.839004516601562, 12.459465026855469, -13.072792053222656, 24.48052215576172, 21.8082275390625, -6.217914581298828, 50.029876708984375, -4.9968109130859375, 15.19990348815918, 37.50318908691406, 34.0234375, 20.689468383789062, 11.8941650390625, 18.915481567382812, 13.607574462890625, 30.20240020751953, -11.407630920410156, 19.19488525390625, -0.61285400390625, 17.69024658203125, 4.1579742431640625, 16.056262969970703, 7.688560485839844, -4.607879638671875, 67.81409454345703, 22.868194580078125, -13.041290283203125, -0.37334632873535156, 12.617263793945312, -2.459653854370117, -3.3880157470703125, 2.831207275390625, 5.815704345703125, 26.479888916015625, 30.74449920654297, 14.262069702148438, 16.329477310180664, 6.108573913574219, 4.510650634765625, 35.4532356262207, 39.14677429199219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000274.npy"}
{"epoch": 0.8058823529411765, "step": 275, "batch_size": 128, "mean": 14.982522964477539, "std": 15.511258125305176, "min": -15.579681396484375, "p10": -1.6600500106811522, "median": 12.785867691040039, "p90": 37.32962989807129, "max": 63.5601921081543, "pos_frac": 0.8359375, "sample": [18.33960723876953, 58.40971374511719, 15.137710571289062, 4.834197998046875, 32.479000091552734, 46.060829162597656, 10.596031188964844, 35.68310546875, 22.409446716308594, -10.988487243652344, 4.76141357421875, -5.79937744140625, 34.92718505859375, 30.761003494262695, 7.9718170166015625, 34.79491424560547, 27.980430603027344, 14.833852767944336, 17.36966323852539, 7.568260192871094, 11.51738166809082, 24.0157470703125, 11.803474426269531, -15.579681396484375, 1.41607666015625, 24.682220458984375, 42.213470458984375, -10.34112548828125, -0.6413059234619141, 23.2142333984375, 11.650863647460938, 6.919200897216797, 20.737323760986328, 13.416343688964844, -6.408246994018555, 9.471858978271484, 21.67010498046875, 9.379255294799805, 11.080039978027344, 9.332229614257812, 27.607620239257812, 13.942445755004883, 13.550289154052734, -0.3559150695800781, 37.373355865478516, 8.730697631835938, 5.5109405517578125, 4.985250473022461, -1.4093170166015625, 23.481060028076172, 43.83885192871094, 27.794246673583984, 57.30732727050781, 13.842147827148438, 10.288597106933594, 16.53645133972168, 15.355527877807617, 6.371486663818359, 20.275638580322266, 44.95515060424805, -11.3887939453125, 28.981163024902344, -0.678009033203125, 17.853187561035156, 5.843465805053711, 8.154850006103516, 1.3421401977539062, 32.14482498168945, -8.22802734375, 37.310890197753906, 20.888633728027344, 8.453548431396484, 23.344738006591797, -13.868904113769531, 8.794708251953125, 24.70269012451172, -5.529853820800781, 0.8828125, 15.91168212890625, 10.165241241455078, -2.75616455078125, 7.1476593017578125, 4.380523681640625, 41.68505859375, 9.761222839355469, 25.949058532714844, 29.520591735839844, 9.481765747070312, 19.041305541992188, 25.178321838378906, 0.0237884521484375, 40.918087005615234, -1.8070507049560547, 17.65502166748047, -4.093957901000977, 3.135120391845703, 5.424369812011719, 13.635818481445312, 41.6331787109375, 17.55291748046875, 63.5601921081543, 12.550617218017578, 0.415924072265625, 14.709281921386719, 2.0690364837646484, 5.118896484375, -0.34006500244140625, 20.587993621826172, 15.765625, -1.5970497131347656, -0.884552001953125, 18.002445220947266, 12.284347534179688, 20.404708862304688, 1.3557624816894531, 0.9609184265136719, 16.533203125, 5.81964111328125, 35.955806732177734, 40.853721618652344, 21.820068359375, 26.43425750732422, 13.0211181640625, -0.0200347900390625, -1.8422698974609375, 1.56732177734375, 38.23960876464844, 8.207035064697266], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000275.npy"}
{"epoch": 0.8088235294117647, "step": 276, "batch_size": 128, "mean": 19.417633056640625, "std": 17.880598068237305, "min": -18.770050048828125, "p10": -1.2514747619628899, "median": 15.352045059204102, "p90": 46.82012863159179, "max": 60.342041015625, "pos_frac": 0.859375, "sample": [11.961324691772461, 16.450881958007812, 10.024772644042969, -0.6953582763671875, 42.67913818359375, 49.53135681152344, -1.0124969482421875, 51.52820587158203, 44.717254638671875, 7.70758056640625, 17.369125366210938, 31.947372436523438, 10.355712890625, 21.521013259887695, 31.778427124023438, 54.57652282714844, 34.47886657714844, -1.8090896606445312, 12.11279296875, 17.234375, 31.54991912841797, 7.7202911376953125, 13.5576171875, 0.7430286407470703, 19.2158203125, 0.5277252197265625, -6.84686279296875, 14.9044189453125, 25.48845672607422, 13.24658203125, 27.862943649291992, 4.4170989990234375, 22.520050048828125, 26.438037872314453, 22.787586212158203, 23.592208862304688, -6.3934326171875, 33.51771545410156, 55.45011901855469, 37.041908264160156, 17.4639892578125, 7.7819671630859375, 46.422027587890625, 8.516189575195312, -0.13362503051757812, 35.1585807800293, 13.986822128295898, 13.604026794433594, 11.52691650390625, 5.44677734375, 30.279129028320312, -8.136184692382812, 1.6745223999023438, 49.261863708496094, -18.770050048828125, 9.994140625, 8.372047424316406, 15.691543579101562, 44.11005401611328, 6.066337585449219, 10.651634216308594, 32.01850128173828, -1.9491004943847656, 24.21439552307129, 14.967941284179688, 17.471826553344727, 11.98675537109375, 60.342041015625, 7.05780029296875, 35.74571990966797, 38.15882110595703, 16.01828384399414, 58.17323303222656, 56.950714111328125, 57.384674072265625, -5.500085830688477, -3.6630630493164062, 35.200775146484375, 33.15472412109375, 15.135562896728516, 18.95452880859375, 30.919105529785156, 15.850410461425781, 12.238922119140625, 15.488765716552734, 36.80615234375, 44.09626007080078, 12.363685607910156, 45.173309326171875, 23.98974609375, 5.1093292236328125, 8.944618225097656, -1.9604949951171875, 16.79507827758789, 8.736831665039062, 13.355751037597656, 13.655227661132812, 17.81033706665039, 39.50025177001953, 26.068923950195312, -1.9439849853515625, 15.215324401855469, 47.74903106689453, 18.17877960205078, -14.568672180175781, 11.001256942749023, 14.4061279296875, 32.303131103515625, -0.3011798858642578, 0.29734039306640625, 48.30902099609375, -0.0729522705078125, 52.986671447753906, 9.136199951171875, 25.209259033203125, 28.943206787109375, 13.391983032226562, 58.25947570800781, 6.015464782714844, -3.2478561401367188, -9.180168151855469, 1.057830810546875, 1.9097156524658203, 12.644180297851562, 18.90081024169922, 10.013427734375, 41.820770263671875, 1.48895263671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000276.npy"}
{"epoch": 0.8117647058823529, "step": 277, "batch_size": 128, "mean": 18.010860443115234, "std": 19.021371841430664, "min": -23.44573974609375, "p10": -4.464442634582519, "median": 15.609306335449219, "p90": 42.29140014648438, "max": 95.40031433105469, "pos_frac": 0.84375, "sample": [63.110870361328125, 45.9930419921875, 1.788299560546875, 7.278083801269531, 14.278411865234375, 3.4493446350097656, 21.732437133789062, -2.9676284790039062, 9.513236999511719, -8.080108642578125, 32.173980712890625, -4.343175888061523, 23.419708251953125, 29.069429397583008, -2.8792266845703125, -7.459402084350586, 32.04193115234375, -10.009536743164062, 10.187782287597656, 8.832901000976562, 15.5791015625, 24.863006591796875, 22.31340789794922, -11.424751281738281, 31.60855484008789, -1.4241371154785156, 48.21620178222656, 15.64767837524414, 7.0050201416015625, 23.945098876953125, 32.94743347167969, 95.40031433105469, 15.246767044067383, 8.66604232788086, 17.995014190673828, 6.1090850830078125, 23.77294158935547, 13.883720397949219, -4.885326385498047, 19.95233726501465, 22.338088989257812, 21.09759521484375, 44.99647903442383, 8.356208801269531, 11.514663696289062, 37.435455322265625, -0.23474884033203125, 11.076236724853516, 14.349231719970703, 16.712610244750977, 9.879432678222656, 12.340324401855469, 8.353092193603516, 7.644386291503906, 18.625396728515625, 5.067481994628906, 25.38733673095703, 50.91926574707031, 17.371200561523438, 1.3328399658203125, -6.436271667480469, 15.193214416503906, 1.7482681274414062, 15.280427932739258, 13.252632141113281, 12.185836791992188, 32.9630126953125, 4.14947509765625, 16.353599548339844, 47.91819763183594, 34.70591354370117, 28.873123168945312, 30.133941650390625, -7.1024627685546875, 4.197715759277344, 29.56830596923828, -4.747398376464844, -2.606029510498047, 10.326419830322266, 11.582115173339844, 42.45849609375, 24.667083740234375, 43.75782775878906, 21.739967346191406, 73.07318115234375, 34.76499938964844, 46.813175201416016, -3.414976119995117, 35.87242889404297, 13.596954345703125, -9.528875350952148, 10.176643371582031, 14.362213134765625, -23.44573974609375, 22.168296813964844, 4.078582763671875, 9.991613388061523, 20.919387817382812, 32.20825958251953, 13.236038208007812, 37.69435119628906, 17.74646759033203, 2.4840736389160156, 20.939708709716797, 28.9271240234375, -18.325416564941406, 7.248260498046875, 22.778873443603516, 2.7802200317382812, 38.27740478515625, 29.23836898803711, 21.078567504882812, 3.4046554565429688, 20.18024444580078, 29.603065490722656, 28.99920654296875, 42.21978759765625, 17.01045799255371, 37.982025146484375, -14.667449951171875, -5.6593475341796875, 9.995719909667969, 20.83538818359375, 15.639511108398438, 63.386932373046875, 71.30899047851562, 21.238929748535156, 0.8978958129882812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000277.npy"}
{"epoch": 0.8147058823529412, "step": 278, "batch_size": 128, "mean": 16.900224685668945, "std": 18.088024139404297, "min": -24.77655029296875, "p10": -4.946682739257812, "median": 15.617935180664062, "p90": 41.19759750366211, "max": 77.34213256835938, "pos_frac": 0.8046875, "sample": [25.03131103515625, 10.098526000976562, 18.110687255859375, 28.31280517578125, 18.231719970703125, 38.907470703125, 14.733161926269531, 4.181648254394531, 4.337028503417969, -10.736495971679688, 46.321502685546875, 55.25834655761719, 17.421173095703125, 50.366493225097656, 11.225677490234375, 50.945281982421875, 18.01880645751953, 25.86480712890625, 9.01800537109375, 23.10503387451172, 18.08108139038086, -8.877752304077148, 6.803678512573242, 11.373943328857422, 20.915863037109375, 2.7552413940429688, 33.016387939453125, 34.62078857421875, 41.80815887451172, -3.054086685180664, -9.734443664550781, 15.866922378540039, -5.547637939453125, 11.75775146484375, -13.002716064453125, 47.99950408935547, 26.98797607421875, 25.67386245727539, -3.7891311645507812, 0.42510414123535156, 15.013908386230469, 4.1550445556640625, 40.026039123535156, 12.943763732910156, 35.18644714355469, -14.727828979492188, 0.46282958984375, 33.86590576171875, -12.284374237060547, 33.21210861206055, 38.924652099609375, 37.69417190551758, -4.708322525024414, -2.5380325317382812, 30.03083038330078, 14.407426834106445, 77.34213256835938, 12.33847427368164, 22.028305053710938, 23.199050903320312, 24.168235778808594, -4.855010986328125, -3.4898719787597656, 10.528724670410156, 15.6419677734375, 3.76531982421875, 17.012527465820312, -24.77655029296875, 20.671722412109375, 9.747406005859375, 45.0474739074707, 17.717742919921875, 40.93592834472656, 17.862014770507812, 21.719070434570312, 11.397089004516602, -5.42974853515625, 24.78466033935547, 1.3156509399414062, 1.2916336059570312, -2.111927032470703, -5.16058349609375, 30.01476287841797, 6.284595489501953, 8.696540832519531, 24.602127075195312, 28.645347595214844, -1.951573371887207, 35.85748291015625, -1.788421630859375, 28.871286392211914, 17.492446899414062, 15.593902587890625, 25.37738037109375, 50.93219757080078, 0.5473060607910156, -4.295234680175781, 44.51368713378906, -2.499614715576172, 21.526378631591797, 38.690467834472656, 13.785343170166016, 9.209030151367188, 14.576213836669922, -9.242233276367188, 28.908309936523438, 10.876174926757812, 11.060386657714844, 10.835243225097656, -18.5545654296875, 12.458499908447266, 11.76215934753418, 2.9269561767578125, 31.192291259765625, 38.26134490966797, 25.83165740966797, 5.490446090698242, 9.52020263671875, 44.340904235839844, 19.887699127197266, 31.026885986328125, 54.74925231933594, 12.364824295043945, 42.81974792480469, 26.640838623046875, -0.7399444580078125, -9.324760437011719, 18.265300750732422], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000278.npy"}
{"epoch": 0.8176470588235294, "step": 279, "batch_size": 128, "mean": 18.319446563720703, "std": 16.58319091796875, "min": -20.210113525390625, "p10": -1.2756725311279289, "median": 17.044437408447266, "p90": 39.83022232055663, "max": 70.92359924316406, "pos_frac": 0.875, "sample": [4.891853332519531, 48.43977355957031, 18.967010498046875, 10.60318374633789, -0.346038818359375, 9.778493881225586, -2.7432937622070312, 10.150344848632812, 33.597747802734375, 3.376129150390625, 35.27731704711914, 7.020389556884766, 16.077346801757812, 64.25431823730469, 19.98053741455078, 29.867141723632812, 22.59097671508789, 6.718303680419922, 25.670452117919922, -1.8637847900390625, 9.964277267456055, 0.2403564453125, -0.5912017822265625, -2.9211692810058594, 15.817489624023438, 21.996780395507812, 6.011528015136719, 50.192596435546875, 48.37017822265625, 19.060562133789062, 20.779321670532227, 4.538116455078125, 8.300590515136719, 1.6276931762695312, 14.5504150390625, 35.109962463378906, 20.146774291992188, 23.912872314453125, 4.956697463989258, 19.241172790527344, 21.263763427734375, 25.31012725830078, 3.513397216796875, 13.519256591796875, -3.0693626403808594, 34.29029083251953, 12.070446014404297, 16.38507080078125, 10.363883972167969, 3.0068397521972656, 3.8088436126708984, 11.468839645385742, -3.4987869262695312, -3.520843505859375, 63.35821533203125, 32.456634521484375, -6.644756317138672, 8.40521240234375, 1.2934646606445312, 16.140762329101562, 3.4655532836914062, -4.154302597045898, 26.313751220703125, 11.884780883789062, 23.407150268554688, 14.038898468017578, 7.55194091796875, 28.489391326904297, 23.92578125, 15.269804000854492, 18.79920196533203, 19.111103057861328, 18.294845581054688, 12.27883529663086, 38.83544921875, 22.1812744140625, 22.006622314453125, 13.150466918945312, 20.487518310546875, 27.398906707763672, 11.837898254394531, 26.016082763671875, 38.95709991455078, 9.77838134765625, 25.294906616210938, 19.159934997558594, 18.710426330566406, 13.583869934082031, 19.641929626464844, 18.768211364746094, 44.47027587890625, 55.08464050292969, 34.725860595703125, 38.922698974609375, 38.38294982910156, 13.59722900390625, 12.794170379638672, -7.659275054931641, -20.210113525390625, 27.375503540039062, 16.209426879882812, 25.453960418701172, 14.737751007080078, 21.847301483154297, 5.8110809326171875, 18.542068481445312, 34.18883514404297, -6.304195404052734, 12.181720733642578, 18.18321990966797, 17.70380401611328, 28.055984497070312, -4.9413299560546875, -1.0236244201660156, 22.263080596923828, 41.86750793457031, 9.97039794921875, 0.4062004089355469, -5.710243225097656, 70.92359924316406, 22.575759887695312, 4.245960235595703, 50.800079345703125, 3.2001953125, 23.71918487548828, 46.62420654296875, 58.61882019042969, 49.23432159423828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000279.npy"}
{"epoch": 0.8205882352941176, "step": 280, "batch_size": 128, "mean": 15.768138885498047, "std": 16.448890686035156, "min": -25.145599365234375, "p10": -2.810477828979491, "median": 14.088558197021484, "p90": 39.82823791503906, "max": 69.02838134765625, "pos_frac": 0.828125, "sample": [14.714656829833984, 15.740257263183594, 50.613525390625, 21.434417724609375, 9.955513000488281, 19.913818359375, 2.152393341064453, 0.5618209838867188, 57.712799072265625, 32.88287353515625, -1.1796417236328125, 6.360908508300781, 69.02838134765625, 3.4085826873779297, -3.5953750610351562, -2.5250816345214844, -8.445404052734375, 9.366127014160156, 23.80746841430664, 19.088882446289062, -8.540182113647461, -6.761005401611328, 20.921077728271484, 15.041015625, 41.21422576904297, -7.857917785644531, 24.15613555908203, 29.99292755126953, 2.7198410034179688, 20.101211547851562, 33.63624572753906, 39.8037109375, 9.196863174438477, 20.4722900390625, 13.895347595214844, 17.62358856201172, 3.0010910034179688, 12.797502517700195, 42.27668762207031, 39.885467529296875, 16.330055236816406, -9.244171142578125, -12.88040542602539, -1.93292236328125, 7.728599548339844, 35.35948944091797, 13.548728942871094, -0.5088348388671875, 24.941390991210938, 3.5672607421875, 9.35153579711914, 22.905357360839844, 7.5662994384765625, 6.049121856689453, 0.7824058532714844, 17.538333892822266, -3.4764022827148438, 39.99867248535156, 36.47045135498047, 28.70633888244629, 18.590072631835938, 3.7734298706054688, 16.213043212890625, 50.89915466308594, 24.857269287109375, 11.401844024658203, 18.952957153320312, 22.96902084350586, 14.03970718383789, 12.335742950439453, -1.9716262817382812, -25.145599365234375, 52.97566604614258, 39.07853698730469, 11.487640380859375, 16.148849487304688, 14.431583404541016, 11.2452392578125, 11.493011474609375, 54.04064178466797, 11.495258331298828, 19.397754669189453, 23.510536193847656, 2.592498779296875, 8.553607940673828, 41.26426696777344, 36.387664794921875, 21.294403076171875, 10.773462295532227, 39.737457275390625, 14.281852722167969, -4.962944030761719, 14.191471099853516, 28.48969268798828, 3.7441177368164062, 9.128555297851562, 7.167198181152344, 9.261497497558594, 30.331268310546875, 17.975936889648438, 29.850557327270508, -5.545768737792969, 30.708938598632812, 14.137409210205078, 30.70447540283203, 7.2606658935546875, -2.2214279174804688, 11.034698486328125, 9.316162109375, 7.454620361328125, 6.95184326171875, -0.2595367431640625, 15.332683563232422, 0.5264892578125, 15.995128631591797, -6.670722961425781, 46.725372314453125, 43.498779296875, 5.910430908203125, 26.499778747558594, 5.746601104736328, 17.765785217285156, 21.086837768554688, 20.49577522277832, -0.4614830017089844, 4.841651916503906, -1.8233699798583984, -6.346672058105469], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000280.npy"}
{"epoch": 0.8235294117647058, "step": 281, "batch_size": 128, "mean": 17.13780975341797, "std": 15.821426391601562, "min": -19.710098266601562, "p10": -0.8791156768798827, "median": 14.371500015258789, "p90": 40.11488494873047, "max": 61.228240966796875, "pos_frac": 0.8828125, "sample": [26.073184967041016, 4.6052398681640625, 7.612483978271484, 2.319082260131836, 6.664215087890625, 19.326370239257812, 12.57427978515625, 25.876304626464844, 24.706703186035156, 8.359169006347656, 58.760658264160156, 23.348350524902344, 36.77869415283203, -6.14263916015625, 10.202743530273438, 30.186477661132812, 7.23858642578125, 26.693771362304688, 26.04840087890625, 12.307075500488281, -4.4398651123046875, 4.27178955078125, 14.279634475708008, 7.0106964111328125, 18.521514892578125, 19.090736389160156, -19.710098266601562, 14.511581420898438, 19.27326202392578, -17.605880737304688, 25.061359405517578, 20.68438720703125, 11.190071105957031, 58.193756103515625, 10.863285064697266, -3.6864700317382812, 31.194259643554688, 24.206802368164062, 22.764358520507812, 17.616920471191406, 13.408241271972656, 8.490951538085938, 53.923583984375, 19.018272399902344, 21.46746063232422, 37.01460266113281, 15.256668090820312, 36.22625732421875, 8.501068115234375, 4.434385299682617, 17.301055908203125, 23.257980346679688, -2.52740478515625, -3.1316661834716797, 9.105766296386719, 61.228240966796875, 5.748018264770508, 24.603736877441406, 14.204151153564453, 5.391395568847656, 13.603851318359375, -8.132591247558594, 30.125076293945312, 4.006858825683594, 13.238426208496094, 41.24690246582031, 7.0314788818359375, 30.429431915283203, 12.949434280395508, -0.8559036254882812, 23.170726776123047, 18.364681243896484, 20.8956298828125, 9.727523803710938, 11.528736114501953, 2.530670166015625, 3.795806884765625, 23.629348754882812, 22.073394775390625, 12.683494567871094, 18.460742950439453, 29.319854736328125, 0.5256576538085938, 58.67585754394531, 16.74347686767578, 22.738082885742188, 6.20556640625, 40.361427307128906, 12.86639404296875, 40.36930847167969, -0.20944976806640625, 4.96588134765625, 34.54302978515625, 10.994796752929688, 8.244270324707031, 5.80999755859375, 8.659568786621094, 12.191436767578125, 15.99267578125, 40.00922393798828, 1.4432373046875, 50.96927261352539, 37.025718688964844, -3.55718994140625, -3.8075790405273438, 37.291622161865234, 42.701385498046875, 15.833099365234375, 23.559158325195312, 6.038055419921875, -0.9332771301269531, 15.479949951171875, -11.519561767578125, 24.613906860351562, 40.640113830566406, 30.46170425415039, -4.127521514892578, 9.092962265014648, 25.899879455566406, 52.07244873046875, 9.809303283691406, 6.318876266479492, 14.634222030639648, 12.32003402709961, 14.46336555480957, 13.170272827148438, 40.66923522949219, 3.811969757080078], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000281.npy"}
{"epoch": 0.8264705882352941, "step": 282, "batch_size": 128, "mean": 15.735054016113281, "std": 15.026784896850586, "min": -18.028427124023438, "p10": -4.286013412475584, "median": 15.404757499694824, "p90": 35.197628021240234, "max": 70.80096435546875, "pos_frac": 0.8515625, "sample": [7.395366668701172, 6.0047149658203125, 19.245433807373047, -0.08203125, 30.340084075927734, 3.991119384765625, 26.150497436523438, 6.5464935302734375, 17.35638427734375, 40.70824432373047, 5.702491760253906, 23.143157958984375, 19.6785888671875, -2.155109405517578, -8.2398681640625, 2.31097412109375, -18.028427124023438, 27.574420928955078, 10.666696548461914, 36.68010711669922, 15.246490478515625, -7.3828125, 22.635833740234375, 24.824790954589844, -14.092460632324219, 19.753944396972656, 10.394533157348633, 30.69898223876953, 3.6487655639648438, -11.996810913085938, 17.029823303222656, -2.426239013671875, 14.956268310546875, 33.8079833984375, 16.033790588378906, 12.452159881591797, 4.755149841308594, 19.974090576171875, -5.6383514404296875, 14.645435333251953, 5.232292175292969, -5.8075714111328125, 20.353530883789062, 14.976531982421875, 49.424896240234375, 25.893150329589844, 7.9113616943359375, 21.9692325592041, 22.4713134765625, 22.30109405517578, 70.80096435546875, 21.416473388671875, 31.58251953125, -6.506561279296875, 12.798904418945312, 54.042022705078125, 15.348251342773438, 0.04461860656738281, 13.599334716796875, 7.442237854003906, 23.988304138183594, 3.150056838989258, 11.289833068847656, 4.0587310791015625, 13.292205810546875, 20.0682373046875, 39.4521484375, 8.12542724609375, 19.5963134765625, 17.246246337890625, 8.551692962646484, 7.379474639892578, 2.076751708984375, 5.948997497558594, -11.967323303222656, 10.165496826171875, 21.83422088623047, 12.434921264648438, 11.499839782714844, -5.657899856567383, 10.014640808105469, -3.706439971923828, 17.531982421875, -7.176727294921875, 16.60614013671875, 24.997112274169922, -2.3393707275390625, -7.138156890869141, 34.85618591308594, 36.91520690917969, 13.468177795410156, 23.757835388183594, 32.09625244140625, 2.7856216430664062, 13.683975219726562, 11.607818603515625, 19.436614990234375, 6.6026153564453125, 22.266586303710938, 21.601627349853516, 34.37744140625, 22.289794921875, 41.69799041748047, 20.354637145996094, -6.3909454345703125, 17.526039123535156, 19.6986083984375, 29.49687957763672, 16.828750610351562, 27.11285400390625, 15.976425170898438, 45.7537841796875, 19.87322998046875, 13.333892822265625, 1.4909591674804688, 12.693805694580078, 11.057350158691406, -2.9871368408203125, 16.943607330322266, 15.461263656616211, 25.238479614257812, 44.857391357421875, 40.005165100097656, 41.718414306640625, 35.71216583251953, 34.97711181640625, 22.278196334838867, 6.704010009765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000282.npy"}
{"epoch": 0.8294117647058824, "step": 283, "batch_size": 128, "mean": 18.210716247558594, "std": 18.798873901367188, "min": -23.41896629333496, "p10": -1.8294435501098623, "median": 15.09333610534668, "p90": 46.70518493652343, "max": 72.09693908691406, "pos_frac": 0.8359375, "sample": [-4.474517822265625, 68.7086410522461, 26.81585693359375, 5.5783843994140625, -0.72174072265625, 45.351806640625, 47.89414978027344, -8.305694580078125, -0.6809539794921875, 29.710315704345703, 23.11749267578125, 29.906879425048828, 27.69171142578125, 22.9443359375, 14.813549041748047, 32.78699493408203, 12.349834442138672, 41.53601837158203, -0.390472412109375, 2.91204833984375, 25.867874145507812, 38.384910583496094, 5.1473388671875, 12.168937683105469, 47.886390686035156, 10.075271606445312, 22.936111450195312, -3.316650390625, 26.374053955078125, 4.998601913452148, 13.932647705078125, 21.326080322265625, -3.250682830810547, 20.77313995361328, 6.893455505371094, 39.96473693847656, 5.944286346435547, 27.76519775390625, 16.732221603393555, -1.5635967254638672, 3.2364273071289062, 12.187591552734375, 6.065589904785156, 16.833702087402344, -18.496185302734375, 72.09693908691406, 22.898956298828125, 14.06770133972168, 11.786643981933594, 47.63658142089844, 0.45172119140625, 22.192367553710938, 7.916267395019531, 58.71404266357422, 8.82890510559082, 6.12841796875, 42.07765197753906, 1.993215560913086, 9.762645721435547, 2.863790512084961, 11.245489120483398, 11.128232955932617, 16.756301879882812, 59.11513900756836, 2.5326766967773438, -1.1214752197265625, 24.938873291015625, 28.0986328125, 31.210739135742188, -2.4497528076171875, 19.619552612304688, 20.08344268798828, 47.750030517578125, 7.917881011962891, -1.2520694732666016, 4.558685302734375, 15.767528533935547, 53.12626647949219, 34.25910568237305, 12.560577392578125, 11.943328857421875, 15.373123168945312, 50.022247314453125, 1.6743392944335938, -12.251884460449219, 57.82566452026367, 16.109085083007812, 2.2458267211914062, -23.41896629333496, -2.554576873779297, -0.3455638885498047, 58.808837890625, 14.228687286376953, 23.27307891845703, 1.9992504119873047, 20.451332092285156, -0.6371955871582031, 45.53904724121094, 29.035057067871094, -3.3001022338867188, -4.497344970703125, 26.521867752075195, 8.212875366210938, 31.96832275390625, 8.88812255859375, -13.75086784362793, 34.139259338378906, 27.077056884765625, 15.960283279418945, 25.61212158203125, 11.959236145019531, 46.30601501464844, -3.5099716186523438, 18.191112518310547, 1.4677200317382812, 17.827098846435547, 2.710205078125, 29.14111328125, 41.90966796875, 30.655635833740234, 24.287763595581055, 14.75634765625, 62.347496032714844, 0.5275650024414062, 5.8516387939453125, 22.512847900390625, 14.232166290283203, 21.971939086914062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000283.npy"}
{"epoch": 0.8323529411764706, "step": 284, "batch_size": 128, "mean": 18.444965362548828, "std": 18.262481689453125, "min": -27.150436401367188, "p10": -2.153494644165038, "median": 18.649656295776367, "p90": 43.310781860351554, "max": 66.4655532836914, "pos_frac": 0.828125, "sample": [-10.180732727050781, 16.399139404296875, 32.12718963623047, 48.201416015625, 7.9265594482421875, 19.97669219970703, -3.0010452270507812, 1.3501663208007812, 33.30213165283203, 37.982269287109375, 32.42485809326172, 4.866710662841797, 39.54316711425781, 27.072219848632812, -13.151861190795898, 28.226638793945312, 3.4475440979003906, 1.8202857971191406, 28.465774536132812, 5.174646377563477, 22.7557373046875, 52.55012893676758, 56.194522857666016, 23.842697143554688, 11.448188781738281, -1.5394306182861328, -11.655174255371094, 23.211593627929688, -1.6142635345458984, 45.82142639160156, -9.307655334472656, 13.09747314453125, 39.07068634033203, 19.1710205078125, 33.40745544433594, -1.82159423828125, 31.537673950195312, 31.44512939453125, 54.269386291503906, 30.530426025390625, 7.343280792236328, 19.027240753173828, 31.62505340576172, 4.318824768066406, 6.792243957519531, 22.04509162902832, 30.292882919311523, 54.22453308105469, 23.891006469726562, -27.150436401367188, 2.411865234375, 0.7740631103515625, 6.873771667480469, 18.356258392333984, 9.015327453613281, -5.465614318847656, 2.0388946533203125, 17.65313720703125, -0.015811920166015625, 31.61922836303711, 22.422195434570312, -0.6191444396972656, -7.448633193969727, 27.400299072265625, 5.183803558349609, 5.815502166748047, 66.4655532836914, -0.3157196044921875, 15.477363586425781, 24.0631103515625, 26.995773315429688, 31.111473083496094, 14.429725646972656, 1.3562374114990234, 7.699001312255859, 13.733352661132812, 4.449024200439453, 16.552227020263672, 44.90032958984375, 35.58892059326172, 20.420339584350586, 20.748916625976562, 0.3916015625, 20.82323455810547, 17.34491729736328, 14.798431396484375, -1.73809814453125, 50.197364807128906, 9.41370964050293, 29.52709197998047, 11.056779861450195, 45.164329528808594, 12.864845275878906, -8.111442565917969, 34.76636505126953, 1.6048660278320312, 21.90363311767578, -20.792373657226562, 2.9259109497070312, -1.0099372863769531, 27.972854614257812, 36.91770935058594, -4.476112365722656, 51.767921447753906, 33.984405517578125, 18.94305419921875, 17.257896423339844, 30.689361572265625, -4.675384521484375, 18.087051391601562, 15.265853881835938, 42.629547119140625, 64.34573364257812, -1.6410331726074219, 35.63983154296875, 23.686370849609375, 19.503150939941406, 35.505096435546875, 4.929630279541016, 9.034664154052734, -2.927928924560547, 34.87055206298828, 28.514141082763672, 19.35028076171875, 36.46961975097656, 45.640655517578125, 1.58477783203125, 24.464811325073242], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000284.npy"}
{"epoch": 0.8352941176470589, "step": 285, "batch_size": 128, "mean": 17.206008911132812, "std": 18.113386154174805, "min": -23.118209838867188, "p10": -5.372175216674804, "median": 15.262195587158203, "p90": 41.155566406249996, "max": 78.82217407226562, "pos_frac": 0.8359375, "sample": [-0.6117897033691406, 10.137954711914062, 78.82217407226562, 16.87757110595703, 6.1199951171875, -7.1633758544921875, 9.091062545776367, 31.532577514648438, 6.010219573974609, 45.11744689941406, 3.420511245727539, 11.56988525390625, -21.367660522460938, 26.546432495117188, 11.197542190551758, 9.479068756103516, 24.779617309570312, 12.608671188354492, 21.018516540527344, 20.904033660888672, -1.7222900390625, 22.618467330932617, 55.94831466674805, -14.33526611328125, -23.118209838867188, 26.461456298828125, 8.837203979492188, 14.751373291015625, 9.043722152709961, 11.22146224975586, 33.13848876953125, -2.2489967346191406, -1.9376678466796875, 16.64159393310547, 26.820587158203125, 43.82549285888672, 13.595804214477539, 15.415969848632812, 26.422679901123047, 3.3208465576171875, 2.716032028198242, 37.129058837890625, 48.274017333984375, -11.872055053710938, -9.529312133789062, -13.411277770996094, 5.067867279052734, 20.541976928710938, 35.701690673828125, 12.983112335205078, -5.099033355712891, 43.20021057128906, 19.863296508789062, 7.27192497253418, 26.368000030517578, 31.42861557006836, 23.910247802734375, 38.20038604736328, 12.493080139160156, 23.238906860351562, 12.877849578857422, -6.0095062255859375, 50.47728729248047, 35.947021484375, -0.36767578125, 22.092788696289062, 18.76197052001953, 41.00535583496094, 11.10968017578125, 17.791547775268555, 29.632884979248047, 16.8240966796875, 38.025184631347656, 15.108421325683594, 41.50605773925781, 4.1078338623046875, 2.411151885986328, -18.081634521484375, 4.332698822021484, 12.170421600341797, -12.407958984375, 2.3780250549316406, 19.535308837890625, 43.258827209472656, 3.601806640625, 29.809228897094727, 4.252899169921875, 38.87940216064453, 16.97897720336914, 34.778472900390625, -0.8473548889160156, 8.923294067382812, 35.179534912109375, 25.403152465820312, 24.790019989013672, 21.58294677734375, 69.56719970703125, 42.27110290527344, 29.497623443603516, 61.944183349609375, 45.083282470703125, 15.9749755859375, 12.043119430541992, 17.6937255859375, 12.587377548217773, 21.79070281982422, 7.788362503051758, 6.211250305175781, -12.241806030273438, 15.808151245117188, 8.870613098144531, 8.974441528320312, 11.493183135986328, 14.547195434570312, -9.645050048828125, 32.17671203613281, 29.790634155273438, 16.28052520751953, 30.26024627685547, 22.949813842773438, 1.3135299682617188, -1.4866180419921875, 12.950773239135742, -6.346792221069336, 13.518653869628906, 14.932701110839844, 31.9215087890625, 28.759546279907227], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000285.npy"}
{"epoch": 0.8382352941176471, "step": 286, "batch_size": 128, "mean": 16.586755752563477, "std": 16.947751998901367, "min": -25.732912063598633, "p10": -3.226942443847656, "median": 15.331570625305176, "p90": 38.80232543945312, "max": 70.34774017333984, "pos_frac": 0.8125, "sample": [5.091041564941406, 8.4005126953125, 3.7271766662597656, 29.446178436279297, -11.716472625732422, 11.158016204833984, 3.4574432373046875, 17.573780059814453, 56.47511291503906, 38.38282775878906, 20.033668518066406, 39.112548828125, 44.149147033691406, 59.26996612548828, 30.121353149414062, -1.05303955078125, 30.004226684570312, 40.98455810546875, 29.695846557617188, 13.877239227294922, 8.873764038085938, 22.221343994140625, 36.6443977355957, 20.148303985595703, 0.5345687866210938, 12.936958312988281, -0.3235626220703125, -5.1257476806640625, 40.30735778808594, 5.498771667480469, 14.664260864257812, 18.109397888183594, 23.508182525634766, 24.606399536132812, 41.601409912109375, 21.958351135253906, 19.835847854614258, 3.9049224853515625, 11.873754501342773, 18.229965209960938, -4.619411468505859, 22.257484436035156, 37.36907196044922, 17.32827377319336, 6.904426574707031, 30.596649169921875, 11.885173797607422, 10.893421173095703, 22.270950317382812, 6.457366943359375, 20.048843383789062, 17.143898010253906, 9.75738525390625, 2.1113815307617188, 24.683685302734375, 24.278244018554688, 14.374502182006836, -0.46193695068359375, -7.190883636474609, 5.506778717041016, 4.11724853515625, 18.4947509765625, 15.85693359375, -0.8380279541015625, -3.3717041015625, -0.6395492553710938, -6.4606781005859375, 16.1641845703125, 56.285179138183594, 9.856040954589844, -0.32122802734375, 6.121250152587891, 1.1160049438476562, 42.84199523925781, 8.228609085083008, 38.66937255859375, -0.4940948486328125, 8.659103393554688, 4.6809844970703125, 15.843986511230469, 18.021963119506836, 14.078826904296875, -8.942497253417969, 70.34774017333984, 26.27313995361328, 35.00938415527344, 32.124427795410156, 14.798370361328125, 25.293115615844727, -5.991172790527344, 43.80131530761719, 9.741600036621094, 11.448646545410156, 27.1285400390625, 7.857936859130859, 22.209693908691406, -1.7472076416015625, 14.707664489746094, 0.10974311828613281, -1.150360107421875, 26.336936950683594, 7.409996032714844, 36.71825408935547, 27.82793426513672, 8.094371795654297, -0.6044635772705078, -6.563249588012695, -3.1649017333984375, -15.447746276855469, 26.893783569335938, 25.773284912109375, 14.819154739379883, 30.209915161132812, 28.15958595275879, -25.732912063598633, 44.119300842285156, 4.933866500854492, -14.12811279296875, 21.427490234375, 17.237686157226562, 55.92362976074219, 31.18328094482422, 9.580963134765625, -6.06878662109375, 19.804916381835938, 29.773880004882812, 38.38624572753906, 20.47403335571289], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000286.npy"}
{"epoch": 0.8411764705882353, "step": 287, "batch_size": 128, "mean": 17.606403350830078, "std": 15.254363059997559, "min": -22.351905822753906, "p10": 1.56048698425293, "median": 15.721763610839844, "p90": 38.14481430053711, "max": 58.92572021484375, "pos_frac": 0.921875, "sample": [26.780353546142578, 5.310892105102539, 6.957790374755859, -9.524246215820312, 12.328086853027344, 11.892379760742188, 26.899208068847656, 15.0982666015625, 28.43756103515625, -18.14573097229004, 1.3487091064453125, 8.7154541015625, 15.733047485351562, 20.392501831054688, 10.014762878417969, 37.51982879638672, 5.387992858886719, 22.938072204589844, 30.061317443847656, 1.0971832275390625, 17.85680389404297, -7.83294677734375, 10.4698486328125, -17.29373550415039, 13.391815185546875, 21.821121215820312, 28.498748779296875, 13.046195983886719, -8.568084716796875, 23.0535888671875, 20.344696044921875, 6.586668014526367, 39.21870422363281, 51.144073486328125, -2.661396026611328, 15.816802978515625, -0.08225250244140625, 35.79755401611328, 31.497528076171875, 43.316680908203125, 5.835929870605469, 41.138038635253906, 31.023284912109375, 58.92572021484375, 16.036941528320312, 14.827747344970703, 4.053153991699219, -1.3828277587890625, 19.772140502929688, 32.19734191894531, 30.126934051513672, 7.060293197631836, 6.397407531738281, 5.381229400634766, 7.2576751708984375, 11.708786010742188, 12.506385803222656, 26.913040161132812, 36.14215087890625, 22.295249938964844, 20.125640869140625, 13.814123153686523, 9.491889953613281, 21.580245971679688, 9.042388916015625, 31.014249801635742, 6.043876647949219, 15.710479736328125, 30.924062728881836, 3.345762252807617, -5.639606475830078, 46.961181640625, 11.458581924438477, 20.152267456054688, 16.007461547851562, 53.958824157714844, 9.812969207763672, 12.648284912109375, 12.18682861328125, 38.053245544433594, 1.8067474365234375, 30.27703094482422, 1.6512489318847656, 2.4824066162109375, -22.351905822753906, 19.99163818359375, 25.59709930419922, 15.337684631347656, 14.723075866699219, 5.722564697265625, 39.51643371582031, 14.48324966430664, 2.612680435180664, 48.05451202392578, 20.645965576171875, 30.9449462890625, 8.882476806640625, 19.632293701171875, 20.305992126464844, 19.69580078125, 32.990821838378906, 32.27287292480469, 20.668601989746094, 30.84722900390625, 10.824508666992188, 32.064910888671875, 8.847671508789062, 5.930780410766602, 6.629955291748047, 16.47681427001953, 22.46942138671875, 3.735942840576172, 48.185577392578125, 6.326515197753906, 35.11668395996094, 15.875869750976562, 8.007804870605469, 11.415603637695312, 0.5859603881835938, 6.712818145751953, 19.00879669189453, 53.87968444824219, 13.107608795166016, 20.808334350585938, 45.820518493652344, 38.35847473144531, 15.887147903442383, 11.175365447998047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000287.npy"}
{"epoch": 0.8441176470588235, "step": 288, "batch_size": 128, "mean": 17.188316345214844, "std": 20.182924270629883, "min": -36.000335693359375, "p10": -5.375944519042967, "median": 15.24978256225586, "p90": 43.780997085571286, "max": 91.36233520507812, "pos_frac": 0.84375, "sample": [6.3743743896484375, 23.50809669494629, 49.55381774902344, 6.27618408203125, 41.97217559814453, 16.740436553955078, 14.977745056152344, 26.205425262451172, 9.768157958984375, 28.52124786376953, 16.144105911254883, 42.148475646972656, 23.052305221557617, 7.840301513671875, 63.44615936279297, 6.863780975341797, 28.295909881591797, 25.067771911621094, -9.357391357421875, 2.6512069702148438, 41.69647216796875, 1.4146308898925781, 54.45709991455078, 15.310546875, 19.300338745117188, 16.315582275390625, 12.104623794555664, 64.59185791015625, 10.536422729492188, 6.241607666015625, 43.48188018798828, 15.562234878540039, 24.9415283203125, 25.92582893371582, 8.239606857299805, 4.938831329345703, -11.3306884765625, 25.588058471679688, 10.086227416992188, 33.23260498046875, 15.624536514282227, -4.458324432373047, -6.8444366455078125, 18.57766342163086, 2.4570465087890625, 19.649982452392578, 0.06633186340332031, 38.85441589355469, 14.818099975585938, -5.0549468994140625, -21.741058349609375, 16.941436767578125, 53.62577819824219, -36.000335693359375, 40.043365478515625, 18.133359909057617, 36.885520935058594, 10.037162780761719, 53.83936309814453, 2.108684539794922, -11.535171508789062, 11.545909881591797, 0.6029739379882812, 23.76471710205078, 29.37305450439453, 38.976470947265625, -29.613449096679688, 16.023681640625, -3.7210559844970703, 16.57103729248047, 5.6333770751953125, 26.292648315429688, 24.454986572265625, -2.146686553955078, 34.731895446777344, 21.205535888671875, 23.809555053710938, 43.760250091552734, 10.24871826171875, 49.75035858154297, -5.034942626953125, 3.583282470703125, 5.0363311767578125, 43.82940673828125, 91.36233520507812, 1.5337753295898438, -8.2398681640625, 29.910202026367188, -6.12493896484375, 16.13695526123047, -0.330352783203125, 57.74479675292969, 42.88390350341797, 47.026885986328125, 4.341728210449219, 0.16223907470703125, 5.886081695556641, 8.6146240234375, 6.902366638183594, 56.188934326171875, 15.189018249511719, 11.571197509765625, 21.133056640625, 31.187332153320312, 7.953910827636719, 29.2657470703125, 7.078332901000977, 10.887962341308594, 14.813850402832031, 22.356796264648438, -13.446243286132812, 7.663818359375, -9.79482650756836, 15.128509521484375, 14.239418029785156, 16.015914916992188, 12.182327270507812, -2.8987274169921875, 3.0017242431640625, 21.046852111816406, 29.13916015625, 13.731414794921875, 19.88164520263672, -17.996322631835938, 11.904241561889648, 18.879281997680664, -12.149429321289062, 50.74665069580078], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000288.npy"}
{"epoch": 0.8470588235294118, "step": 289, "batch_size": 128, "mean": 18.512102127075195, "std": 16.832256317138672, "min": -24.223670959472656, "p10": -1.5170967102050779, "median": 16.13731575012207, "p90": 40.83524818420409, "max": 62.9071044921875, "pos_frac": 0.875, "sample": [-24.223670959472656, 56.74622344970703, 29.703964233398438, 62.9071044921875, 4.612222671508789, 31.64228057861328, 26.41901397705078, 15.877887725830078, 58.76280212402344, 50.069847106933594, 14.755615234375, 18.57040786743164, 14.227115631103516, 20.291854858398438, -23.53997802734375, 10.539413452148438, 13.015466690063477, 8.05224609375, 21.561439514160156, 15.097564697265625, 45.680152893066406, -2.8046722412109375, 9.198844909667969, 12.557491302490234, 6.86546516418457, 33.23765563964844, 27.014564514160156, 32.06305694580078, 38.30670928955078, -1.43377685546875, 38.80705261230469, 31.2386474609375, 27.667831420898438, 3.749114990234375, 32.36647033691406, 25.302566528320312, 23.300464630126953, 36.100242614746094, 14.233081817626953, 28.374046325683594, 31.64392852783203, 5.479640960693359, 3.4290313720703125, 12.391952514648438, 58.830726623535156, 9.90285873413086, 15.529434204101562, 6.231586456298828, 5.2433013916015625, 7.136699676513672, 23.4156494140625, 17.3590087890625, 4.3724365234375, -1.4009552001953125, -2.5826263427734375, 32.68321990966797, 27.91033935546875, -3.6620559692382812, -5.642333984375, 16.54608917236328, 18.85395050048828, 13.786392211914062, 29.81806182861328, 15.141138076782227, 34.615196228027344, 7.55479621887207, 9.920799255371094, 7.230625152587891, 45.733367919921875, 31.323816299438477, 44.871192932128906, -3.03289794921875, 48.29191207885742, 8.54376220703125, 32.84857177734375, 37.39354705810547, 13.111526489257812, 21.694820404052734, 14.458812713623047, 11.29266357421875, 14.729692459106445, 52.77668762207031, 3.9541473388671875, 43.9715576171875, 9.517427444458008, -0.2456207275390625, -6.442775726318359, -8.850093841552734, 12.272850036621094, 20.145858764648438, 19.06841278076172, 29.330398559570312, 27.819477081298828, 22.02123260498047, 29.155792236328125, 6.185550689697266, 12.710512161254883, 4.485637664794922, 2.5982894897460938, 25.98724365234375, 32.361907958984375, 16.95758819580078, 26.31060791015625, 16.396743774414062, 3.3240528106689453, 10.4017333984375, 39.49111557006836, 3.7257957458496094, -6.9820709228515625, 15.710800170898438, 25.441452026367188, -15.042022705078125, 14.193103790283203, 22.09172821044922, 1.8228302001953125, 4.749809265136719, 50.62104034423828, 20.731971740722656, 22.107460021972656, -1.7115097045898438, 9.506790161132812, 13.36016845703125, 45.515846252441406, 26.000144958496094, 30.381507873535156, -11.534530639648438, 19.77721405029297, 17.46163558959961], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000289.npy"}
{"epoch": 0.85, "step": 290, "batch_size": 128, "mean": 16.68277359008789, "std": 17.429637908935547, "min": -17.159317016601562, "p10": -1.9427928924560538, "median": 13.671488761901855, "p90": 40.395263671875, "max": 76.77743530273438, "pos_frac": 0.828125, "sample": [-7.818178176879883, 17.273733139038086, 10.82135009765625, 20.621910095214844, -7.24383544921875, 15.779998779296875, 22.098838806152344, 19.032913208007812, 16.66638946533203, 42.80950164794922, 39.92853546142578, 2.086162567138672, -14.821578979492188, -4.215383529663086, 13.950651168823242, 16.237625122070312, 6.0672149658203125, 27.482175827026367, 33.935890197753906, 33.188751220703125, 12.016006469726562, 8.236576080322266, 7.433633804321289, 6.08575439453125, 1.674468994140625, 11.965538024902344, 23.33704376220703, 9.308349609375, 12.742820739746094, 13.392326354980469, 10.848041534423828, 11.872879028320312, 21.538986206054688, 35.07355499267578, -14.319589614868164, 8.788171768188477, -17.159317016601562, 10.520774841308594, 30.132225036621094, 14.945198059082031, 13.234508514404297, 16.185916900634766, 12.171968460083008, 4.112773895263672, 28.16793441772461, 0.0255279541015625, 14.278739929199219, -2.5570526123046875, 8.12969970703125, 5.792686462402344, 16.415191650390625, -0.223236083984375, 54.59037780761719, 9.399528503417969, 18.63512420654297, -0.7545394897460938, 42.11729431152344, 12.653482437133789, -1.092355728149414, 12.517778396606445, 6.17913818359375, 50.20941162109375, 4.725746154785156, 29.67449951171875, -6.3065643310546875, 45.92036437988281, -5.9678955078125, 7.9004364013671875, -4.065948486328125, 16.898399353027344, 40.68450927734375, 51.1285400390625, 18.79129409790039, -0.9329662322998047, 56.56791687011719, 26.70928955078125, 11.888057708740234, 40.32417297363281, 36.590667724609375, 18.574787139892578, 2.73138427734375, 0.9853973388671875, 76.37678527832031, 45.530006408691406, 18.710250854492188, -10.801225662231445, 25.581809997558594, 10.960456848144531, 4.4317626953125, 27.048126220703125, 39.44891357421875, -0.917510986328125, 31.704620361328125, 50.015541076660156, 9.233016967773438, 9.881107330322266, 10.067014694213867, 25.42822265625, -0.19561195373535156, 15.68182373046875, 76.77743530273438, 17.885284423828125, -1.0006637573242188, 40.56114196777344, 24.647628784179688, 22.703594207763672, 20.211318969726562, 21.96859359741211, 36.10353088378906, 3.5483779907226562, 16.36444091796875, 27.116317749023438, 9.46533203125, 22.55152130126953, 3.91326904296875, 14.905303955078125, 6.050453186035156, 12.313251495361328, -11.759023666381836, -7.408985137939453, 8.364799499511719, 31.975921630859375, -1.6795387268066406, -1.645151138305664, 17.61703109741211, 35.45257568359375, 32.43378448486328, 32.4443244934082], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000290.npy"}
{"epoch": 0.8529411764705882, "step": 291, "batch_size": 128, "mean": 16.446044921875, "std": 16.782747268676758, "min": -15.983222961425781, "p10": -3.1168380737304684, "median": 12.440620422363281, "p90": 37.95186538696288, "max": 61.986663818359375, "pos_frac": 0.828125, "sample": [24.396835327148438, 1.9814910888671875, 22.62613296508789, -4.367515563964844, 8.303939819335938, 9.703475952148438, -10.532424926757812, 11.274425506591797, 51.89398193359375, 25.06775665283203, 36.573143005371094, 45.98480224609375, -1.4366073608398438, 34.58821487426758, -0.18696212768554688, 24.04743194580078, -0.5372543334960938, 17.423683166503906, 43.485137939453125, -3.06915283203125, 32.78511428833008, 25.23798370361328, -6.106224060058594, 32.986671447753906, 34.58952713012695, -6.1601104736328125, 7.565990447998047, -6.924007415771484, 5.073451995849609, -7.097755432128906, 19.143211364746094, 21.294952392578125, 0.3749198913574219, 13.631168365478516, 3.7548980712890625, 51.93623733520508, 27.17432403564453, 21.965131759643555, 10.353302001953125, 4.480674743652344, 11.410003662109375, 5.023124694824219, 41.16888427734375, 10.350364685058594, 26.718605041503906, 22.410751342773438, -4.9003753662109375, 47.79876708984375, 20.69240951538086, 11.048168182373047, -1.2010269165039062, 11.164451599121094, 31.59278106689453, 3.7071304321289062, 19.022857666015625, 12.822563171386719, 7.143459320068359, 13.914581298828125, 7.2989959716796875, -9.87823486328125, 16.65265655517578, 58.73971176147461, 13.80804443359375, 11.245048522949219, -5.3309783935546875, 17.6981201171875, 9.582748413085938, 17.329689025878906, 51.21099853515625, -2.2680816650390625, 10.182884216308594, 30.382598876953125, 2.878355026245117, 12.387237548828125, 18.171875, 29.682388305664062, 12.494003295898438, 7.663108825683594, 13.979820251464844, 6.925640106201172, 26.870479583740234, 3.9430503845214844, 4.1102294921875, 5.40728759765625, 8.033134460449219, -15.983222961425781, 16.631813049316406, 34.28715515136719, 10.218219757080078, 7.461418151855469, 1.0908832550048828, 59.105010986328125, 45.943939208984375, 33.46392822265625, -4.411966323852539, -6.492168426513672, 5.4760284423828125, 25.31622314453125, 7.481025695800781, 30.81836700439453, 3.4808197021484375, 28.037731170654297, 21.47716522216797, 35.203697204589844, 1.7304229736328125, 61.986663818359375, 22.398975372314453, 9.826065063476562, 27.32611083984375, 34.150543212890625, 32.597068786621094, 29.99054718017578, 0.45735931396484375, 9.616531372070312, 56.17576599121094, 31.41064453125, -2.108856201171875, 25.336631774902344, -3.2281036376953125, -1.5784015655517578, 15.897140502929688, 8.070175170898438, 27.461044311523438, 6.763694763183594, 44.03565979003906, -0.7728862762451172, 11.411788940429688, 25.16482925415039], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000291.npy"}
{"epoch": 0.8558823529411764, "step": 292, "batch_size": 128, "mean": 17.663528442382812, "std": 18.860416412353516, "min": -26.674285888671875, "p10": -1.7636997222900384, "median": 15.190528869628906, "p90": 44.52255401611328, "max": 121.99057006835938, "pos_frac": 0.8671875, "sample": [14.265380859375, 0.949798583984375, -7.490325927734375, 15.283065795898438, 4.0069122314453125, 2.5600433349609375, 21.833580017089844, 11.794944763183594, 23.66168212890625, -9.396820068359375, 30.016250610351562, -7.639232635498047, 9.737419128417969, 4.660572052001953, 18.36033058166504, -19.44872283935547, 25.781383514404297, 37.67671203613281, 7.977447509765625, 37.677093505859375, 13.376140594482422, 1.9007568359375, 31.63385009765625, 10.86093521118164, -9.731468200683594, 6.245872497558594, 24.176979064941406, 36.06019592285156, 1.4935684204101562, 23.455799102783203, 17.588119506835938, -2.255889892578125, 121.99057006835938, 11.158180236816406, 7.642578125, -0.5664939880371094, 6.4039764404296875, 17.807388305664062, 14.265289306640625, -5.9123382568359375, 1.4295482635498047, -7.69232177734375, 15.694427490234375, 16.33069610595703, -26.674285888671875, 8.281234741210938, 7.732593536376953, 10.770233154296875, -6.061676025390625, 47.68152618408203, 18.182655334472656, 10.24985122680664, 16.864646911621094, 1.9937515258789062, 48.72869873046875, 1.912200927734375, 32.63160705566406, 13.127843856811523, 24.95294189453125, 44.31968688964844, 46.070556640625, 28.854110717773438, 8.597480773925781, 20.478195190429688, 15.355850219726562, 10.984367370605469, 8.053524017333984, 9.463886260986328, 11.113922119140625, 9.99444580078125, 24.00677490234375, 24.594696044921875, -0.8590164184570312, 46.91767883300781, 55.55662536621094, 13.519546508789062, 19.43706512451172, 1.7362823486328125, 18.950927734375, 39.178802490234375, 19.70410919189453, -4.8096466064453125, 30.687271118164062, 48.46142578125, 7.630744934082031, 28.35852813720703, 1.8935661315917969, 7.1951446533203125, 0.7521572113037109, 32.42401123046875, 24.542030334472656, 27.711807250976562, 45.86682891845703, 44.99591064453125, 17.50210952758789, 51.824371337890625, 9.860969543457031, 4.187046051025391, 2.89727783203125, -1.5527610778808594, 21.18609619140625, 41.60630416870117, 29.578529357910156, 48.04010009765625, 11.148895263671875, -5.369926452636719, 47.84796142578125, 24.15399932861328, 22.763656616210938, 32.0567626953125, 18.345836639404297, -2.2681884765625, 15.097991943359375, 11.802469253540039, 7.133182525634766, 53.796844482421875, 7.063446044921875, 20.022127151489258, 35.029876708984375, 28.56591033935547, 1.3194007873535156, 13.637496948242188, 31.250205993652344, 19.959945678710938, 25.0398006439209, 18.944915771484375, 41.046630859375, -0.32465553283691406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000292.npy"}
{"epoch": 0.8588235294117647, "step": 293, "batch_size": 128, "mean": 16.42249298095703, "std": 18.116437911987305, "min": -27.62608528137207, "p10": -5.275712966918945, "median": 14.376519203186035, "p90": 38.4345947265625, "max": 79.73979187011719, "pos_frac": 0.8515625, "sample": [7.77220344543457, 10.685379028320312, 51.07155990600586, 30.387954711914062, 2.2072067260742188, 52.6258544921875, 31.39484214782715, -5.438640594482422, 33.822059631347656, 16.68297576904297, 69.85011291503906, 29.694015502929688, -3.8305587768554688, 56.09410095214844, 4.531890869140625, 19.785491943359375, -8.954254150390625, -8.184127807617188, 31.525733947753906, 23.805706024169922, 11.875518798828125, -22.98273468017578, 17.394058227539062, 42.259246826171875, 19.9140625, 14.317686080932617, -8.686233520507812, -3.1090316772460938, 1.8891124725341797, 1.7254104614257812, 6.5403289794921875, 79.73979187011719, 11.18368911743164, 11.458385467529297, 17.920949935913086, 10.149085998535156, 2.695953369140625, 28.105010986328125, 32.05908966064453, 47.13227081298828, 10.2679443359375, 22.639816284179688, 26.925098419189453, 18.52447509765625, 33.803497314453125, 38.757080078125, 12.030319213867188, -0.622344970703125, 20.75463104248047, 41.43736267089844, 35.127593994140625, -12.885257720947266, 26.463668823242188, 16.71947479248047, 5.0196533203125, -0.3306732177734375, 28.816001892089844, 10.47134780883789, 31.67591094970703, -9.772369384765625, 1.01123046875, 16.381258010864258, 10.159210205078125, 28.65782928466797, 70.7359619140625, 6.773170471191406, 13.594635009765625, 13.745460510253906, 15.290138244628906, 11.62820053100586, 3.7229137420654297, 18.115686416625977, 21.672500610351562, -10.952716827392578, 12.088741302490234, 3.010255813598633, 14.435352325439453, 10.357255935668945, 38.29638671875, 31.284286499023438, 7.825157165527344, 8.712820053100586, 15.267078399658203, 17.339828491210938, 8.01629638671875, -5.9909515380859375, 4.3218994140625, 20.904701232910156, 43.34380340576172, 7.795967102050781, 1.8091201782226562, 28.27960968017578, -10.919143676757812, 19.167434692382812, 42.78166198730469, 16.57623291015625, 10.53445053100586, 20.318702697753906, 8.352005004882812, 0.37534332275390625, 14.286941528320312, 30.889083862304688, -5.022891998291016, 20.416404724121094, 30.99542236328125, -7.8811798095703125, 25.098285675048828, 12.286758422851562, -5.2058868408203125, 10.5628662109375, 19.68958282470703, 13.697868347167969, 45.76934814453125, 13.378616333007812, 32.7760009765625, 18.38995361328125, -24.782913208007812, -27.62608528137207, 15.778177261352539, 13.79599380493164, 33.9283561706543, 31.074554443359375, 4.385707855224609, 1.29443359375, 21.24175262451172, 18.761253356933594, 9.463851928710938, 16.88262176513672], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000293.npy"}
{"epoch": 0.861764705882353, "step": 294, "batch_size": 128, "mean": 17.321956634521484, "std": 18.70383071899414, "min": -22.137298583984375, "p10": -3.177789878845214, "median": 14.061280250549316, "p90": 45.7169219970703, "max": 65.67440795898438, "pos_frac": 0.8671875, "sample": [24.173187255859375, 44.689727783203125, 13.735401153564453, -3.017007827758789, 27.761016845703125, 0.44632720947265625, 15.105125427246094, -0.6679801940917969, 2.917724609375, -5.4105224609375, 18.552734375, 19.564619064331055, 13.922508239746094, 3.33978271484375, 13.019905090332031, 4.76812744140625, 0.46178436279296875, 24.340057373046875, 53.735130310058594, 7.676885604858398, 9.557657241821289, -12.728118896484375, 9.6236572265625, 1.7925567626953125, 23.470703125, 25.48406219482422, 59.09898376464844, 21.45953369140625, 39.20989990234375, 24.785789489746094, 26.231475830078125, 31.22845458984375, 12.125665664672852, 30.930355072021484, 6.126319885253906, 25.109085083007812, 42.53263854980469, 4.671991348266602, 18.780317306518555, 18.51611328125, 37.84370422363281, 48.69688415527344, 1.2584590911865234, 42.97425842285156, 14.156845092773438, 7.085502624511719, 10.987892150878906, 20.277061462402344, -2.843585968017578, 58.67393112182617, -5.878259658813477, 48.11370849609375, 13.730789184570312, 23.32318115234375, 4.017063140869141, 2.1747779846191406, 40.79302215576172, 5.9647674560546875, 11.900924682617188, 0.9520702362060547, 3.7985992431640625, -3.552947998046875, 25.19176483154297, 30.07659912109375, 4.72003173828125, -7.578094482421875, 5.498374938964844, 7.354022979736328, 29.981239318847656, 28.315322875976562, 11.648475646972656, 14.133020401000977, -3.8853912353515625, 36.28511047363281, -8.182865142822266, 21.95714569091797, 9.213531494140625, 16.107101440429688, 21.686798095703125, 18.41503143310547, 1.774261474609375, 38.725868225097656, 35.47090148925781, 2.6766815185546875, 21.56995391845703, 13.989540100097656, 5.6914825439453125, 50.31398010253906, -22.137298583984375, 13.264198303222656, 1.3530654907226562, 60.503875732421875, 58.568603515625, 3.9524688720703125, 18.66460418701172, 30.675350189208984, 32.18603515625, 53.48882293701172, -11.326606750488281, 21.132423400878906, -6.7987213134765625, 6.723236083984375, -5.402688980102539, 1.1843719482421875, -21.1990966796875, 16.422683715820312, 57.91596984863281, 15.730537414550781, -4.67877197265625, 4.568586349487305, 2.721712112426758, 4.491430282592773, 58.717620849609375, 23.159841537475586, 19.7718505859375, 16.65142822265625, 24.266441345214844, -1.685953140258789, 9.450294494628906, 65.67440795898438, 24.297119140625, 3.4281044006347656, 0.31271839141845703, 22.872802734375, 18.909988403320312, 10.830162048339844, 63.357643127441406, 12.471172332763672], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000294.npy"}
{"epoch": 0.8647058823529412, "step": 295, "batch_size": 128, "mean": 20.575740814208984, "std": 17.009008407592773, "min": -12.141204833984375, "p10": -0.5703067779540999, "median": 20.56558609008789, "p90": 45.17715835571289, "max": 82.04963684082031, "pos_frac": 0.890625, "sample": [25.62261962890625, 41.05842590332031, 10.048599243164062, -1.8993492126464844, 22.352920532226562, 0.9155235290527344, 6.626087188720703, 31.510013580322266, 43.558998107910156, -5.3941650390625, 26.774444580078125, 27.936119079589844, 10.72629165649414, 3.070770263671875, -12.141204833984375, 33.65294647216797, 32.23902130126953, 33.81694030761719, 82.04963684082031, 21.29400634765625, 30.30780029296875, -0.09467315673828125, 23.41204833984375, 5.90754508972168, 46.20262145996094, 45.786651611328125, 50.4454345703125, -9.65625, 25.211788177490234, 18.4354248046875, 3.5292129516601562, 22.75696563720703, 38.809326171875, -9.367088317871094, 25.53321075439453, 22.531036376953125, 31.071365356445312, 5.458911895751953, 17.029842376708984, 58.40869140625, 19.085472106933594, -4.048484802246094, 17.666244506835938, 22.536502838134766, 13.54351806640625, 18.343669891357422, 5.895111083984375, 23.898284912109375, 35.563026428222656, -2.7625274658203125, 18.172576904296875, 13.002918243408203, 48.22057342529297, 8.661027908325195, 37.33929443359375, 22.59259033203125, 11.174789428710938, 35.207305908203125, 4.554525375366211, 23.502044677734375, 51.8381462097168, 13.999382019042969, -10.753913879394531, 30.56924819946289, 2.9217453002929688, 47.46644592285156, 25.30066680908203, 5.327707290649414, 7.875434875488281, 7.646335601806641, 21.74353790283203, 10.625213623046875, 14.267648696899414, -6.271026611328125, 27.775741577148438, 26.20855712890625, 28.40157699584961, 20.287033081054688, 15.399362564086914, 27.690399169921875, 14.582305908203125, 50.941650390625, 14.502853393554688, 23.764793395996094, 39.98060607910156, 16.503498077392578, 25.76258087158203, 22.38888168334961, 5.274909973144531, 59.80539321899414, 26.605751037597656, 46.642662048339844, 26.03228759765625, 29.86712646484375, 7.2434234619140625, 25.396987915039062, 41.99542999267578, 16.382171630859375, -7.341091156005859, -6.47838020324707, 12.869808197021484, 33.71586608886719, 36.33656311035156, 12.599658966064453, -1.6801185607910156, 4.435138702392578, 31.602577209472656, 29.83441925048828, 56.561126708984375, 4.91380500793457, -6.3825225830078125, 44.91594696044922, 7.079093933105469, 28.610816955566406, 10.758613586425781, 13.15106201171875, 3.7910194396972656, 20.0531005859375, 4.655185699462891, 7.795219421386719, 11.510723114013672, 27.463851928710938, 17.614173889160156, 8.87225341796875, 23.278076171875, 20.844139099121094, 14.971031188964844, 47.66425323486328], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000295.npy"}
{"epoch": 0.8676470588235294, "step": 296, "batch_size": 128, "mean": 16.01596450805664, "std": 16.1382999420166, "min": -46.014312744140625, "p10": -3.182512664794922, "median": 16.239513397216797, "p90": 36.15632095336914, "max": 52.25035858154297, "pos_frac": 0.84375, "sample": [11.31298828125, 24.327850341796875, 47.534461975097656, 26.59979248046875, 38.535125732421875, 0.9018173217773438, 19.462642669677734, 9.827682495117188, 20.15093231201172, 33.244171142578125, 13.6607666015625, 7.801826477050781, 11.036849975585938, 18.687118530273438, -3.0971202850341797, 6.294801712036133, 16.945068359375, -16.467281341552734, 20.370004653930664, 16.89013671875, 5.0382537841796875, 7.2215576171875, 24.142074584960938, 7.561624526977539, 10.989898681640625, 26.583984375, 24.533348083496094, 25.600200653076172, -2.9532546997070312, 1.3135833740234375, 51.4815673828125, -6.412078857421875, 6.7347259521484375, 48.64453125, 11.109588623046875, 18.830718994140625, 23.641700744628906, 18.350540161132812, 21.71208953857422, 41.15167999267578, 31.833969116210938, 6.669118881225586, -3.3817615509033203, 37.130157470703125, 23.215003967285156, -1.0616912841796875, -2.0023880004882812, 20.141128540039062, -13.749740600585938, 22.67758560180664, 37.784183502197266, 47.176414489746094, 8.4512939453125, 9.022773742675781, 45.2022705078125, 14.437129974365234, 32.833526611328125, -10.872444152832031, 15.037513732910156, 9.245155334472656, 17.485015869140625, 0.230194091796875, 38.491912841796875, 17.65320587158203, 35.81041717529297, 8.366561889648438, -7.898050308227539, 25.056350708007812, -2.268218994140625, -3.7297744750976562, 36.27227020263672, 15.371734619140625, 25.233985900878906, 26.25314712524414, 32.05998992919922, 52.25035858154297, 17.08590316772461, 6.315685272216797, 13.633232116699219, 29.591995239257812, 23.188827514648438, 7.51422119140625, 6.7122650146484375, 7.1148834228515625, 17.10693359375, 14.852958679199219, 4.31591796875, 36.10662841796875, 16.32115936279297, 32.782508850097656, 3.1364364624023438, 28.3282470703125, 17.704452514648438, 11.3951416015625, -11.527030944824219, 32.018699645996094, 50.103580474853516, 25.894989013671875, 15.511123657226562, -1.3338394165039062, -4.632333755493164, 16.157867431640625, 0.9335746765136719, -13.674293518066406, 9.290943145751953, 5.598920822143555, 6.039112091064453, 31.765884399414062, 34.90951919555664, 31.77115249633789, 3.045013427734375, -0.9891281127929688, 24.77281379699707, 15.514335632324219, 25.336410522460938, 26.010055541992188, 9.223762512207031, -12.22784423828125, 21.82250213623047, -7.375373840332031, 10.907997131347656, 13.182655334472656, 33.077720642089844, 7.807151794433594, -46.014312744140625, 22.056182861328125, 27.783767700195312, 18.350149154663086], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000296.npy"}
{"epoch": 0.8705882352941177, "step": 297, "batch_size": 128, "mean": 16.948570251464844, "std": 18.08913803100586, "min": -35.21858215332031, "p10": -3.9681999206542966, "median": 16.358417510986328, "p90": 41.151729583740234, "max": 64.57318115234375, "pos_frac": 0.8203125, "sample": [-2.9105072021484375, -7.199127197265625, -12.669700622558594, -9.051090240478516, 5.1211395263671875, 2.8129310607910156, 26.427059173583984, 13.530715942382812, 46.32893371582031, 36.777549743652344, 27.595951080322266, 14.860851287841797, 27.82508659362793, 19.925880432128906, 1.9768447875976562, 14.139694213867188, -1.5502128601074219, 3.9766082763671875, 23.165611267089844, 30.514617919921875, 10.3038330078125, 29.25525665283203, 5.91900634765625, -0.1826457977294922, 36.46941375732422, 26.32849884033203, -0.8070297241210938, 11.05303955078125, 38.801856994628906, 42.092742919921875, 33.29719543457031, 12.555381774902344, 47.458473205566406, 34.614044189453125, 2.80859375, 48.7579345703125, 5.287635803222656, 4.457244873046875, 7.052520751953125, 14.183324813842773, 23.43779182434082, 52.09449768066406, 13.49755859375, 27.936420440673828, 0.9818572998046875, 7.386598587036133, -6.115446090698242, 40.780494689941406, -33.65226745605469, 16.642181396484375, -14.783119201660156, 19.740272521972656, 19.917367935180664, -12.223960876464844, 14.958711624145508, 6.617790222167969, 42.0179443359375, -1.4430198669433594, -7.234014511108398, 15.523475646972656, -4.183441162109375, -6.975555419921875, -1.561431884765625, 18.51751708984375, 42.754974365234375, 3.7081832885742188, 2.18475341796875, 1.1267375946044922, 5.74920654296875, 22.554027557373047, 47.450382232666016, 8.47750473022461, 29.868316650390625, 36.04290008544922, -2.4961166381835938, -35.21858215332031, 24.677261352539062, 16.79766845703125, -4.492591857910156, 38.47606658935547, 11.458511352539062, 17.72412109375, 38.729522705078125, 13.592430114746094, 14.567184448242188, -1.30010986328125, 30.303573608398438, -21.755905151367188, 5.421150207519531, 18.062240600585938, -2.1280517578125, 3.8861083984375, 22.819541931152344, 12.75885009765625, 43.77993392944336, 2.1023101806640625, 16.57880401611328, 20.248172760009766, 30.58775520324707, 39.03932189941406, 50.61852264404297, 23.453651428222656, 18.315122604370117, 14.15032958984375, 9.559072494506836, 32.23600769042969, -3.8759536743164062, 30.70207977294922, 21.864402770996094, 28.219322204589844, 9.726062774658203, 25.21627426147461, 1.8466262817382812, 25.510894775390625, 22.179058074951172, 31.62889862060547, 27.761775970458984, 42.66950607299805, 30.893672943115234, 32.6988525390625, 21.0771484375, 39.41615295410156, 50.896507263183594, 12.302139282226562, 64.57318115234375, 2.2402095794677734, 16.138031005859375, 30.079971313476562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000297.npy"}
{"epoch": 0.8735294117647059, "step": 298, "batch_size": 128, "mean": 17.67483139038086, "std": 18.151256561279297, "min": -18.758167266845703, "p10": -1.591040229797361, "median": 14.731271743774414, "p90": 43.343933486938475, "max": 93.01943969726562, "pos_frac": 0.8671875, "sample": [2.4702281951904297, 11.222049713134766, 9.327869415283203, 12.31254768371582, 0.2195453643798828, 33.54994201660156, 3.8669605255126953, 33.480804443359375, 21.767681121826172, 93.01943969726562, 27.611427307128906, 1.8434944152832031, 7.833446502685547, 19.276702880859375, 18.858963012695312, 17.547637939453125, 14.729267120361328, 5.46044921875, 25.05926513671875, 12.267559051513672, -3.4369964599609375, 26.35772705078125, 25.32770538330078, 28.63172149658203, 24.276473999023438, 31.011428833007812, -0.35604095458984375, 50.885223388671875, -3.4444122314453125, 8.206199645996094, 23.636676788330078, 22.18143081665039, -3.116537094116211, 33.304443359375, 9.7449951171875, 12.70684814453125, 14.005144119262695, 50.00532531738281, -0.5086116790771484, 19.665740966796875, -4.212730407714844, 5.0399932861328125, 11.419380187988281, 20.957338333129883, 7.2521514892578125, 23.786022186279297, -9.784317016601562, 16.79409408569336, 26.040245056152344, 5.1218719482421875, 19.686500549316406, 0.44310760498046875, 2.417388916015625, 17.67538833618164, 10.239635467529297, -3.1410980224609375, 32.37633514404297, 8.027297973632812, 2.7466506958007812, 1.6125030517578125, 31.0941162109375, 37.64423370361328, 30.315261840820312, 44.9122314453125, -7.115814208984375, 0.083740234375, 40.08797073364258, -6.754203796386719, 8.917678833007812, 47.89613723754883, -0.331024169921875, 15.41925048828125, 19.0009765625, 3.2645301818847656, 14.979644775390625, 9.447677612304688, -4.560277938842773, 63.65559387207031, 38.44927215576172, 17.94611358642578, -9.695953369140625, 32.528656005859375, 6.4366302490234375, 1.3502044677734375, 9.651687622070312, 15.370281219482422, 21.163589477539062, -15.352563858032227, 19.408550262451172, 36.84723663330078, 8.558235168457031, 4.708738327026367, -4.412347793579102, 37.87457275390625, 1.5844535827636719, -18.758167266845703, 26.67388916015625, 0.6069355010986328, 61.635589599609375, 24.725955963134766, -0.937255859375, 36.198036193847656, 58.975337982177734, 51.66038513183594, 11.390592575073242, 46.461334228515625, 4.5220794677734375, 11.046024322509766, 28.52910614013672, 5.147071838378906, 13.967605590820312, 10.962301254272461, 19.78676986694336, 8.16592788696289, 15.546180725097656, 16.12103271484375, 34.07997131347656, 42.888633728027344, 7.895454406738281, 14.7332763671875, 17.954452514648438, 9.28378677368164, 44.40629959106445, 58.409339904785156, 47.80912780761719, 12.357831954956055, 23.864200592041016, 10.586551666259766], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000298.npy"}
{"epoch": 0.8764705882352941, "step": 299, "batch_size": 128, "mean": 15.682223320007324, "std": 16.013458251953125, "min": -13.677703857421875, "p10": -3.5863235473632797, "median": 15.407928466796875, "p90": 37.744432830810545, "max": 57.0592041015625, "pos_frac": 0.84375, "sample": [19.378341674804688, 20.793909072875977, 17.77045440673828, 31.09777069091797, 0.051788330078125, 35.004913330078125, 5.060951232910156, 37.250816345214844, 21.37102508544922, 22.589038848876953, 41.436370849609375, 42.79185485839844, 18.410749435424805, 20.310531616210938, 7.533145904541016, 11.551055908203125, 12.775444030761719, 4.6006317138671875, 11.161121368408203, 27.564186096191406, 22.340133666992188, 3.6577301025390625, 4.468437194824219, 2.8127479553222656, 6.107673645019531, 31.898406982421875, 21.505081176757812, 0.4790496826171875, -11.546009063720703, 35.931915283203125, 31.1278076171875, 0.07806396484375, 13.194786071777344, -0.6335906982421875, 1.5486640930175781, 17.522064208984375, -13.646820068359375, -4.503538131713867, 20.795257568359375, 5.702869415283203, 42.35404968261719, 8.716873168945312, -7.955657958984375, 2.7600326538085938, 21.927642822265625, -8.360115051269531, -2.1881446838378906, -12.474832534790039, 20.508298873901367, 32.361412048339844, -0.144866943359375, 38.45526885986328, -9.716987609863281, 26.787456512451172, 6.691749572753906, 20.727127075195312, 43.5185546875, 0.5414657592773438, 44.71443176269531, 37.439788818359375, 40.03871154785156, 13.801544189453125, -2.540454864501953, 36.27306365966797, 30.926055908203125, 15.5087890625, 14.999347686767578, 55.53398895263672, -13.677703857421875, 44.572357177734375, 25.444595336914062, 13.567550659179688, 9.308021545410156, 2.0925827026367188, 30.428543090820312, 18.68766212463379, 2.8428268432617188, 24.505943298339844, 44.90765380859375, 41.32520294189453, 13.847126007080078, 7.285255432128906, 0.37425994873046875, 1.8354721069335938, 17.952194213867188, 6.4342498779296875, 10.634613037109375, 15.33868408203125, 15.036291122436523, 21.85535430908203, 15.4771728515625, 2.8766937255859375, 31.41124725341797, 29.64618682861328, 16.753204345703125, 8.505937576293945, 1.0998916625976562, 16.08725357055664, -0.031406402587890625, -6.009304046630859, -11.970561981201172, 19.200607299804688, 18.14971923828125, -3.1932315826416016, 32.092750549316406, 16.477031707763672, 57.0592041015625, 3.853382110595703, -11.786117553710938, 6.587085723876953, 2.26837158203125, 23.697612762451172, -2.562175750732422, 14.230842590332031, 1.3720779418945312, 26.703506469726562, -5.23365592956543, 28.353538513183594, 27.823287963867188, 8.98775863647461, 21.729568481445312, 48.85826110839844, 29.688125610351562, 31.317245483398438, 18.16443634033203, -7.19837760925293, 22.0552978515625, 11.633964538574219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000299.npy"}
{"epoch": 0.8794117647058823, "step": 300, "batch_size": 128, "mean": 16.3056640625, "std": 15.564932823181152, "min": -37.317970275878906, "p10": -1.7164447784423822, "median": 14.351016998291016, "p90": 34.06666107177734, "max": 65.07376098632812, "pos_frac": 0.859375, "sample": [43.22980880737305, 3.738555908203125, 9.590692520141602, -16.961326599121094, 30.351722717285156, 63.10418701171875, 27.62091064453125, 31.301864624023438, 13.785881042480469, 18.29167938232422, 8.918428421020508, 20.274070739746094, 13.145614624023438, 21.017440795898438, 18.41427230834961, -2.714458465576172, 16.790122985839844, 17.424957275390625, 15.558029174804688, 31.5330810546875, -5.2725677490234375, 4.3128662109375, 17.581161499023438, 12.848808288574219, 32.53348159790039, 32.155357360839844, 21.514999389648438, 3.1845016479492188, 2.6935958862304688, -5.540718078613281, 29.34552764892578, 26.10633087158203, 4.7999420166015625, 6.1210784912109375, 65.07376098632812, 33.532386779785156, 4.197649002075195, 25.261585235595703, -2.737934112548828, 14.323471069335938, 33.389007568359375, 0.8162841796875, 23.881683349609375, 20.690811157226562, 7.09239387512207, 3.3770675659179688, 40.96556091308594, 11.508148193359375, 24.23845672607422, 30.590213775634766, 32.627098083496094, 15.68447494506836, -0.08453369140625, 13.096826553344727, 31.701414108276367, -1.5570869445800781, 24.66594696044922, 11.714073181152344, 4.1743011474609375, 20.614158630371094, 3.41058349609375, -3.156404495239258, 10.807319641113281, 10.835281372070312, 11.573163986206055, -0.4058990478515625, 23.521881103515625, -14.50103759765625, 26.91645050048828, 1.8394851684570312, -11.747764587402344, 3.12713623046875, 6.324726104736328, 35.900360107421875, 13.200342178344727, 9.99677848815918, -0.129730224609375, 31.779342651367188, 5.9820098876953125, 11.323938369750977, 44.108306884765625, 14.378562927246094, 30.013275146484375, -5.147697448730469, -4.6343536376953125, 23.951873779296875, -6.552787780761719, 13.35565185546875, 7.7180023193359375, 12.732566833496094, 15.977962493896484, 12.780311584472656, 25.664581298828125, 13.399555206298828, 16.23568344116211, 49.661781311035156, 7.583553314208984, 11.811286926269531, 8.461395263671875, 12.826841354370117, 20.40538787841797, -2.0882797241210938, 13.549890518188477, 22.47937774658203, 33.225982666015625, 21.827362060546875, 12.777679443359375, 28.86048126220703, 47.153785705566406, 33.36502456665039, 14.618782043457031, 33.78839111328125, 35.99775695800781, 37.05145263671875, 19.403846740722656, 35.83758544921875, 34.71595764160156, 6.713962554931641, 17.105594635009766, 23.76453971862793, -37.317970275878906, 6.4810791015625, 21.44073486328125, 1.9414253234863281, 42.42791748046875, 10.539093017578125, -0.506805419921875, 15.003551483154297], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000300.npy"}
{"epoch": 0.8823529411764706, "step": 301, "batch_size": 128, "mean": 17.905895233154297, "std": 18.23550033569336, "min": -39.1783561706543, "p10": -0.8134683609008783, "median": 15.838998794555664, "p90": 43.52018890380859, "max": 72.5899658203125, "pos_frac": 0.8828125, "sample": [1.3808975219726562, 6.220867156982422, 71.1900405883789, 21.32708168029785, 13.389461517333984, 23.300445556640625, 8.077018737792969, 11.796409606933594, 5.580644607543945, 52.49705505371094, 12.168739318847656, 20.389328002929688, 13.83575439453125, 44.45806884765625, 5.75262451171875, 21.73048973083496, -22.90386199951172, -1.632293701171875, 17.8062744140625, 35.55879592895508, 7.339324951171875, 46.51404571533203, 6.8188934326171875, 4.23236083984375, 40.34361267089844, -2.760700225830078, 22.732330322265625, 36.471588134765625, 14.1519775390625, 16.434730529785156, -39.1783561706543, 58.99024200439453, 17.49557876586914, 29.529518127441406, 7.024320602416992, 13.766372680664062, 53.58592224121094, 21.296875, 14.010032653808594, 8.448692321777344, 20.85700798034668, 20.379127502441406, 4.726491928100586, 15.816097259521484, 30.054519653320312, 13.890800476074219, 7.809326171875, 60.4514045715332, 2.851428985595703, 33.271514892578125, -0.1282329559326172, -11.768844604492188, 22.697418212890625, -4.944831848144531, 5.347076416015625, 37.58589172363281, 9.488502502441406, 23.337326049804688, 32.837249755859375, 11.967155456542969, 48.70708084106445, 8.043519973754883, 22.70034408569336, 22.2860107421875, 29.425743103027344, -1.435150146484375, -4.349027633666992, 10.668106079101562, 14.886075973510742, 27.60894775390625, 18.98485565185547, 27.114700317382812, 26.66656494140625, 18.569183349609375, 59.22736358642578, 19.150588989257812, 29.769729614257812, 10.30799674987793, -24.3055419921875, 0.8216476440429688, 43.11824035644531, -5.593128204345703, 1.8516998291015625, 25.388099670410156, 1.9841270446777344, 14.325366973876953, 12.032432556152344, -0.6228847503662109, 32.38298416137695, 14.803138732910156, 23.492347717285156, 28.495145797729492, 26.787464141845703, 20.07392120361328, 38.6907958984375, 18.41333770751953, -1.2581634521484375, 9.79315185546875, 17.954513549804688, 13.263031005859375, 7.344886779785156, 7.138755798339844, 53.75420379638672, 13.654363632202148, 10.567825317382812, 12.663284301757812, 13.010711669921875, 0.8822097778320312, -21.014312744140625, 0.5258102416992188, 6.748941421508789, 9.523849487304688, 23.18946075439453, 24.836143493652344, 50.71808624267578, 27.89849853515625, 23.503700256347656, 12.167388916015625, 72.5899658203125, 8.74658203125, -11.898246765136719, 15.861900329589844, 18.761306762695312, 25.48134994506836, 26.435958862304688, 44.71366882324219, 19.698745727539062, 18.527450561523438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000301.npy"}
{"epoch": 0.8852941176470588, "step": 302, "batch_size": 128, "mean": 17.32929229736328, "std": 16.607627868652344, "min": -18.70966339111328, "p10": -2.4453792572021475, "median": 16.72083282470703, "p90": 38.60922775268554, "max": 103.43478393554688, "pos_frac": 0.8515625, "sample": [-2.968475341796875, 16.116474151611328, 12.377883911132812, 6.345012664794922, 46.41937255859375, 21.141265869140625, 42.41710662841797, 37.32704162597656, 15.437591552734375, 42.652854919433594, 23.787551879882812, 23.008569717407227, 44.62274169921875, 18.413925170898438, 6.6701812744140625, 11.19489860534668, 31.88121795654297, -18.70966339111328, 18.624324798583984, 26.390457153320312, 44.53944396972656, -2.2211952209472656, 3.4334068298339844, 15.974342346191406, -4.695396423339844, 41.46324920654297, -1.52850341796875, 32.4146842956543, 29.266136169433594, -0.141693115234375, 2.7780990600585938, 7.454109191894531, 24.299285888671875, 44.54668426513672, 19.63812255859375, 17.94652557373047, -11.174713134765625, 32.51678466796875, 1.4996795654296875, 26.654739379882812, 6.176910400390625, 27.16973876953125, 19.94915008544922, 15.941596984863281, 6.372810363769531, 15.722770690917969, 14.642433166503906, -4.850101470947266, 40.527931213378906, 23.107934951782227, 0.8893966674804688, -6.075344085693359, 0.18100357055664062, 9.044792175292969, 24.46721649169922, 8.911041259765625, 33.654937744140625, 23.18268585205078, 42.77337646484375, 12.611572265625, 35.39045715332031, 19.434097290039062, 22.017967224121094, -7.277965545654297, 2.7365856170654297, 9.727798461914062, 3.5771751403808594, -2.0757904052734375, 29.42644500732422, 9.460418701171875, 23.190471649169922, 23.79574966430664, 4.371284484863281, 10.344614028930664, 15.345779418945312, 26.18206024169922, 22.58837890625, -0.1036834716796875, -1.6783905029296875, 15.653289794921875, 12.818267822265625, 25.938720703125, 21.728713989257812, 31.970474243164062, 14.741607666015625, -3.4454345703125, 29.580474853515625, 19.007179260253906, 31.728839874267578, 26.64167022705078, 5.799552917480469, 26.84868621826172, 42.39613342285156, 5.570075988769531, 36.42168426513672, 36.436485290527344, 1.3930435180664062, 6.608024597167969, -8.25197982788086, 25.04979705810547, -3.750072479248047, 17.563262939453125, 16.610946655273438, 17.72039794921875, -3.8832836151123047, -9.225837707519531, 29.774063110351562, 9.81817626953125, 15.332550048828125, 0.80474853515625, 0.0915679931640625, 17.45149040222168, 18.57703399658203, 17.88385009765625, 6.016315460205078, -7.144111633300781, 45.24845886230469, 46.334075927734375, 25.698177337646484, 103.43478393554688, 13.454547882080078, 11.718841552734375, 37.78692626953125, 20.074024200439453, 6.4434356689453125, 8.520606994628906, 25.659141540527344, 16.830718994140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000302.npy"}
{"epoch": 0.888235294117647, "step": 303, "batch_size": 128, "mean": 16.833295822143555, "std": 17.489248275756836, "min": -29.840084075927734, "p10": -1.7477663040161113, "median": 13.55246353149414, "p90": 40.53519592285156, "max": 94.08441162109375, "pos_frac": 0.8671875, "sample": [16.80621337890625, -0.9291763305664062, 23.17767333984375, 12.370214462280273, 13.122917175292969, 15.009159088134766, 27.868526458740234, 32.995391845703125, 21.987634658813477, 43.527679443359375, 44.77546691894531, 3.3946609497070312, 94.08441162109375, 3.3306121826171875, 2.2173328399658203, 14.598560333251953, 24.829051971435547, -14.542999267578125, 1.631744384765625, 6.3358306884765625, 50.15492248535156, 12.490968704223633, -4.975944519042969, 44.64689636230469, -3.1986923217773438, 9.054126739501953, 14.297111511230469, 11.486610412597656, 10.485504150390625, 19.012771606445312, -3.005422592163086, 23.352108001708984, 46.61148452758789, 9.851949691772461, 16.371597290039062, 9.826080322265625, 8.407039642333984, 14.867401123046875, 31.016677856445312, 36.997474670410156, 0.11502838134765625, 12.961334228515625, 22.42449951171875, 9.882621765136719, -29.840084075927734, 0.1411590576171875, -15.085487365722656, -4.160308837890625, 28.665557861328125, 56.091552734375, 6.161367416381836, 2.799579620361328, 2.4338531494140625, 3.1770172119140625, 1.4871845245361328, 13.18935775756836, 12.21054458618164, -3.7168922424316406, 17.89586639404297, 27.669715881347656, 15.380966186523438, 13.51959228515625, 5.6539306640625, 30.423187255859375, -3.0772132873535156, -9.732452392578125, 42.21619415283203, -5.527986526489258, 4.9177398681640625, -1.208770751953125, 7.565889358520508, -7.462646484375, 23.50564193725586, 4.163183212280273, 22.391517639160156, 30.909896850585938, 16.050201416015625, 13.32568359375, 51.95164489746094, 10.388137817382812, 2.9893035888671875, 11.461807250976562, -3.1200408935546875, 1.204416275024414, 7.208610534667969, 37.31769943237305, 13.210132598876953, 9.217941284179688, 32.154441833496094, 11.474052429199219, 20.869728088378906, 21.150177001953125, 12.005241394042969, 30.53643798828125, 10.010154724121094, 38.2332763671875, 24.18096923828125, 31.152835845947266, 14.598472595214844, 76.12107849121094, 21.613983154296875, -0.3831939697265625, 11.444107055664062, 18.89593505859375, 17.773910522460938, 45.35903549194336, 29.43780517578125, 16.203903198242188, 5.691083908081055, 13.965728759765625, 26.367706298828125, 7.870643615722656, 13.743274688720703, 24.690143585205078, 41.37176513671875, 41.53041458129883, 24.89214324951172, 10.040420532226562, 22.02326202392578, 40.176666259765625, 9.675235748291016, 22.98932647705078, 13.585334777832031, 35.96549987792969, 34.44554901123047, -0.31125640869140625, 20.03372573852539, 9.393674850463867], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000303.npy"}
{"epoch": 0.8911764705882353, "step": 304, "batch_size": 128, "mean": 17.535350799560547, "std": 15.632482528686523, "min": -9.655593872070312, "p10": -0.4250823974609369, "median": 15.626581192016602, "p90": 36.30956115722656, "max": 67.5570297241211, "pos_frac": 0.8828125, "sample": [46.34071731567383, 28.364013671875, 29.174842834472656, 10.814868927001953, 25.230876922607422, 9.89593505859375, 17.129783630371094, 31.684295654296875, 12.7095947265625, 28.433208465576172, 21.818851470947266, -1.672780990600586, 5.713409423828125, 4.585222244262695, 33.12960433959961, 16.06159210205078, 44.46630096435547, 9.075929641723633, 28.140907287597656, 2.0407180786132812, 30.674850463867188, -7.6146240234375, 33.41838836669922, 20.600296020507812, 10.068145751953125, 32.251502990722656, 16.98583984375, 31.437576293945312, 19.076156616210938, 15.628070831298828, -6.107463836669922, -2.0383987426757812, 10.232002258300781, 8.722946166992188, -9.655593872070312, 21.36363410949707, 15.617897033691406, 7.932044982910156, 5.721347808837891, 67.5570297241211, 5.72010612487793, 21.147846221923828, -6.6911468505859375, 16.053937911987305, 3.8630294799804688, 13.934089660644531, 36.15946960449219, 12.696697235107422, -0.7930374145507812, 38.28580856323242, -4.192626953125, 9.537933349609375, 7.537147521972656, 14.999176025390625, 12.812240600585938, 24.705184936523438, 29.719886779785156, 63.18743896484375, 0.5592803955078125, 11.862991333007812, 15.016714096069336, -6.285329818725586, 43.2281494140625, 26.585996627807617, 64.3645248413086, 20.847412109375, 11.065845489501953, 7.397132873535156, 21.8538818359375, 45.94041442871094, 18.000038146972656, 17.763957977294922, -0.1257476806640625, 12.312698364257812, 3.9743518829345703, 23.54241943359375, 50.75439453125, 10.723030090332031, 16.629287719726562, 13.719566345214844, 32.51728820800781, 5.479764938354492, 16.034652709960938, 26.498947143554688, 1.9085235595703125, -9.13909912109375, 4.331783294677734, -7.758506774902344, 17.952285766601562, 24.09339714050293, 24.638336181640625, -8.188335418701172, 25.54615020751953, 23.860549926757812, -0.26738739013671875, 26.844316482543945, 22.293418884277344, 22.6416015625, 14.008041381835938, 4.848405838012695, 17.224756240844727, 24.054534912109375, -6.048250198364258, 13.254688262939453, 3.7598228454589844, 4.4959869384765625, 1.3053436279296875, 14.354545593261719, 26.892051696777344, 2.9445877075195312, 15.625091552734375, 5.961311340332031, 38.44414520263672, 12.942657470703125, 59.154541015625, 12.015289306640625, 6.3309173583984375, 11.098030090332031, 5.14225959777832, 21.5843505859375, 4.600683212280273, 25.568084716796875, 28.983726501464844, 36.65977478027344, 29.80405044555664, 33.672454833984375, 28.838367462158203, 44.29314422607422], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000304.npy"}
{"epoch": 0.8941176470588236, "step": 305, "batch_size": 128, "mean": 17.51129150390625, "std": 17.125381469726562, "min": -29.313465118408203, "p10": -1.45289134979248, "median": 16.329242706298828, "p90": 36.85822448730468, "max": 77.51349639892578, "pos_frac": 0.8828125, "sample": [18.829952239990234, 18.25910186767578, 10.440460205078125, 27.490768432617188, 6.205638885498047, 32.596046447753906, 12.085113525390625, 13.961849212646484, 10.138870239257812, 10.065299987792969, 42.90166473388672, 16.283790588378906, 0.6393051147460938, 27.24884033203125, 19.079879760742188, 20.969696044921875, 35.183815002441406, 18.993865966796875, 59.559730529785156, 18.56298828125, -8.984054565429688, 10.835384368896484, 28.090295791625977, 37.5177001953125, -9.74981689453125, 14.590570449829102, 32.46772766113281, 12.223800659179688, 3.1899566650390625, -7.987602233886719, 7.24969482421875, -0.8108367919921875, 13.81739616394043, -4.04833984375, 30.12067413330078, 29.556747436523438, 1.2712249755859375, 25.819726943969727, 25.711830139160156, -1.32427978515625, -2.7560501098632812, 19.082443237304688, 29.774948120117188, 4.916450500488281, -8.506732940673828, 52.69272232055664, 21.462112426757812, 13.851303100585938, 4.024621963500977, 6.4747314453125, 23.62824249267578, 77.51349639892578, 13.971092224121094, 25.43621826171875, 34.45233917236328, 8.492122650146484, 10.343330383300781, 12.643486022949219, 17.434226989746094, 63.518829345703125, -9.425365447998047, 5.158855438232422, 16.091041564941406, 1.891183853149414, 39.931549072265625, 20.60882568359375, 15.725875854492188, 29.0483455657959, 2.9966964721679688, 17.80303955078125, 9.654815673828125, 9.35162353515625, 15.24542236328125, 13.543699264526367, -16.91683578491211, 14.636505126953125, 4.470493316650391, 18.64760398864746, 0.5465431213378906, 25.735702514648438, 0.3936805725097656, 32.561439514160156, 21.300865173339844, 23.420127868652344, 8.064464569091797, 30.06439971923828, 12.04974365234375, 28.954692840576172, -4.374002456665039, 29.039234161376953, 0.99517822265625, 19.04779052734375, 19.33123016357422, 2.430604934692383, 32.4603157043457, 31.60940933227539, 20.36650848388672, 17.626190185546875, -1.7529850006103516, 24.486618041992188, 0.08013916015625, 32.75920104980469, 47.297149658203125, 25.807409286499023, 16.37469482421875, 7.3359832763671875, -1.8263072967529297, 5.3058319091796875, 60.87353515625, 64.37728881835938, 44.062103271484375, 9.064071655273438, 20.206945419311523, 23.943588256835938, -14.981813430786133, 10.197639465332031, 41.48088073730469, 7.7619781494140625, 15.150348663330078, 18.595985412597656, 28.96286392211914, 38.43936538696289, 5.1671295166015625, 36.575592041015625, -29.313465118408203, 0.8763275146484375, 24.54864501953125, 31.996826171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000305.npy"}
{"epoch": 0.8970588235294118, "step": 306, "batch_size": 128, "mean": 19.265880584716797, "std": 17.903738021850586, "min": -19.204994201660156, "p10": -0.5127729415893554, "median": 17.025508880615234, "p90": 43.20278816223144, "max": 76.77458190917969, "pos_frac": 0.875, "sample": [14.316864013671875, 36.652069091796875, 22.111045837402344, 28.749923706054688, 43.8736686706543, 16.469032287597656, 0.8337841033935547, 30.93255615234375, 2.072359085083008, -19.204994201660156, 17.471038818359375, 50.85363006591797, 5.655853271484375, 67.967041015625, 23.615264892578125, -4.897296905517578, 24.850906372070312, 8.985652923583984, -4.32267951965332, 20.52257537841797, -0.4811992645263672, 10.05026626586914, -2.849376678466797, 19.132755279541016, 24.43933868408203, 2.156707763671875, 23.1544246673584, 3.0244007110595703, 38.04912567138672, 32.05841064453125, -0.5864448547363281, -0.0998687744140625, 24.21734619140625, 1.1598892211914062, 3.864229202270508, 51.07270812988281, -15.336219787597656, 2.289501190185547, -3.599018096923828, 2.962688446044922, 25.49696159362793, 14.267539978027344, 16.989547729492188, 62.23324203491211, 9.3944091796875, 19.64412498474121, 13.53558349609375, 15.919937133789062, 37.56245422363281, 42.91526794433594, 12.15584945678711, 36.95343780517578, 17.631439208984375, 40.456993103027344, 19.48345375061035, 10.282180786132812, 37.12493133544922, 11.749053955078125, 0.307281494140625, 23.9267578125, 50.62480163574219, 0.5524082183837891, 25.424362182617188, 3.942535400390625, 37.728240966796875, 27.090709686279297, 27.216537475585938, 8.07440185546875, 16.38958740234375, 25.437946319580078, 30.913406372070312, 19.2347412109375, 50.00245666503906, 16.15140151977539, 20.666095733642578, 37.38988494873047, 11.67047119140625, 8.688383102416992, 13.80643081665039, 17.06147003173828, 2.2110443115234375, 18.335464477539062, 23.9786376953125, 11.546344757080078, 24.183462142944336, 6.464527130126953, 38.09181213378906, -0.3270301818847656, 53.20796203613281, 1.4267082214355469, -4.200340270996094, 33.783729553222656, 76.77458190917969, 1.3466796875, 15.416374206542969, 7.251747131347656, -5.903717041015625, -10.052526473999023, 14.00054931640625, 26.045928955078125, 25.32373046875, 22.711456298828125, 11.140121459960938, 18.399871826171875, 46.66139221191406, 10.95263671875, 8.29543685913086, 7.094146728515625, 8.544029235839844, 28.807464599609375, 34.47032928466797, -4.421142578125, 8.704368591308594, 25.09971809387207, -8.457122802734375, 34.91630554199219, 51.07127380371094, 13.709878921508789, 55.938323974609375, 14.060958862304688, 36.247283935546875, 31.037765502929688, 3.9079742431640625, 25.376434326171875, 61.69708251953125, 29.515243530273438, 13.96328353881836, -0.6023406982421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000306.npy"}
{"epoch": 0.9, "step": 307, "batch_size": 128, "mean": 17.426212310791016, "std": 16.444828033447266, "min": -17.305641174316406, "p10": 0.7029994964599611, "median": 14.678707122802734, "p90": 40.360504913330075, "max": 83.33472442626953, "pos_frac": 0.90625, "sample": [14.1171875, 14.90692138671875, 15.78281021118164, 11.343055725097656, 22.544876098632812, 42.098411560058594, 25.906024932861328, 28.90178680419922, 13.782806396484375, 25.938140869140625, 11.77685546875, 2.781522750854492, 21.529582977294922, 23.246227264404297, 47.58623504638672, 40.054405212402344, -13.053726196289062, 6.978675842285156, 7.876373291015625, 53.50721740722656, 11.659341812133789, 8.464302062988281, 36.125152587890625, 0.7414207458496094, 1.2077770233154297, -0.396728515625, 28.553573608398438, 15.601837158203125, 3.576019287109375, 11.839223861694336, 32.25315856933594, 9.122940063476562, 4.282081604003906, 19.277259826660156, 9.207794189453125, -9.93532943725586, 45.85212707519531, 9.903831481933594, 53.34419250488281, 20.36038589477539, 30.640289306640625, 23.58868408203125, 42.709861755371094, 56.98888397216797, 18.211318969726562, 14.844795227050781, 8.507377624511719, 17.075820922851562, 13.948196411132812, 36.91700744628906, 23.52230453491211, 15.206085205078125, -7.889556884765625, 33.60322570800781, 5.9854736328125, 83.33472442626953, 20.233505249023438, 13.07659912109375, 2.9893798828125, 19.678245544433594, 3.96588134765625, 24.901016235351562, 28.173812866210938, -5.221122741699219, 9.392322540283203, 22.743362426757812, 10.826370239257812, 4.714996337890625, 17.55206298828125, 4.607635498046875, 9.344940185546875, 18.29326629638672, 12.992034912109375, 41.074737548828125, 10.897003173828125, 11.612585067749023, 36.91748046875, 12.81793212890625, 0.6133499145507812, 11.134443283081055, 23.61408233642578, 18.557830810546875, 14.368759155273438, 6.431146621704102, 18.72620391845703, 1.8580646514892578, 16.602962493896484, 31.23595428466797, 4.4009552001953125, 2.3755035400390625, 11.093507766723633, 3.8765697479248047, 19.727371215820312, 32.355186462402344, 44.841339111328125, 32.018402099609375, 4.477733612060547, 3.5769519805908203, 25.55973243713379, -12.801582336425781, 13.503509521484375, 33.15557861328125, 21.681121826171875, -17.305641174316406, 6.0876617431640625, 8.254470825195312, 9.5408935546875, 14.512619018554688, -0.015995025634765625, 5.5002593994140625, -3.1852798461914062, 17.079833984375, 14.488929748535156, -0.81817626953125, 33.7838134765625, 21.285995483398438, 22.043060302734375, 50.60142517089844, -12.333805084228516, 15.620086669921875, 15.853805541992188, 5.740379333496094, 8.69873046875, -5.258262634277344, 62.67340087890625, 15.36236572265625, 33.381103515625, 48.554481506347656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000307.npy"}
{"epoch": 0.9029411764705882, "step": 308, "batch_size": 128, "mean": 16.335128784179688, "std": 16.75162696838379, "min": -27.618438720703125, "p10": -2.9213928222656227, "median": 14.75571060180664, "p90": 37.63945350646972, "max": 78.44221496582031, "pos_frac": 0.8671875, "sample": [2.7221298217773438, 35.01136779785156, 23.66424560546875, 6.2305908203125, 5.680217742919922, 1.3541183471679688, 3.1307220458984375, 5.3998870849609375, 11.038108825683594, 36.79575729370117, -2.2474937438964844, 4.532234191894531, 7.8917388916015625, -4.3892822265625, 33.11762237548828, 4.28948974609375, 42.231597900390625, 12.38568115234375, 23.66944122314453, 30.745925903320312, 41.57732009887695, 21.44443130493164, 3.7764320373535156, 17.819520950317383, 18.218780517578125, 40.69204330444336, 20.234642028808594, 3.155550003051758, 23.844879150390625, -7.3404693603515625, 7.726280212402344, 6.243335723876953, 1.198455810546875, 4.709197998046875, 19.84393310546875, 29.500030517578125, 13.356452941894531, 23.734085083007812, 36.219635009765625, 9.035064697265625, 8.565670013427734, 47.405242919921875, 7.9361114501953125, 19.207183837890625, 28.324905395507812, 15.146183013916016, 11.436651229858398, 0.485076904296875, -4.4656524658203125, 26.01123809814453, 13.850296020507812, -0.3951911926269531, 11.26272201538086, 14.952598571777344, 16.464630126953125, 41.861785888671875, 10.892807006835938, 28.518951416015625, 23.34654998779297, 40.97370910644531, -8.911163330078125, 24.09613037109375, 34.20780944824219, 10.207099914550781, 25.04979705810547, 3.5472793579101562, 24.846954345703125, 23.917442321777344, 8.708953857421875, 20.886611938476562, 8.50518798828125, -2.29229736328125, 9.135848999023438, 14.956008911132812, 11.474185943603516, 17.434659957885742, 69.27848052978516, 34.60810089111328, -1.97454833984375, 23.222686767578125, -17.508466720581055, 7.952167510986328, 2.9790267944335938, -7.443000793457031, 2.4523983001708984, 22.48710060119629, 23.082670211791992, 20.804931640625, 24.73390769958496, 10.509605407714844, -13.3927001953125, -6.943092346191406, 35.46718978881836, -8.426422119140625, 7.4201202392578125, 16.864883422851562, 29.226165771484375, 34.33124542236328, 27.416736602783203, 24.438167572021484, 18.718521118164062, 24.51708221435547, 40.810279846191406, 9.985818862915039, -14.911209106445312, -27.618438720703125, 7.3918304443359375, 14.479339599609375, 3.7098236083984375, 23.405242919921875, 78.44221496582031, 8.613903045654297, 11.684249877929688, 21.942481994628906, 8.232345581054688, 27.985122680664062, -4.423576354980469, 54.9290771484375, 16.382598876953125, 8.557807922363281, 39.60807800292969, 26.57761001586914, 15.356735229492188, 14.558822631835938, 45.13311767578125, 4.1031646728515625, 51.55164337158203, -6.210273742675781], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000308.npy"}
{"epoch": 0.9058823529411765, "step": 309, "batch_size": 128, "mean": 15.197957992553711, "std": 16.688989639282227, "min": -17.334381103515625, "p10": -4.8457544326782225, "median": 14.50747299194336, "p90": 33.71338882446289, "max": 90.07107543945312, "pos_frac": 0.8125, "sample": [10.453941345214844, 21.89448356628418, -4.159423828125, 13.803129196166992, -8.901411056518555, 8.05801010131836, 36.90455627441406, 9.66293716430664, 3.747018814086914, 5.800758361816406, 5.389503479003906, 79.42755889892578, 12.110191345214844, 23.680152893066406, 17.042896270751953, 21.732025146484375, 25.311073303222656, -3.71380615234375, 37.980072021484375, 48.221282958984375, 32.00914001464844, 17.004310607910156, -0.4815826416015625, 25.221759796142578, 10.175228118896484, 6.9612274169921875, -0.3717193603515625, -3.456989288330078, 5.73541259765625, 10.738594055175781, 6.899604797363281, -1.5230960845947266, 18.112289428710938, 6.920377731323242, 6.771018981933594, 30.18377685546875, -13.6263427734375, 25.894588470458984, 16.79683494567871, 14.482540130615234, -9.02855110168457, 19.825218200683594, 32.73252868652344, 2.6348114013671875, 18.377418518066406, 23.253753662109375, 8.159748077392578, 19.132871627807617, 14.532405853271484, -11.150074005126953, 22.24951934814453, 33.501373291015625, 36.45452880859375, 28.50238800048828, 12.750621795654297, -4.1982269287109375, 13.597831726074219, 14.817695617675781, -4.807867050170898, 9.5106201171875, 29.223678588867188, 22.897140502929688, 13.796722412109375, 19.440690994262695, -12.95758056640625, 30.874067306518555, 4.0748748779296875, 15.132514953613281, -5.477020263671875, 12.54827880859375, 10.441352844238281, 28.72845458984375, -4.9341583251953125, 37.603050231933594, 3.6255264282226562, -4.761810302734375, -12.658222198486328, 5.62232780456543, 14.689804077148438, 20.6834716796875, 21.808284759521484, 28.870513916015625, 47.383392333984375, -10.479347229003906, -17.334381103515625, 17.245635986328125, -2.3298683166503906, 24.159435272216797, 20.767318725585938, 27.684158325195312, 21.49037742614746, 4.20454216003418, 38.61316680908203, 29.766845703125, 42.083091735839844, 26.474365234375, 90.07107543945312, 20.86749267578125, 25.36078643798828, -7.407905578613281, 34.208091735839844, 7.856048583984375, 30.278560638427734, 3.1373748779296875, -9.12884521484375, 3.2651901245117188, 14.875694274902344, 4.3874359130859375, 21.75231170654297, 17.637163162231445, 3.751312255859375, -1.005361557006836, 17.463455200195312, 7.787681579589844, 24.230926513671875, 8.589675903320312, 28.37109375, 3.2660064697265625, 12.146484375, 19.906513214111328, 4.6902923583984375, 11.866058349609375, 38.02496337890625, -6.7199554443359375, 11.862380981445312, 36.20734405517578, 29.449691772460938, 29.548316955566406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000309.npy"}
{"epoch": 0.9088235294117647, "step": 310, "batch_size": 128, "mean": 17.271244049072266, "std": 18.77022933959961, "min": -15.084667205810547, "p10": -2.1201622009277337, "median": 14.098132133483887, "p90": 47.82887611389159, "max": 77.35031127929688, "pos_frac": 0.859375, "sample": [-0.7396240234375, -2.5842742919921875, 15.675872802734375, 24.700050354003906, 20.349899291992188, 2.8779754638671875, 9.1556396484375, 13.107364654541016, 70.8992919921875, 1.6198482513427734, 17.098281860351562, 26.290664672851562, -14.281135559082031, -6.422630310058594, 29.346145629882812, 50.88731002807617, 13.51888656616211, -4.343231201171875, 18.96599578857422, 8.298416137695312, 11.609676361083984, -9.356285095214844, 1.7157955169677734, 24.484094619750977, 19.79400634765625, 1.0590972900390625, 1.3528690338134766, 2.237590789794922, 6.9958953857421875, 60.7396240234375, 1.4546737670898438, 14.728754043579102, 52.028038024902344, 14.677377700805664, -4.72393798828125, 16.49066162109375, 16.720001220703125, 57.449371337890625, 11.316253662109375, 15.559947967529297, 4.190944671630859, -15.084667205810547, -1.7463912963867188, -11.518218994140625, 7.219249725341797, 21.610305786132812, -5.974681854248047, 20.78075408935547, 13.408336639404297, 70.77885437011719, 19.86505126953125, 35.129783630371094, 29.52240753173828, 5.67991828918457, -11.560150146484375, 53.75483703613281, 25.518756866455078, 2.1075363159179688, 34.134490966796875, 15.353111267089844, 10.699020385742188, 11.288993835449219, -1.6957931518554688, 20.31409454345703, 17.003433227539062, 5.315393447875977, 50.669456481933594, 16.843528747558594, 9.48745346069336, 51.340980529785156, 32.62456512451172, 13.097349166870117, 30.545562744140625, 17.996292114257812, 31.389568328857422, 22.32037353515625, 30.219993591308594, 20.617469787597656, 9.711462020874023, 9.338241577148438, 14.709159851074219, 73.7728271484375, 6.686981201171875, 11.92425537109375, 33.12693786621094, 41.810768127441406, 77.35031127929688, 28.308578491210938, 32.76056671142578, 19.947986602783203, 7.900611877441406, 17.193763732910156, 50.96131896972656, 8.232063293457031, 18.038558959960938, 34.63806915283203, 7.8658599853515625, 17.246192932128906, -1.9212570190429688, 7.6512603759765625, 3.2952194213867188, 46.61148452758789, 28.246124267578125, 11.168106079101562, 1.6809024810791016, -2.61865234375, 16.633575439453125, -4.695644378662109, 23.90392303466797, 6.8416595458984375, 8.944438934326172, 17.021102905273438, 10.444524765014648, 27.467166900634766, -3.5011444091796875, 8.471328735351562, 59.35386657714844, 17.021194458007812, 28.663726806640625, 23.596145629882812, 2.6029434204101562, -1.2077484130859375, 9.43853759765625, 9.322254180908203, 0.1479320526123047, 1.5929794311523438, 7.48126220703125, 11.535331726074219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000310.npy"}
{"epoch": 0.9117647058823529, "step": 311, "batch_size": 128, "mean": 18.328840255737305, "std": 16.916139602661133, "min": -31.29498291015625, "p10": -2.407643127441406, "median": 18.357735633850098, "p90": 39.05059967041015, "max": 67.21821594238281, "pos_frac": 0.875, "sample": [24.84429931640625, 3.4995346069335938, -31.29498291015625, 16.315040588378906, 23.15082550048828, 29.49853515625, 21.458545684814453, 27.49884033203125, -10.451288223266602, 8.952983856201172, 20.88408660888672, 25.99859619140625, 67.21821594238281, 7.650787353515625, 41.927734375, 20.259021759033203, 16.46258544921875, -5.251289367675781, 30.059059143066406, 37.7615966796875, -0.886688232421875, 6.0306854248046875, 9.186275482177734, 2.469818115234375, 2.2106285095214844, 33.50018310546875, -9.239143371582031, 2.306600570678711, 10.147918701171875, 31.05504608154297, 20.476539611816406, 4.320030212402344, 3.1006393432617188, 28.171401977539062, 21.211044311523438, 12.067546844482422, 7.403419494628906, 38.2193603515625, 3.5815200805664062, 12.966329574584961, 55.981285095214844, 46.405029296875, 40.99015808105469, -2.5872039794921875, 32.71601867675781, 21.518630981445312, 5.9666595458984375, 25.862194061279297, -7.880805969238281, 28.379730224609375, 6.270133972167969, 31.570724487304688, 9.320077896118164, 37.43400955200195, 28.11370849609375, 20.012176513671875, 19.222557067871094, 31.918975830078125, 9.63912582397461, 27.43682861328125, 43.934783935546875, 22.88286590576172, 14.581352233886719, -3.2999916076660156, 23.621131896972656, 30.59088897705078, 8.57931137084961, 19.08732032775879, 35.243247985839844, 24.474937438964844, 1.288360595703125, 18.998727798461914, 15.287342071533203, 14.882759094238281, 31.130218505859375, -8.9652099609375, 23.936595916748047, -2.3306884765625, -8.948516845703125, 46.07318115234375, 50.41334533691406, -3.4242496490478516, 16.726184844970703, 50.03363037109375, -6.803535461425781, 17.24289321899414, 30.158721923828125, 13.349052429199219, 11.86614990234375, 3.5367660522460938, 44.88676452636719, 21.7540283203125, 53.77985382080078, 45.470176696777344, 7.7403564453125, 20.848121643066406, 63.65501403808594, 28.37548828125, 13.18450927734375, 31.9530086517334, 9.66639518737793, 10.359184265136719, 6.5390472412109375, 35.69696807861328, 1.1186103820800781, 0.07726287841796875, 12.81118392944336, 23.90618896484375, 27.18332290649414, 7.38768196105957, 29.140838623046875, 13.10076904296875, 35.412872314453125, 1.5396575927734375, 2.136566162109375, 7.559226989746094, 28.97943878173828, 5.803886413574219, 6.801727294921875, -9.625236511230469, 21.34075927734375, -2.9683990478515625, -1.8676834106445312, 30.767379760742188, 36.98371124267578, 17.71674346923828, 10.251483917236328, 29.44519805908203], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000311.npy"}
{"epoch": 0.9147058823529411, "step": 312, "batch_size": 128, "mean": 18.518695831298828, "std": 18.69999122619629, "min": -29.45697021484375, "p10": -3.5647636413574215, "median": 18.84124755859375, "p90": 42.56982116699218, "max": 79.71766662597656, "pos_frac": 0.8515625, "sample": [19.36922836303711, 1.59130859375, 0.7490291595458984, 48.462074279785156, 26.01128387451172, -9.25750732421875, 12.263534545898438, 56.513092041015625, 41.57182312011719, -29.45697021484375, 3.8193740844726562, 12.785873413085938, -0.5628623962402344, 4.95013427734375, 22.883522033691406, 14.187862396240234, 3.6603050231933594, 10.917280197143555, -12.896360397338867, 3.8853282928466797, 32.56871032714844, 24.030853271484375, 3.6428260803222656, 7.296836853027344, 20.47057342529297, -19.673080444335938, 79.71766662597656, 54.841705322265625, 11.588638305664062, 3.6097946166992188, -0.23915481567382812, 31.01740074157715, 39.23744201660156, 5.6723785400390625, 44.89848327636719, 5.715812683105469, 23.573036193847656, -8.63623046875, 32.77447509765625, -3.4315109252929688, -20.853240966796875, 11.671361923217773, 49.8948974609375, 13.071765899658203, 6.551456451416016, 22.871246337890625, 17.324325561523438, 1.3374862670898438, 25.401386260986328, 25.302406311035156, 39.998146057128906, 55.126373291015625, 10.431060791015625, 7.44154167175293, 35.94969177246094, 21.427490234375, -10.419876098632812, 11.800159454345703, 18.6182861328125, 20.40436553955078, 48.84662628173828, 9.436859130859375, 6.81787109375, -3.8756866455078125, 20.403749465942383, 5.526786804199219, 14.64996337890625, 24.619110107421875, -2.8660964965820312, 22.025306701660156, 19.064208984375, 29.93975830078125, 39.082481384277344, 19.320541381835938, 32.84266662597656, 10.950538635253906, 14.718338012695312, 31.12004852294922, 21.282432556152344, 10.149663925170898, -0.934326171875, 19.314807891845703, 5.7742767333984375, 37.65320587158203, 33.10424041748047, 7.52928352355957, 33.06812286376953, 10.358695983886719, 22.2786865234375, 20.24842071533203, 6.8220367431640625, -4.056884765625, 1.7879867553710938, 45.60798645019531, -9.062427520751953, 5.93994140625, 18.061981201171875, 22.331878662109375, 29.921897888183594, 4.809967041015625, 31.545135498046875, 34.0435791015625, 40.04148864746094, 29.97142791748047, 40.85974884033203, 5.927742004394531, 52.62310791015625, 57.536407470703125, -9.729904174804688, 7.307212829589844, 21.90192985534668, 31.210792541503906, 8.428024291992188, 11.881271362304688, 22.213783264160156, -5.634426116943359, 30.17633056640625, 39.32826232910156, 49.98020935058594, 33.39100646972656, -5.824882507324219, 9.991874694824219, -1.3259048461914062, 26.15570068359375, 26.248397827148438, 21.521575927734375, 38.380279541015625, 54.1536865234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000312.npy"}
{"epoch": 0.9176470588235294, "step": 313, "batch_size": 128, "mean": 16.741737365722656, "std": 17.54796028137207, "min": -17.493316650390625, "p10": -2.781155776977539, "median": 14.150964736938477, "p90": 39.416600036621084, "max": 70.21641540527344, "pos_frac": 0.8515625, "sample": [1.4430503845214844, 18.634681701660156, -4.970954895019531, 21.91580581665039, 31.583547592163086, 25.869930267333984, 18.551910400390625, 43.30940246582031, 6.201692581176758, 12.525550842285156, 50.237998962402344, 0.6083984375, 34.82513427734375, 17.21329116821289, 1.4032936096191406, 2.2733402252197266, 50.46424102783203, 19.401782989501953, 37.03710174560547, -0.08025360107421875, -14.807701110839844, 33.70716094970703, 18.92041778564453, 4.81468391418457, 19.668289184570312, 18.341106414794922, -10.890182495117188, 0.1810455322265625, 10.445060729980469, 14.241546630859375, 14.81904411315918, 29.428314208984375, 14.772109985351562, 27.076339721679688, 10.765926361083984, 0.9435348510742188, 12.344779968261719, -2.7610015869140625, 48.60993957519531, 37.34574890136719, 5.8504486083984375, 37.408935546875, 4.07830810546875, 3.134014129638672, 25.867721557617188, 8.73122787475586, 37.597259521484375, 28.954208374023438, -12.292289733886719, -6.354927062988281, 33.97064208984375, 11.258567810058594, 22.462936401367188, 11.641647338867188, 2.2645225524902344, 36.11766052246094, 5.6641082763671875, 9.964988708496094, 36.86605453491211, -17.493316650390625, 45.899200439453125, 8.799942016601562, 59.143402099609375, 21.74842071533203, 11.716712951660156, 19.004981994628906, 9.406539916992188, 1.0073928833007812, -0.9462223052978516, 15.926933288574219, 70.21641540527344, 3.5984878540039062, 19.712356567382812, 5.105119705200195, 20.14410400390625, 0.39969444274902344, 28.191925048828125, -3.55169677734375, 13.35440444946289, 5.462192535400391, 9.115110397338867, 2.757204055786133, 38.633087158203125, 35.65069580078125, 12.837646484375, 26.49655532836914, 14.060382843017578, 10.172569274902344, 17.553804397583008, 5.3068084716796875, -5.455841064453125, 15.583396911621094, 35.54467010498047, 33.7474365234375, 10.303787231445312, -5.401424407958984, 28.7567138671875, -17.34557342529297, 16.744140625, 25.60650634765625, 38.18457794189453, 28.845306396484375, -6.1137542724609375, 6.166461944580078, -6.699687957763672, 11.604454040527344, 26.31695556640625, 14.481491088867188, 9.348320007324219, 19.71686553955078, 1.6215705871582031, 46.79529571533203, 41.24479675292969, 2.0568466186523438, 61.459991455078125, 51.657684326171875, 43.40180969238281, 14.307121276855469, 12.806793212890625, 27.351417541503906, -2.8281822204589844, 52.60950469970703, -1.8846435546875, -0.8044013977050781, 3.0113372802734375, 8.319400787353516, 15.486373901367188, -2.637025833129883], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000313.npy"}
{"epoch": 0.9205882352941176, "step": 314, "batch_size": 128, "mean": 18.45773696899414, "std": 19.563859939575195, "min": -31.60845184326172, "p10": -2.419208335876464, "median": 16.425122261047363, "p90": 43.63862762451171, "max": 75.02793884277344, "pos_frac": 0.8359375, "sample": [18.248138427734375, 32.58271789550781, 43.18255615234375, 28.45867156982422, 8.089805603027344, -1.6346588134765625, 31.861488342285156, 0.4801177978515625, 6.71623420715332, 32.845672607421875, 14.944595336914062, 44.865234375, -31.60845184326172, 20.604888916015625, 32.59961700439453, 0.9407501220703125, 13.858688354492188, 70.92645263671875, 39.24261474609375, 12.145156860351562, 26.01568603515625, -0.6091156005859375, 44.665374755859375, -3.157440185546875, 8.914810180664062, 14.321029663085938, 32.51654052734375, 20.32836151123047, 25.18958854675293, -21.418975830078125, 0.5509986877441406, 23.9609432220459, 32.184661865234375, 10.08807373046875, 39.709632873535156, -21.61827850341797, 16.907630920410156, 20.79450798034668, -2.2339839935302734, 34.647369384765625, 75.02793884277344, -13.384429931640625, 34.808082580566406, 52.803955078125, 2.260814666748047, 0.1401042938232422, 9.149166107177734, 5.332782745361328, 3.5177230834960938, 7.897548675537109, 65.01409912109375, -18.543601989746094, 17.138565063476562, -5.4056549072265625, 30.45745849609375, 28.077491760253906, 5.753326416015625, 14.024818420410156, 8.212181091308594, 31.61638641357422, 38.21217346191406, -5.748268127441406, 20.5111083984375, 15.13473892211914, 5.793039321899414, 0.29552268981933594, 28.33121681213379, 31.984573364257812, 27.935897827148438, 38.070648193359375, 10.92922592163086, 11.433769226074219, 9.962991714477539, -0.682098388671875, 73.851806640625, 24.558197021484375, 16.270980834960938, 14.172714233398438, 23.599088668823242, 21.576492309570312, 29.83099365234375, 29.70983123779297, 31.8623046875, 28.74786949157715, 20.277332305908203, 13.64849853515625, 18.48602867126465, -2.851398468017578, -1.0598602294921875, -7.833587646484375, 12.85919189453125, 5.614967346191406, 56.438018798828125, 17.649620056152344, 19.832427978515625, 19.551313400268555, 7.459327697753906, -10.903831481933594, 11.526412963867188, 8.519905090332031, 3.6573410034179688, 55.811920166015625, 21.356063842773438, 27.937408447265625, 10.109233856201172, 4.254005432128906, 50.93110656738281, 28.013931274414062, 9.581748962402344, 15.072710037231445, 57.24433135986328, 3.5405120849609375, 16.57926368713379, -0.5091934204101562, 11.999734878540039, -2.1081695556640625, 54.105010986328125, 16.129676818847656, 45.55462646484375, -3.392528533935547, 33.2196044921875, -3.771045684814453, 24.735687255859375, 23.056446075439453, 43.19859313964844, 1.6847915649414062, 25.839012145996094, -1.8070526123046875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000314.npy"}
{"epoch": 0.9235294117647059, "step": 315, "batch_size": 128, "mean": 14.94253921508789, "std": 15.344103813171387, "min": -21.2882080078125, "p10": -2.733898162841796, "median": 12.246363639831543, "p90": 33.147928619384764, "max": 74.99162292480469, "pos_frac": 0.859375, "sample": [29.844833374023438, 30.21044158935547, 5.798679351806641, 16.69901466369629, 28.5411376953125, 17.6705322265625, 12.516258239746094, 15.335651397705078, 19.920272827148438, 12.483640670776367, 11.103048324584961, 37.47663879394531, -3.9648818969726562, 50.379608154296875, 11.725067138671875, -6.88055419921875, 74.99162292480469, 12.531654357910156, 11.058883666992188, 4.172882080078125, -4.0804290771484375, 30.197921752929688, 33.231300354003906, -9.138824462890625, 15.193984985351562, 2.4383716583251953, 20.86064910888672, 15.011810302734375, -1.2917022705078125, 22.43877410888672, -0.5436420440673828, 55.4056282043457, 19.114898681640625, 11.814033508300781, 4.085777282714844, -5.558441162109375, 21.980262756347656, 11.358024597167969, 19.350990295410156, -4.435264587402344, -1.7113571166992188, 21.932186126708984, 27.414382934570312, 5.82838249206543, 10.633216857910156, 30.19659996032715, 15.897224426269531, 4.874006271362305, -4.8484344482421875, 6.394927978515625, 14.795272827148438, 3.276611328125, 44.630157470703125, 18.129348754882812, 23.85155487060547, 13.776756286621094, 11.02960205078125, 31.051437377929688, 19.149871826171875, 18.139801025390625, 6.486839294433594, 6.4350433349609375, -9.274925231933594, -21.2882080078125, 54.13353729248047, 2.1717453002929688, 0.3311119079589844, 6.478263854980469, -6.188804626464844, 33.33348846435547, 6.9504241943359375, 30.4031982421875, 4.516939163208008, -13.280567169189453, 21.32526397705078, 7.0635986328125, 8.524444580078125, 13.087915420532227, 5.2433013916015625, 30.004798889160156, 28.082260131835938, -3.1982879638671875, 12.009086608886719, 10.555208206176758, 18.78564453125, 20.230133056640625, 11.44842529296875, 9.988624572753906, 14.884368896484375, 11.877086639404297, 8.440895080566406, 18.483028411865234, 16.347747802734375, 21.817310333251953, 9.44482421875, 32.139129638671875, 37.37451171875, 11.069612503051758, 16.937084197998047, 33.11219787597656, 9.381118774414062, 58.44664001464844, 29.539161682128906, 0.3484039306640625, -2.00006103515625, 44.89838409423828, 19.05352020263672, 42.466461181640625, 4.765514373779297, 7.285888671875, 1.5331954956054688, 3.5699005126953125, 2.9762344360351562, 10.14712905883789, 10.979484558105469, -5.770904541015625, 20.126907348632812, 15.444244384765625, 18.47858428955078, 32.98704528808594, 18.36312484741211, 3.0843658447265625, 9.003692626953125, -2.5348739624023438, 33.89982604980469, 1.1653881072998047, 24.108680725097656, 11.121625900268555], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000315.npy"}
{"epoch": 0.9264705882352942, "step": 316, "batch_size": 128, "mean": 14.720212936401367, "std": 19.161497116088867, "min": -33.43486022949219, "p10": -7.369859886169433, "median": 11.789043426513672, "p90": 38.23590698242187, "max": 105.98277282714844, "pos_frac": 0.8125, "sample": [5.803863525390625, 7.942619323730469, 2.5517730712890625, 28.176231384277344, -4.9844207763671875, 22.080894470214844, 22.189865112304688, 9.187835693359375, 23.08636474609375, 36.999473571777344, 5.4866180419921875, 17.947765350341797, 7.011684417724609, 3.7254695892333984, 16.262880325317383, -7.24224853515625, 11.421890258789062, 5.139007568359375, 57.537750244140625, 1.084869384765625, 2.9848289489746094, 21.644866943359375, 16.176076889038086, 10.692291259765625, 5.0916748046875, 28.476104736328125, 20.217132568359375, -7.667619705200195, 10.106164932250977, 20.746917724609375, -9.777412414550781, -12.267318725585938, -17.77100944519043, 10.403900146484375, -4.6610107421875, 19.746414184570312, -22.059249877929688, 39.961265563964844, 22.070362091064453, 2.7496261596679688, 20.93851089477539, 12.884176254272461, 18.002349853515625, 7.011192321777344, 29.013694763183594, 5.398712158203125, -7.804206848144531, 19.450180053710938, -6.432373046875, -0.6982421875, 54.86114501953125, 34.493194580078125, 5.514232635498047, -12.888267517089844, 34.45873260498047, -8.960556030273438, 12.28369140625, 22.963153839111328, 105.98277282714844, -5.269329071044922, 3.1006526947021484, 9.913589477539062, -0.5772857666015625, 26.267013549804688, 24.279842376708984, -33.43486022949219, 19.930496215820312, 17.560352325439453, 24.636756896972656, 9.349876403808594, 15.408676147460938, 44.43162536621094, 34.655548095703125, 14.820907592773438, 2.640411376953125, 6.852146148681641, 26.486045837402344, 7.9277496337890625, 51.27023696899414, 46.32798767089844, 64.959228515625, 13.689224243164062, -2.473562240600586, 6.435861587524414, 6.010993957519531, 20.626724243164062, 5.083900451660156, 17.591388702392578, 29.306167602539062, 63.94480895996094, 43.93603515625, 7.176361083984375, -12.92170524597168, -0.13472747802734375, 9.032764434814453, 19.85094451904297, 40.319305419921875, 34.40088653564453, 37.49646759033203, 11.587730407714844, 15.552148818969727, 8.575973510742188, 12.722915649414062, 10.481710433959961, -10.185005187988281, 20.4892578125, 45.03694152832031, -7.967132568359375, 26.87164306640625, 14.204751968383789, 3.19073486328125, -18.074127197265625, 49.27086639404297, 7.576148986816406, 3.1450881958007812, 27.078956604003906, 11.9903564453125, 26.847427368164062, 11.41313362121582, 15.801933288574219, -0.3974285125732422, 14.791656494140625, -0.5275993347167969, 5.896453857421875, 17.023834228515625, 9.227392196655273, 5.3807830810546875, 21.525054931640625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000316.npy"}
{"epoch": 0.9294117647058824, "step": 317, "batch_size": 128, "mean": 13.802099227905273, "std": 18.397342681884766, "min": -31.550125122070312, "p10": -6.240677833557128, "median": 12.325286865234375, "p90": 35.51110763549805, "max": 67.18315124511719, "pos_frac": 0.7890625, "sample": [18.66537094116211, 35.49219512939453, 9.332347869873047, -0.7707653045654297, -2.0281105041503906, 13.595710754394531, -0.1070556640625, -4.4159088134765625, 18.42607307434082, 34.50348663330078, 9.698501586914062, 18.258995056152344, 1.5651092529296875, 35.55523681640625, 33.08326721191406, -2.0791778564453125, 13.846559524536133, 15.722618103027344, -9.50729751586914, -3.071002960205078, -6.699119567871094, 6.912872314453125, -31.550125122070312, 11.465959548950195, 45.68511199951172, 2.00732421875, -13.866180419921875, 2.949310302734375, 14.94317626953125, 16.53565216064453, 1.72637939453125, 17.031883239746094, 49.097930908203125, 25.18783950805664, 10.679508209228516, 8.99285888671875, 24.120712280273438, 7.815132141113281, -0.30535888671875, 6.125953674316406, 24.3935546875, 36.80821228027344, 3.4443130493164062, 22.997791290283203, -6.8973541259765625, -1.8268051147460938, 34.78632736206055, 29.750228881835938, 27.504364013671875, 21.591655731201172, 7.707557678222656, 13.773788452148438, 18.50408172607422, 2.077911376953125, 14.924524307250977, 33.68328857421875, 20.442399978637695, 49.809326171875, 0.1295928955078125, 1.660888671875, 3.4989776611328125, -1.2189598083496094, 14.786346435546875, 22.628971099853516, -19.046058654785156, 16.78298568725586, 8.995426177978516, 50.18474578857422, 9.049619674682617, 5.5683135986328125, 9.43991470336914, 9.199527740478516, -0.7493820190429688, 8.57257080078125, 12.265510559082031, 21.571563720703125, 20.533843994140625, -21.001930236816406, -10.842300415039062, 24.7882080078125, 12.385063171386719, -13.724014282226562, 61.13134765625, 25.106828689575195, 63.676177978515625, 11.433670043945312, 19.340778350830078, 7.567577362060547, 33.517173767089844, 27.72332000732422, -13.108932495117188, 17.45452880859375, 13.759933471679688, 41.20252227783203, 28.1234130859375, 20.33707618713379, 12.101211547851562, 67.18315124511719, -30.770200729370117, 17.202850341796875, -5.41912841796875, 0.8706817626953125, 51.514068603515625, 14.052986145019531, -15.408355712890625, 8.591777801513672, 4.6845703125, 29.48383331298828, 33.38690185546875, -6.04420280456543, 62.344261169433594, 20.230886459350586, 22.55467414855957, 15.429691314697266, 1.2448577880859375, -10.115203857421875, 24.283119201660156, 2.5781936645507812, 16.97784996032715, 8.218399047851562, 2.028369903564453, 43.65325927734375, -5.9148101806640625, 27.5745849609375, -6.0139923095703125, 1.5922164916992188, 1.7474803924560547, 26.001800537109375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000317.npy"}
{"epoch": 0.9323529411764706, "step": 318, "batch_size": 128, "mean": 15.976702690124512, "std": 15.408635139465332, "min": -17.770973205566406, "p10": -2.4987785339355466, "median": 15.189638137817383, "p90": 36.81857757568359, "max": 62.92424774169922, "pos_frac": 0.8515625, "sample": [0.0046405792236328125, 17.418563842773438, 29.265914916992188, 8.806648254394531, 33.612510681152344, 13.41726303100586, 0.7233848571777344, 0.2478790283203125, 12.285400390625, 17.13360595703125, 40.10255432128906, 16.765243530273438, 14.448265075683594, 1.838348388671875, 19.589035034179688, 14.86260986328125, -6.3536376953125, 14.484477996826172, 12.421440124511719, 6.66656494140625, 19.02194595336914, -4.748634338378906, -1.4947509765625, 29.895652770996094, -17.770973205566406, 6.777931213378906, 21.437362670898438, -2.7248077392578125, 7.1259002685546875, 25.803577423095703, 18.337318420410156, 4.509700775146484, 36.67845153808594, 8.78947639465332, 15.810585021972656, 21.028244018554688, 19.86355209350586, 1.5105743408203125, 8.308837890625, 35.54757308959961, 23.13924789428711, 12.084327697753906, 2.2137908935546875, 13.236114501953125, 16.430877685546875, 50.61747741699219, -2.0394248962402344, 28.880355834960938, 15.218143463134766, 42.461509704589844, 37.32738494873047, 17.40293312072754, 13.568572998046875, 16.369403839111328, 16.53740119934082, -8.47503662109375, 9.938308715820312, -2.4019088745117188, -14.281288146972656, 51.65635681152344, 8.363826751708984, -5.881328582763672, -3.00018310546875, 7.4820709228515625, 9.863321304321289, 19.723419189453125, 29.012298583984375, 8.254669189453125, 10.494094848632812, -0.5096302032470703, 48.13916015625, -2.090879440307617, -3.445465087890625, 26.018936157226562, 30.188522338867188, 3.453227996826172, 29.09027099609375, 9.935737609863281, 7.581977844238281, 4.614631652832031, 17.804893493652344, 49.169189453125, 8.4886474609375, 34.07600402832031, 31.071502685546875, 35.7247200012207, 18.20972442626953, -10.029373168945312, 5.239969253540039, -6.373023986816406, 30.720802307128906, -16.365882873535156, 25.078109741210938, 20.26752471923828, 41.70919418334961, 14.311386108398438, 23.139450073242188, -0.03925323486328125, 21.19158172607422, 29.881546020507812, 15.1611328125, 23.413333892822266, 27.060649871826172, 9.982978820800781, 37.53102111816406, 13.57403564453125, 24.537364959716797, 37.145538330078125, 5.178901672363281, 40.74079895019531, 2.1672210693359375, -17.11669158935547, 28.467666625976562, 18.759363174438477, 29.081363677978516, 28.550521850585938, 17.36748695373535, 33.43324279785156, 15.026847839355469, 16.81085205078125, 40.938499450683594, 62.92424774169922, 11.884603500366211, 16.482158660888672, 4.970909118652344, 6.66455078125, 2.5930747985839844, 19.788070678710938], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000318.npy"}
{"epoch": 0.9352941176470588, "step": 319, "batch_size": 128, "mean": 17.99627113342285, "std": 15.631632804870605, "min": -8.040298461914062, "p10": 0.7711900711059575, "median": 15.3094482421875, "p90": 40.56772155761718, "max": 70.36139678955078, "pos_frac": 0.90625, "sample": [17.753812789916992, 12.004798889160156, -3.02783203125, 24.77921485900879, 32.93809127807617, 26.642112731933594, 8.796424865722656, -3.639934539794922, 22.107616424560547, -4.1212615966796875, 31.793701171875, 8.854007720947266, 37.57152557373047, 4.628684997558594, 10.837831497192383, 70.36139678955078, 16.629859924316406, 7.4162445068359375, 28.44782257080078, -4.994874954223633, 12.146499633789062, 2.8097686767578125, 29.610637664794922, 17.85596466064453, 29.826698303222656, 12.563411712646484, 26.7568359375, 6.6918182373046875, 16.978607177734375, 25.383941650390625, 3.5570640563964844, 9.163484573364258, 48.35137176513672, 52.15815734863281, 27.514663696289062, 18.049114227294922, 9.454599380493164, 4.6707305908203125, 13.5767822265625, 45.464630126953125, 20.651811599731445, 14.069671630859375, 10.065502166748047, 14.777032852172852, 8.292892456054688, 12.674335479736328, -3.85797119140625, 8.968019485473633, 39.936309814453125, 53.57891082763672, 22.928062438964844, 17.96131134033203, -3.1421432495117188, 25.317214965820312, -5.510589599609375, -6.308876037597656, 18.056373596191406, 48.355621337890625, 21.208114624023438, -8.040298461914062, 1.3234367370605469, 6.71942138671875, 15.20648193359375, 3.391866683959961, 47.37188720703125, -6.76715087890625, 24.37823486328125, 16.53003692626953, 9.27237319946289, 12.081787109375, 30.791709899902344, 10.980392456054688, 3.45489501953125, 27.90904998779297, 30.534286499023438, 5.112022399902344, 1.3670158386230469, 14.349563598632812, 17.254241943359375, 22.77352523803711, 7.7477874755859375, 9.478052139282227, 36.322235107421875, 12.925662994384766, 15.766143798828125, 7.381439208984375, 11.691497802734375, 7.997646331787109, 16.712432861328125, 13.244121551513672, 12.882894515991211, 0.4548530578613281, 22.61114501953125, 5.779155731201172, 16.028472900390625, 1.779083251953125, 24.825990676879883, 0.9067630767822266, 37.085533142089844, 56.91297149658203, 15.455123901367188, 47.312957763671875, 14.073287963867188, 10.725021362304688, 12.823793411254883, 48.99152374267578, 9.547821044921875, 15.41241455078125, 42.041015625, 4.101409912109375, 31.792388916015625, 53.36864471435547, -2.1622657775878906, 18.486366271972656, 23.443634033203125, 31.8416748046875, -1.2648544311523438, 2.2656211853027344, 38.043296813964844, 32.519317626953125, 7.5881500244140625, 38.552391052246094, 19.523231506347656, 46.24485778808594, 2.8091354370117188, 15.582298278808594, 21.911727905273438, 21.578407287597656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000319.npy"}
{"epoch": 0.9382352941176471, "step": 320, "batch_size": 128, "mean": 16.411710739135742, "std": 17.618513107299805, "min": -14.906076431274414, "p10": -5.620159912109375, "median": 13.816362380981445, "p90": 40.53978080749511, "max": 76.30010986328125, "pos_frac": 0.8203125, "sample": [8.475677490234375, 9.359321594238281, 14.87213134765625, 29.055892944335938, 37.60388946533203, -6.2657928466796875, 54.08639907836914, 27.401611328125, -14.906076431274414, -4.975433349609375, 13.003986358642578, 15.857147216796875, 38.31863021850586, -9.436691284179688, 36.68079376220703, 27.97530746459961, 14.893142700195312, -7.871849060058594, 10.479866027832031, 26.876075744628906, 14.22292709350586, 21.695716857910156, 33.833740234375, -3.2916717529296875, -10.994384765625, 17.412490844726562, 8.588953018188477, 6.962333679199219, 12.273197174072266, 7.880088806152344, -9.059078216552734, -1.9552841186523438, 43.39311981201172, 5.6099395751953125, 14.718734741210938, -3.0323867797851562, 0.19579315185546875, -12.116645812988281, 13.149368286132812, 13.650314331054688, 23.98127555847168, -5.635978698730469, 6.052635192871094, -5.613380432128906, 10.403656005859375, 16.219009399414062, 28.118186950683594, 11.8193359375, 15.524612426757812, 8.656387329101562, 19.347312927246094, 16.883399963378906, 32.31610107421875, 6.987937927246094, 1.3414154052734375, 9.388587951660156, 25.537109375, -4.69757080078125, 20.672508239746094, 40.75065612792969, 3.9055728912353516, 41.310943603515625, 49.791290283203125, 13.633865356445312, 25.838775634765625, 42.44207763671875, 8.377935409545898, 14.340087890625, 5.369569778442383, 18.168869018554688, 26.328067779541016, 10.787872314453125, 27.067764282226562, -10.798431396484375, 11.124298095703125, -11.314014434814453, 0.34375, 9.512557983398438, 13.555130004882812, 9.006298065185547, 21.069557189941406, 24.654037475585938, 58.41290283203125, 0.46858978271484375, -0.2845478057861328, 14.061431884765625, -6.3407440185546875, 25.412322998046875, -4.692890167236328, 39.224212646484375, 63.40985107421875, 35.502227783203125, -13.82586669921875, 25.251998901367188, -0.9292144775390625, 0.07001304626464844, 3.332416534423828, 12.293830871582031, 20.20600128173828, 12.538139343261719, 15.820388793945312, 13.982410430908203, 13.640226364135742, 12.900579452514648, -2.0421371459960938, -11.32821273803711, 12.705406188964844, 4.4400634765625, 76.30010986328125, 30.115562438964844, 40.449405670166016, 44.81279754638672, 21.59064483642578, 39.32605743408203, 23.024314880371094, 29.577171325683594, 25.140399932861328, 9.378829956054688, 33.246002197265625, 51.086273193359375, 14.597091674804688, 11.446762084960938, 19.404666900634766, 55.812782287597656, 29.172164916992188, 47.52378845214844, 11.40643310546875, 15.867853164672852], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000320.npy"}
{"epoch": 0.9411764705882353, "step": 321, "batch_size": 128, "mean": 17.976694107055664, "std": 18.384021759033203, "min": -26.664581298828125, "p10": -1.462411117553711, "median": 15.442719459533691, "p90": 39.62427978515625, "max": 92.9432373046875, "pos_frac": 0.875, "sample": [9.365928649902344, 18.853836059570312, 21.4143123626709, 31.653167724609375, 6.8512725830078125, 9.740432739257812, 28.914382934570312, 22.64525604248047, 5.742435455322266, 10.973495483398438, 10.03466796875, 14.836891174316406, 14.179985046386719, 7.382255554199219, 10.229835510253906, 32.24259948730469, 20.81390380859375, 0.8251800537109375, 21.683578491210938, 24.745525360107422, 61.02605438232422, -11.828643798828125, 3.80718994140625, 11.440338134765625, 13.109943389892578, 58.77472686767578, 34.94816589355469, 37.36457824707031, 30.813255310058594, 32.990623474121094, 18.029022216796875, 26.004501342773438, 1.4655036926269531, -2.5054473876953125, -10.482376098632812, 24.92755126953125, 1.638824462890625, 2.561084747314453, 91.87632751464844, 19.471397399902344, 13.146015167236328, 2.4947128295898438, -1.8051834106445312, 9.914886474609375, 18.775297164916992, 25.606613159179688, 10.739547729492188, 25.263572692871094, 16.6085205078125, -2.6123046875, 16.526269912719727, 1.974456787109375, 16.5341796875, 17.54465103149414, 2.798107147216797, 8.448196411132812, 19.19097137451172, 36.580841064453125, 27.960311889648438, 22.08343505859375, 1.7829780578613281, 25.94025421142578, 23.394912719726562, 16.70661163330078, 41.17820739746094, 21.053131103515625, 28.012706756591797, 31.87773895263672, 13.136676788330078, 4.3133697509765625, 69.6956787109375, -5.61083984375, 10.597320556640625, 42.706634521484375, -26.664581298828125, 12.505535125732422, -1.0269317626953125, 7.954444885253906, 17.974727630615234, 7.138021469116211, 27.839134216308594, 45.03527069091797, 8.104301452636719, 46.9990234375, 32.157073974609375, 32.47407531738281, -9.06744384765625, 5.644451141357422, 20.297836303710938, -0.2148590087890625, 92.9432373046875, 17.217212677001953, -4.216552734375, 10.20623779296875, 5.676103591918945, -1.5785808563232422, 14.063850402832031, 6.447175979614258, -2.1627979278564453, 3.375396728515625, 17.2412109375, -1.4617919921875, 5.5447235107421875, 42.78363037109375, 19.442859649658203, 7.6485748291015625, 6.182830810546875, 16.01726722717285, 26.803810119628906, 26.286773681640625, 13.475456237792969, 20.268199920654297, 50.270294189453125, 37.65031433105469, 3.950164794921875, 14.868171691894531, -1.4638557434082031, 39.62248992919922, 14.180057525634766, 13.424781799316406, -4.1803741455078125, 57.03143310546875, 23.701187133789062, 20.12310791015625, 9.076812744140625, 31.903106689453125, 0.7557907104492188, 39.628456115722656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000321.npy"}
{"epoch": 0.9441176470588235, "step": 322, "batch_size": 128, "mean": 16.720640182495117, "std": 16.631458282470703, "min": -26.11553955078125, "p10": -1.3976377487182612, "median": 15.57975959777832, "p90": 40.219123077392574, "max": 65.90042114257812, "pos_frac": 0.875, "sample": [24.873943328857422, 26.73834228515625, 18.569440841674805, 9.187156677246094, 22.58251953125, 17.649085998535156, 23.544933319091797, 2.3304061889648438, 33.012786865234375, 22.921356201171875, 17.76204490661621, 38.15258026123047, 14.213798522949219, 15.566875457763672, -3.341625213623047, -8.160797119140625, -11.687110900878906, 49.145477294921875, 54.988075256347656, 54.176544189453125, 23.402023315429688, 17.79440689086914, 23.071123123168945, 17.271827697753906, 33.318115234375, 6.517124176025391, 34.993690490722656, 5.8558502197265625, 5.770641326904297, 8.855308532714844, 22.583293914794922, 6.252492904663086, -8.212152481079102, 3.918426513671875, -1.0411396026611328, 1.1685867309570312, 15.574382781982422, 19.129192352294922, -0.6709671020507812, 54.352272033691406, 7.655036926269531, -1.7743072509765625, 15.428070068359375, 13.431022644042969, 13.475860595703125, 14.023033142089844, -4.415260314941406, 22.360868453979492, 7.59326171875, 36.079444885253906, 65.90042114257812, 39.91248321533203, 41.106781005859375, 14.034812927246094, 2.1570510864257812, 10.431777954101562, 22.263778686523438, 1.4706039428710938, 10.418510437011719, 20.32366943359375, 7.147438049316406, 13.447769165039062, -26.11553955078125, 15.818191528320312, 49.26239013671875, 20.84933090209961, 40.93461608886719, -20.58929443359375, -7.7053070068359375, 24.89786720275879, -16.085830688476562, 10.085517883300781, -2.4189720153808594, 17.142982482910156, 19.399986267089844, -1.2362079620361328, 27.04571533203125, 37.28191375732422, 10.75313949584961, 13.243057250976562, 21.756988525390625, 0.5296821594238281, 30.06465721130371, 4.256742477416992, 24.229400634765625, 29.347763061523438, 2.804180145263672, 9.112770080566406, 4.222240447998047, 15.585136413574219, 33.02629852294922, 2.09100341796875, 4.662752151489258, 1.4941177368164062, 36.26129150390625, 54.132843017578125, 11.967178344726562, 10.479927062988281, 17.302135467529297, 17.660911560058594, 11.887809753417969, 7.436809539794922, 19.938825607299805, 48.32191467285156, 22.54388427734375, 21.561527252197266, -9.176250457763672, 18.228897094726562, 25.130233764648438, 36.246055603027344, 6.972503662109375, 5.376983642578125, 29.81757354736328, 15.941413879394531, 43.986595153808594, 2.532743453979492, 24.24675750732422, 45.370628356933594, 15.82455062866211, -7.574972152709961, 16.972583770751953, 5.624126434326172, 13.74652099609375, 9.737052917480469, 5.159870147705078, 49.62403869628906, 10.666519165039062, 15.942626953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000322.npy"}
{"epoch": 0.9470588235294117, "step": 323, "batch_size": 128, "mean": 15.796025276184082, "std": 16.117319107055664, "min": -18.72673797607422, "p10": -3.6707681655883784, "median": 13.930732727050781, "p90": 36.1272590637207, "max": 73.5977783203125, "pos_frac": 0.8359375, "sample": [7.356950759887695, 30.96759033203125, 1.5748748779296875, 24.186294555664062, -2.7011032104492188, -10.63421630859375, 30.3013916015625, 23.57526397705078, 18.386856079101562, 10.857070922851562, 68.60379791259766, 14.629308700561523, 10.921600341796875, -0.6175708770751953, 23.375160217285156, 10.838836669921875, 8.320419311523438, 16.393936157226562, 60.77372741699219, 4.22210693359375, 6.353271484375, 42.1268310546875, -3.578889846801758, 6.145072937011719, 26.4189510345459, 9.902908325195312, 14.419593811035156, 6.651817321777344, 38.52918243408203, 0.477630615234375, 35.91864013671875, 33.18951416015625, 12.117362976074219, -1.40576171875, 3.8926010131835938, 16.504470825195312, 22.641799926757812, -3.885150909423828, 13.190170288085938, 13.273635864257812, 12.566295623779297, 20.386581420898438, -10.600112915039062, -7.357334136962891, -18.72673797607422, 4.712501525878906, -5.302486419677734, 0.7491416931152344, 73.5977783203125, 17.64250946044922, -0.7333450317382812, 25.508991241455078, 14.486221313476562, 33.92253112792969, 33.43199157714844, 21.055694580078125, 15.159673690795898, -2.602081298828125, -12.819786071777344, 7.009803771972656, 19.54828453063965, 3.356355667114258, 28.16681671142578, -1.795806884765625, 37.949485778808594, 4.037506103515625, 26.41593360900879, 15.122932434082031, 16.089614868164062, 1.0459518432617188, 34.4569091796875, 27.801353454589844, 9.240341186523438, 4.970756530761719, 20.175796508789062, 41.46942901611328, 11.741043090820312, 20.645465850830078, 6.5036468505859375, 12.11843490600586, -4.548095703125, 9.361251831054688, 26.058670043945312, 19.77867889404297, 14.010406494140625, 15.316879272460938, 37.8797607421875, 10.2362060546875, 25.486480712890625, 34.816307067871094, 44.17726135253906, 12.963134765625, 18.77318572998047, -4.18310546875, 50.13789367675781, 7.106744766235352, 43.83268737792969, 9.8946533203125, 10.38037109375, 34.54266357421875, 9.972091674804688, 13.060020446777344, 15.065628051757812, 21.0656681060791, -5.986289978027344, 18.784042358398438, 36.614036560058594, 34.018211364746094, 1.955636978149414, 24.876388549804688, 10.803783416748047, 11.346939086914062, 18.933937072753906, 29.3697509765625, 12.555091857910156, -9.224157333374023, -2.4108009338378906, -6.927913665771484, 13.851058959960938, 17.033523559570312, 9.066009521484375, 14.983543395996094, 27.13849639892578, 20.887863159179688, -10.47800064086914, 39.43596267700195, 7.953407287597656, 26.761192321777344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000323.npy"}
{"epoch": 0.95, "step": 324, "batch_size": 128, "mean": 18.0839786529541, "std": 18.774473190307617, "min": -18.687606811523438, "p10": -1.4036912918090816, "median": 15.917908668518066, "p90": 42.92532272338866, "max": 96.3758544921875, "pos_frac": 0.875, "sample": [4.524909973144531, 25.469261169433594, 32.04014587402344, 15.327117919921875, 27.80382537841797, 20.011886596679688, 25.98680877685547, 2.9006195068359375, 12.690740585327148, 14.933441162109375, 36.62609100341797, 15.616304397583008, 16.094146728515625, -13.850807189941406, 19.678409576416016, 5.159811019897461, 6.616912841796875, 10.776432037353516, 24.545257568359375, 18.893905639648438, -6.0576324462890625, 27.320159912109375, 15.102027893066406, -18.687606811523438, 6.249176025390625, 37.347328186035156, 18.824487686157227, 18.081398010253906, 49.04981231689453, 53.07701110839844, 41.630645751953125, 11.640335083007812, 15.997121810913086, 4.198747634887695, 32.28971862792969, 18.43362045288086, 2.52392578125, 20.952133178710938, 31.23596954345703, 19.426010131835938, 29.955772399902344, 47.22588348388672, -7.3316497802734375, 33.28797912597656, 0.26421356201171875, 57.59426498413086, 38.06163787841797, 5.059000015258789, 16.8182373046875, -1.9087028503417969, 46.43385314941406, 2.183135986328125, -14.506431579589844, 1.682668685913086, 20.693931579589844, 8.210708618164062, -10.623931884765625, 21.143077850341797, -4.904119491577148, 21.72119140625, 4.763641357421875, 15.538139343261719, -11.024139404296875, 18.314292907714844, -1.2731914520263672, -4.463127136230469, 2.0213165283203125, 24.72259521484375, 6.162860870361328, 26.521461486816406, 70.79298400878906, 68.62415313720703, 45.94623565673828, 37.06227111816406, 25.73834228515625, -9.68136978149414, -0.5648765563964844, 6.822715759277344, 12.481460571289062, 25.022056579589844, 15.909896850585938, 96.3758544921875, 13.100616455078125, 1.5431995391845703, 0.9889984130859375, 15.925920486450195, 11.951034545898438, -1.70819091796875, 12.126205444335938, 4.759798049926758, 40.437286376953125, 20.605804443359375, 7.117612838745117, 6.753301620483398, 15.205398559570312, 0.5618362426757812, 49.663330078125, 35.538352966308594, 3.9851608276367188, 5.713373184204102, 23.666213989257812, 23.66124725341797, -0.6214447021484375, 6.785774230957031, 8.7200927734375, 20.183982849121094, 10.6275634765625, 22.131149291992188, 22.523597717285156, 21.32396697998047, 16.707901000976562, 7.573890686035156, 14.979202270507812, 33.38262176513672, 38.89033508300781, 6.244468688964844, 9.613040924072266, 49.19879150390625, 59.25360107421875, 2.415863037109375, 26.67962646484375, 34.587127685546875, 12.223617553710938, 25.7994384765625, 7.3636627197265625, 26.676719665527344, -16.069847106933594, 48.604095458984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000324.npy"}
{"epoch": 0.9529411764705882, "step": 325, "batch_size": 128, "mean": 16.614822387695312, "std": 17.4611759185791, "min": -24.191497802734375, "p10": -2.5189451217651353, "median": 13.396818161010742, "p90": 40.82988471984863, "max": 62.64210510253906, "pos_frac": 0.8515625, "sample": [0.2630767822265625, 33.201690673828125, 19.009458541870117, -1.2853260040283203, 41.9057731628418, 13.702625274658203, -3.70843505859375, 12.183122634887695, 6.797943115234375, 34.30574035644531, -3.470888137817383, 21.347280502319336, 15.673664093017578, 5.280261993408203, -1.9135894775390625, 6.547428131103516, 2.7854385375976562, 10.182380676269531, 16.145751953125, 10.60716438293457, 31.140609741210938, 30.539344787597656, 18.372482299804688, 13.821159362792969, 27.419830322265625, 3.7228832244873047, 0.5475654602050781, 0.9500732421875, 17.42041015625, 56.032318115234375, 12.279510498046875, 29.926185607910156, 7.450408935546875, 9.855209350585938, 59.04588317871094, -7.412384033203125, 1.5139942169189453, 3.81201171875, 11.610885620117188, -2.1109695434570312, 19.830842971801758, 7.275300979614258, 37.65620422363281, 14.34820556640625, 5.5941009521484375, 15.117195129394531, 6.570991516113281, -0.5500411987304688, 31.96380615234375, 12.238960266113281, 1.9969978332519531, 33.34465026855469, 19.4202880859375, 1.885772705078125, 30.090858459472656, 60.069244384765625, 31.187774658203125, -6.593997955322266, 2.8639144897460938, 10.493511199951172, 4.5632781982421875, 10.100494384765625, 8.239028930664062, 24.905685424804688, -4.11456298828125, -5.0864715576171875, 23.207717895507812, 23.119529724121094, 16.038490295410156, 36.485137939453125, 28.226425170898438, -2.05615234375, 14.000358581542969, 42.28712463378906, 17.80116844177246, 62.64210510253906, 59.443328857421875, 20.710275650024414, 16.531646728515625, 47.348838806152344, 1.8232002258300781, -1.1349029541015625, 15.752326965332031, 37.266929626464844, 16.899341583251953, 55.25677490234375, 3.176441192626953, 13.091011047363281, 16.4371337890625, 52.36127471923828, 24.5689697265625, 11.744705200195312, 50.59587097167969, 34.54792785644531, 3.0788497924804688, -8.127859115600586, 37.26013946533203, 27.60709571838379, 40.36878967285156, -10.410087585449219, 44.785064697265625, 9.335769653320312, 28.84619903564453, 31.969467163085938, 17.222158432006836, 16.62828254699707, 11.912109375, 3.361968994140625, 7.621612548828125, 11.413009643554688, 51.97972106933594, 15.44390869140625, 4.124668121337891, 11.917068481445312, -4.790937423706055, 33.644065856933594, -18.452735900878906, 19.032363891601562, 28.75839614868164, -4.050350189208984, 3.6700668334960938, 6.1107330322265625, -24.191497802734375, 27.432540893554688, 12.00567626953125, -5.34259033203125, 10.8062744140625, 8.644355773925781], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000325.npy"}
{"epoch": 0.9558823529411765, "step": 326, "batch_size": 128, "mean": 18.854167938232422, "std": 17.444089889526367, "min": -15.67193603515625, "p10": -0.1653106689453125, "median": 15.094409942626953, "p90": 43.887586975097655, "max": 79.67526245117188, "pos_frac": 0.890625, "sample": [57.43791961669922, 29.48187255859375, 13.336013793945312, 44.870849609375, -1.3029937744140625, 5.7984466552734375, 21.620197296142578, 31.457828521728516, 4.699134826660156, -1.832794189453125, 6.3139190673828125, 26.97577667236328, 17.497154235839844, -15.67193603515625, 50.813262939453125, 27.791366577148438, 28.147186279296875, -1.8481407165527344, 57.86468505859375, 23.117237091064453, 30.789878845214844, 4.232200622558594, 42.39329528808594, 4.533468246459961, 7.366397857666016, 11.251296997070312, 7.053668975830078, 7.530336380004883, 23.526691436767578, 27.065040588378906, 50.821815490722656, 6.060264587402344, 16.992279052734375, 11.589706420898438, -2.602752685546875, 22.456771850585938, 22.06239128112793, 3.0687026977539062, 19.02276611328125, 0.26891326904296875, 35.8760986328125, 12.78363037109375, 4.5161895751953125, -10.086494445800781, 32.87621307373047, 12.347419738769531, 2.214479446411133, -0.1583709716796875, 18.65814208984375, 20.458999633789062, -4.202524185180664, 7.508180618286133, 4.8973846435546875, 43.93760681152344, 17.140365600585938, 57.20710754394531, 8.198719024658203, -0.1815032958984375, 42.12408447265625, 13.263107299804688, 15.696250915527344, 29.72966766357422, 10.905815124511719, 5.299125671386719, 7.294837951660156, 11.801582336425781, 35.2379150390625, 7.385871887207031, -4.076271057128906, 9.858406066894531, 13.374885559082031, 0.9093399047851562, 3.2823867797851562, 40.242828369140625, 59.994720458984375, 2.4621658325195312, 10.428085327148438, 3.4043807983398438, 14.986396789550781, 8.836318969726562, 5.9669647216796875, 52.375709533691406, 32.42326736450195, 30.85589599609375, 14.646499633789062, 15.202423095703125, -3.6303768157958984, 18.37335205078125, 52.379730224609375, 3.0060272216796875, 51.34051513671875, -1.6615219116210938, 43.86614990234375, 8.177909851074219, 18.42223358154297, 15.430988311767578, 24.666229248046875, 40.290916442871094, 36.25462341308594, 45.83565902709961, 42.16163635253906, 7.3575286865234375, -3.3810672760009766, 7.0926971435546875, 19.21389389038086, 11.697158813476562, 8.022945404052734, 22.260787963867188, 15.627580642700195, 13.301734924316406, 32.866050720214844, 79.67526245117188, 12.958778381347656, 28.855361938476562, 8.54193115234375, 9.268714904785156, 32.632102966308594, 34.964813232421875, 32.061370849609375, 4.823585510253906, 36.2457275390625, -11.994827270507812, 21.872299194335938, 25.035308837890625, 15.719429016113281, 16.758522033691406, 30.273021697998047, 6.7145538330078125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000326.npy"}
{"epoch": 0.9588235294117647, "step": 327, "batch_size": 128, "mean": 16.440813064575195, "std": 17.752872467041016, "min": -20.527873992919922, "p10": -1.7781717300415039, "median": 14.600343704223633, "p90": 38.95388793945312, "max": 88.95266723632812, "pos_frac": 0.8359375, "sample": [65.37936401367188, 12.550148010253906, 3.3666343688964844, 57.306793212890625, -1.1366348266601562, 15.890953063964844, 9.469413757324219, -10.685958862304688, 19.899757385253906, 26.194068908691406, 24.984283447265625, 50.58689880371094, -0.9477863311767578, 18.21920394897461, 30.543560028076172, 6.27337646484375, 7.764854431152344, 17.665374755859375, 15.086463928222656, 23.243228912353516, -2.8135223388671875, 28.17266082763672, -1.9205970764160156, 21.625473022460938, 8.046382904052734, 4.2000885009765625, 22.34929656982422, 12.54452133178711, 53.748565673828125, 31.059783935546875, 17.09783172607422, 19.742172241210938, -0.0211029052734375, 24.649085998535156, 42.03968811035156, 6.210731506347656, -10.645454406738281, 26.447967529296875, 35.66001892089844, 15.967803955078125, 27.79454803466797, 29.480133056640625, 17.71369171142578, 11.769462585449219, 24.854248046875, 34.367835998535156, 56.93214416503906, 1.5209732055664062, 52.2181396484375, 9.14300537109375, 4.830835342407227, 5.4127655029296875, 24.79181671142578, -1.4539871215820312, 7.68719482421875, 10.56308364868164, -8.94601821899414, -1.7725658416748047, 33.7398681640625, 28.488969802856445, 48.76936340332031, -1.7912521362304688, 8.829010009765625, 20.989089965820312, 7.95269775390625, 40.83692169189453, 14.944366455078125, -2.510528564453125, 4.64801025390625, 9.288734436035156, -18.004161834716797, 11.356231689453125, 17.08013916015625, 11.525726318359375, 6.005504608154297, 12.218368530273438, 38.146873474121094, 62.907135009765625, 11.166282653808594, 2.0088882446289062, 41.78680419921875, 11.0509033203125, 16.811622619628906, -7.3029937744140625, 29.5897216796875, 2.099853515625, -0.039340972900390625, 29.463134765625, 31.6505126953125, 3.947723388671875, 32.1373291015625, 8.1131591796875, 22.974544525146484, 3.0240211486816406, 2.9364242553710938, -0.1284465789794922, 3.209819793701172, 17.125106811523438, 12.192432403564453, 20.79071044921875, 10.007209777832031, 17.744091033935547, 4.801250457763672, 1.951446533203125, 6.78057861328125, 48.58038330078125, 2.5350570678710938, 25.982101440429688, -10.653255462646484, 2.7598876953125, 21.08165740966797, 25.230587005615234, 20.724884033203125, 19.975723266601562, 22.07947540283203, 8.57414436340332, -20.527873992919922, 24.440414428710938, 22.015846252441406, 17.06908416748047, 14.25632095336914, -5.470304489135742, 88.95266723632812, 17.771820068359375, 16.32110595703125, -1.44671630859375, -10.079887390136719, 10.214393615722656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000327.npy"}
{"epoch": 0.961764705882353, "step": 328, "batch_size": 128, "mean": 17.564903259277344, "std": 17.336795806884766, "min": -17.03118896484375, "p10": -3.6494224548339824, "median": 14.018383026123047, "p90": 40.431859588623034, "max": 82.07192993164062, "pos_frac": 0.8515625, "sample": [11.0810546875, 4.247184753417969, 32.74610137939453, -3.071147918701172, -12.616371154785156, 25.373428344726562, -0.44811248779296875, 49.02672576904297, 5.944866180419922, 82.07192993164062, 6.50177001953125, 19.599502563476562, 23.010356903076172, 36.280677795410156, 55.302406311035156, 13.029502868652344, 21.01229476928711, 0.9905357360839844, 20.45966339111328, 28.383533477783203, -6.670467376708984, 30.1956787109375, 29.52020263671875, 13.235908508300781, 16.965782165527344, 2.886566162109375, -15.188995361328125, -0.423431396484375, 19.700775146484375, -10.745983123779297, 44.21690368652344, -2.936534881591797, 1.284433364868164, 7.532127380371094, 11.378921508789062, 18.351402282714844, -5.077079772949219, -0.5562248229980469, 14.668811798095703, 8.944602966308594, 4.938455581665039, 7.293643951416016, 13.409988403320312, 27.938613891601562, -4.998729705810547, 7.609809875488281, -10.430599212646484, 60.3013916015625, 4.4638519287109375, 12.859687805175781, 19.111072540283203, 27.60906219482422, 55.984771728515625, 7.092203140258789, 10.861457824707031, -8.707801818847656, 51.38360595703125, -6.7296142578125, -10.937179565429688, 45.38166046142578, 19.338455200195312, 18.18561553955078, 26.40556526184082, 2.4632949829101562, 28.29851531982422, 33.833396911621094, 5.290184020996094, 10.200809478759766, 3.8352508544921875, 34.620025634765625, 10.753524780273438, 23.653854370117188, 16.232208251953125, 9.679189682006836, 12.705368041992188, 16.978389739990234, 13.861709594726562, 19.148040771484375, 29.734981536865234, 9.29150390625, 26.338180541992188, -1.0116004943847656, 35.236854553222656, 13.247711181640625, 29.706411361694336, 26.960968017578125, 32.16302490234375, -8.70660400390625, 7.539146423339844, 39.372962951660156, 38.628692626953125, 9.262248992919922, 10.169658660888672, -5.7424468994140625, 22.311279296875, 16.835342407226562, 21.797950744628906, 20.442642211914062, 15.433181762695312, 11.047134399414062, 34.603424072265625, 28.16937255859375, 9.705696105957031, 21.377883911132812, -17.03118896484375, 10.673233032226562, 36.470733642578125, 48.03620529174805, 4.091732025146484, 46.07548522949219, 10.84457015991211, 34.20787811279297, 48.93397521972656, 14.175056457519531, 33.14476013183594, 10.651744842529297, 11.426851272583008, 36.631103515625, 12.217536926269531, 43.37310028076172, 12.867599487304688, 11.88580322265625, 26.73368263244629, 3.1771163940429688, 17.23101043701172, 28.740724563598633, 10.802749633789062, 42.902618408203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000328.npy"}
{"epoch": 0.9647058823529412, "step": 329, "batch_size": 128, "mean": 18.546722412109375, "std": 18.013072967529297, "min": -25.753883361816406, "p10": -1.9895133972167955, "median": 17.966537475585938, "p90": 45.01473770141602, "max": 63.33147430419922, "pos_frac": 0.8671875, "sample": [8.966033935546875, 2.5970821380615234, 12.148300170898438, 44.868873596191406, 43.2564697265625, 55.38745880126953, 6.747871398925781, 8.750381469726562, 1.8115463256835938, 26.407093048095703, 22.97394561767578, 42.22639465332031, 46.88288879394531, 47.03417205810547, 19.218090057373047, 48.93035888671875, 9.004180908203125, 2.6787033081054688, 33.28131103515625, 34.189109802246094, 35.512840270996094, -0.2792205810546875, 18.956153869628906, 9.787189483642578, 33.32839584350586, 24.288864135742188, 51.867034912109375, 25.530624389648438, 1.38397216796875, 43.8687744140625, 11.053382873535156, 0.14532470703125, 29.80380630493164, 15.441085815429688, -15.262893676757812, 5.273418426513672, -1.6091804504394531, 21.288589477539062, 18.849700927734375, 32.114295959472656, 9.280845642089844, 24.85406494140625, 4.513252258300781, 24.689109802246094, 3.3031463623046875, 16.927154541015625, 4.745426177978516, 6.224235534667969, 34.20597839355469, 36.16712188720703, 11.336109161376953, 40.00011444091797, 50.007286071777344, 9.895790100097656, 8.637897491455078, 5.656532287597656, 43.3883056640625, 57.81721496582031, 20.392963409423828, -9.320993423461914, -4.270965576171875, -2.8769569396972656, 29.730392456054688, 22.270408630371094, 22.19043731689453, -7.995403289794922, -7.8584747314453125, -19.530372619628906, 1.0408706665039062, 8.188247680664062, 22.52318572998047, 21.740325927734375, 14.898017883300781, 10.08056640625, 19.23094940185547, -5.46038818359375, 5.5816650390625, -6.804325103759766, 3.526947021484375, 14.54106330871582, 10.633277893066406, 31.245750427246094, 12.320892333984375, 35.64134216308594, 6.692750930786133, 55.220123291015625, 22.93295669555664, 0.6987953186035156, 25.77920150756836, 48.73053741455078, -1.5094051361083984, 7.695137023925781, -0.5090522766113281, 21.030738830566406, 26.75607681274414, 10.125701904296875, 13.382675170898438, 21.57964324951172, 9.497810363769531, 51.20440673828125, 18.871000289916992, 28.368988037109375, 23.183753967285156, 5.534202575683594, 17.5755615234375, 12.878120422363281, 45.35508728027344, 18.357513427734375, 46.11549377441406, -25.753883361816406, -9.381340026855469, 20.76346778869629, 37.98193359375, 39.31901550292969, 22.184646606445312, 35.089111328125, 6.6937713623046875, 14.011138916015625, 22.5947265625, 5.074928283691406, 38.15351867675781, 63.33147430419922, -14.68612289428711, 30.952606201171875, 6.9227294921875, 9.106128692626953, -4.502979278564453, 28.56629180908203], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000329.npy"}
{"epoch": 0.9676470588235294, "step": 330, "batch_size": 128, "mean": 19.15849494934082, "std": 17.724870681762695, "min": -16.576675415039062, "p10": -2.3745399475097653, "median": 17.982439041137695, "p90": 40.1012222290039, "max": 96.05990600585938, "pos_frac": 0.859375, "sample": [32.26538848876953, 1.8266448974609375, 3.6202621459960938, 17.86927032470703, 8.575239181518555, 10.234560012817383, 57.660438537597656, 38.24382019042969, 18.473114013671875, 11.33199691772461, 17.935531616210938, 9.787109375, 12.249435424804688, -7.288185119628906, -1.052133560180664, 43.69892883300781, -5.2667694091796875, 13.82562255859375, 40.84526062011719, 30.381256103515625, 52.36393737792969, 9.555831909179688, -0.19244384765625, 24.67908477783203, 27.054595947265625, 34.84988021850586, 23.815162658691406, 22.008520126342773, -0.13506317138671875, 12.569129943847656, 18.526182174682617, 14.27508544921875, 6.805973052978516, -16.576675415039062, 51.86662292480469, -13.610179901123047, 15.620429992675781, 2.8201446533203125, 20.31102752685547, 18.027069091796875, 38.8958740234375, 51.97554016113281, 33.115325927734375, 25.650131225585938, 48.335723876953125, 33.28483581542969, 27.799850463867188, 7.905786514282227, 30.197036743164062, 15.433277130126953, 28.530929565429688, 1.4635753631591797, 52.66747283935547, -4.682117462158203, 2.6157989501953125, 36.99951171875, 24.845069885253906, 15.156379699707031, 39.7823486328125, -6.423791885375977, 20.779308319091797, 30.569091796875, -7.5936737060546875, -12.10467529296875, 12.933094024658203, 15.14051628112793, 8.964628219604492, 9.160497665405273, -8.304229736328125, 43.23036193847656, 35.59773254394531, 6.267730712890625, 19.232532501220703, -8.005470275878906, 5.6173095703125, 25.89276123046875, 27.638046264648438, 19.442729949951172, 28.815326690673828, 23.76689910888672, 21.78063201904297, 44.82624816894531, 15.463996887207031, 27.878814697265625, 20.889862060546875, 24.587158203125, 3.506855010986328, 18.529220581054688, 67.28582763671875, 11.932151794433594, 14.162628173828125, 13.978347778320312, 7.933694839477539, 18.82708740234375, 26.0291748046875, -2.3250808715820312, 6.718597412109375, 27.911815643310547, 0.8617305755615234, 23.187664031982422, 31.57685089111328, 15.531646728515625, 14.636856079101562, 34.74594497680664, 31.401023864746094, 58.23014831542969, -3.298215866088867, -2.1218395233154297, 32.85270690917969, 33.489585876464844, 13.648174285888672, 26.84856414794922, -5.775142669677734, 18.819026947021484, 32.013092041015625, 22.248565673828125, 10.984657287597656, 17.812530517578125, 7.110902786254883, 96.05990600585938, 15.738435745239258, 17.937808990478516, 6.462646484375, 21.109283447265625, 4.382572174072266, 6.073783874511719, -2.4899444580078125, 21.851112365722656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000330.npy"}
{"epoch": 0.9705882352941176, "step": 331, "batch_size": 128, "mean": 15.835184097290039, "std": 17.263996124267578, "min": -19.595489501953125, "p10": -5.030295944213865, "median": 13.816204071044922, "p90": 37.746780395507805, "max": 77.435546875, "pos_frac": 0.8515625, "sample": [10.1136474609375, 0.35926246643066406, 21.745189666748047, 6.083217620849609, 19.746334075927734, 5.2642364501953125, 14.175617218017578, 5.8803558349609375, 15.539520263671875, 11.350078582763672, -11.94134521484375, 11.188735961914062, 20.837467193603516, 5.83392333984375, 5.524698257446289, 6.074378967285156, 14.096366882324219, 34.55329132080078, 21.12946891784668, 6.099384307861328, -1.4940948486328125, 10.559394836425781, 10.934511184692383, 4.208869934082031, 17.87370491027832, 8.706501007080078, 45.34965515136719, -19.595489501953125, 10.875408172607422, 12.918066024780273, 24.041736602783203, 49.76819610595703, 10.265546798706055, 37.05052185058594, 24.9100341796875, 18.419631958007812, 13.220436096191406, 35.68452453613281, 66.31683349609375, 33.69071960449219, 4.866544723510742, 47.70671081542969, 12.404495239257812, 48.87788391113281, 30.406036376953125, 9.470672607421875, -2.186197280883789, 11.800712585449219, -10.223352432250977, -7.5873565673828125, 9.38934326171875, 26.855072021484375, 17.174224853515625, -17.9794921875, 57.7169189453125, 10.736736297607422, 15.444320678710938, 49.92317199707031, 7.961467742919922, 77.435546875, 20.753700256347656, 12.775432586669922, 14.504375457763672, 19.386520385742188, 4.877292633056641, 23.375526428222656, 15.504806518554688, -14.999996185302734, 33.331031799316406, 7.621063232421875, 17.966873168945312, -15.378395080566406, 16.9841251373291, 4.650302886962891, 42.131561279296875, 17.587387084960938, 52.03510284423828, 16.920265197753906, 16.663911819458008, 11.509712219238281, -9.580825805664062, 13.664390563964844, 25.0631103515625, 29.593353271484375, 8.421295166015625, 22.20917510986328, 16.493438720703125, 28.025829315185547, -8.423942565917969, 32.037078857421875, 12.773841857910156, 9.109115600585938, 12.819305419921875, 17.07708740234375, 4.236656188964844, 2.051027297973633, 46.35791778564453, 10.447677612304688, 16.047372817993164, -6.358478546142578, -10.281684875488281, 26.242584228515625, 24.15935516357422, 13.777084350585938, -4.4610748291015625, 9.82427978515625, 13.848678588867188, 17.523193359375, 14.405258178710938, 20.1260986328125, -18.244277954101562, -8.935155868530273, -1.52789306640625, 36.043373107910156, 13.783729553222656, 17.633087158203125, 12.950271606445312, 15.608352661132812, 5.410697937011719, 20.951892852783203, 57.13520812988281, 21.10321044921875, 39.37138366699219, 19.918811798095703, 34.40964126586914, -1.0432815551757812, 0.1837158203125, -0.83001708984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000331.npy"}
{"epoch": 0.9735294117647059, "step": 332, "batch_size": 128, "mean": 17.000221252441406, "std": 16.122892379760742, "min": -29.659698486328125, "p10": 0.9333244323730496, "median": 14.0355224609375, "p90": 37.02368927001953, "max": 77.28192138671875, "pos_frac": 0.8984375, "sample": [12.163558959960938, 21.8818359375, 52.29328918457031, 9.977315902709961, 18.112211227416992, 12.586669921875, -3.5506858825683594, 15.049015045166016, 9.992816925048828, 37.18641662597656, -7.243370056152344, 7.1523895263671875, 23.276840209960938, 8.773147583007812, 3.6494789123535156, 8.184356689453125, 37.50593566894531, -14.059524536132812, 2.270782470703125, 28.210464477539062, 28.41033172607422, 12.311214447021484, 5.676704406738281, 36.71574401855469, 5.231330871582031, 77.28192138671875, 30.493114471435547, 24.985679626464844, 32.58822250366211, 1.9262847900390625, 9.310043334960938, 5.605464935302734, 9.01702880859375, -4.38859748840332, 14.701623916625977, 9.129188537597656, 30.78171157836914, 29.557113647460938, 30.770538330078125, 16.617843627929688, 32.377281188964844, 42.164703369140625, 18.300125122070312, 16.933059692382812, 46.80188751220703, 46.688720703125, 5.22685432434082, 14.281265258789062, 36.953948974609375, 33.74698257446289, 11.9637451171875, 4.472217559814453, -2.277587890625, 21.272140502929688, 15.318561553955078, 4.638641357421875, 7.257179260253906, 7.138759613037109, 20.627113342285156, 8.358345031738281, 5.6473541259765625, 28.08440399169922, 2.405364990234375, 13.789779663085938, 36.13536834716797, 3.5021400451660156, 1.6967849731445312, 29.266551971435547, 34.3668327331543, 27.760299682617188, 5.464286804199219, 23.891292572021484, 15.59429931640625, 48.73362731933594, 6.2047271728515625, 16.789459228515625, -0.84808349609375, 7.761871337890625, -2.092742919921875, 34.01104736328125, 32.82078552246094, 9.987174987792969, 10.984111785888672, 39.632972717285156, -29.659698486328125, 11.457107543945312, 24.46477508544922, 30.941879272460938, 34.452362060546875, -5.7306060791015625, 6.235816955566406, 2.4665908813476562, 20.784927368164062, 1.8454055786132812, 24.42878532409668, 10.800064086914062, 25.29103660583496, 39.18377685546875, -4.7273101806640625, 54.37541198730469, 22.27996826171875, 25.350711822509766, 17.68316650390625, 7.7503509521484375, -9.428459167480469, 9.62969970703125, 20.728818893432617, 9.653934478759766, 6.341392517089844, 17.176025390625, 13.045501708984375, 42.68299102783203, -10.630794525146484, 9.91912841796875, 27.647586822509766, -13.971633911132812, 29.363786697387695, 34.78253173828125, 10.487136840820312, 6.644876480102539, 25.924087524414062, 2.6055564880371094, 19.990463256835938, 11.592208862304688, 7.397407531738281, 20.895965576171875, 47.737060546875, 2.169403076171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000332.npy"}
{"epoch": 0.9764705882352941, "step": 333, "batch_size": 128, "mean": 16.340715408325195, "std": 16.139278411865234, "min": -24.852996826171875, "p10": -3.333737182617187, "median": 14.826492309570312, "p90": 39.17903442382812, "max": 66.3505859375, "pos_frac": 0.859375, "sample": [-3.164154052734375, 7.045600891113281, 3.149883270263672, 10.369255065917969, 16.796241760253906, 16.743026733398438, 12.901824951171875, -3.096466064453125, 4.887678146362305, 12.9609375, -1.7091217041015625, -6.1435546875, 3.0943870544433594, 15.851898193359375, 16.04050064086914, 20.99608612060547, 36.64874267578125, 33.388336181640625, 15.409347534179688, 5.533082962036133, 19.395904541015625, 19.59271240234375, 12.328926086425781, 8.227943420410156, 45.028770446777344, 18.350845336914062, 7.757200241088867, 8.29159927368164, 26.38232421875, -1.8400039672851562, 44.617332458496094, 9.967765808105469, 26.05255126953125, 26.197677612304688, 11.831512451171875, 5.908550262451172, 1.4241180419921875, 20.262451171875, 17.07476043701172, -9.456954956054688, 3.6794281005859375, 16.045555114746094, 31.869583129882812, 20.746253967285156, 54.38893127441406, 3.9236984252929688, 8.256355285644531, 14.15095329284668, 0.5688419342041016, 21.561309814453125, 42.173030853271484, 5.2547607421875, 29.710342407226562, 22.839298248291016, -4.63414192199707, 8.819679260253906, 11.074630737304688, 13.793510437011719, 40.80030822753906, 34.0032958984375, 66.3505859375, -15.04052734375, 2.550537109375, 13.384916305541992, 11.151432037353516, 6.708740234375, 34.27692413330078, 2.8620643615722656, -6.601781845092773, 12.517528533935547, 23.65502166748047, 26.39936065673828, 41.986183166503906, 8.78118896484375, 7.861886978149414, 25.62399673461914, 35.268943786621094, 38.796051025390625, -10.103919982910156, 4.1624603271484375, 22.237056732177734, 32.15978240966797, 40.072662353515625, 24.292861938476562, 40.4608154296875, 3.4208450317382812, 10.907455444335938, 18.303787231445312, 11.863990783691406, 27.095962524414062, 23.72911834716797, 34.852806091308594, 20.63720703125, 1.7457466125488281, 11.740692138671875, 17.805679321289062, 19.11454200744629, 50.325279235839844, -10.840896606445312, 2.40533447265625, 12.843826293945312, -3.7447738647460938, 19.27233123779297, 20.778053283691406, 53.69171142578125, -12.927024841308594, 22.518123626708984, -10.561809539794922, -3.0421295166015625, -24.852996826171875, 10.390274047851562, 33.64405822753906, 10.931018829345703, 41.147727966308594, -9.752885818481445, 49.01604461669922, 21.465438842773438, 19.303531646728516, -3.72943115234375, 38.3587646484375, 16.775238037109375, 14.243637084960938, 23.461524963378906, 28.676528930664062, 11.666236877441406, 9.604381561279297, 30.93695831298828, 24.449745178222656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000333.npy"}
{"epoch": 0.9794117647058823, "step": 334, "batch_size": 128, "mean": 18.326770782470703, "std": 15.6023530960083, "min": -13.410018920898438, "p10": -0.6880922317504882, "median": 17.321578979492188, "p90": 39.891740798950195, "max": 67.33211517333984, "pos_frac": 0.890625, "sample": [23.493202209472656, 1.6385269165039062, 21.195337295532227, 0.39864540100097656, 9.235076904296875, 31.72144317626953, 12.24163818359375, -6.66168212890625, 43.783416748046875, -2.6887283325195312, 50.89030456542969, 16.426618576049805, 15.394330978393555, 16.50296401977539, 32.762237548828125, 31.96636199951172, 29.064048767089844, 48.827110290527344, 26.001564025878906, -6.7921142578125, 6.67254638671875, 10.556449890136719, 41.95122528076172, 25.853668212890625, 20.250579833984375, 40.18644714355469, 7.709339141845703, 31.719993591308594, 7.696159362792969, 12.739608764648438, 27.70378875732422, 24.543960571289062, 34.48333740234375, 2.9685897827148438, 2.261383056640625, 13.158241271972656, 23.763410568237305, 19.85186004638672, 15.864263534545898, 24.570068359375, 18.438308715820312, 19.98528289794922, 24.686019897460938, -3.09393310546875, 36.39393615722656, 7.163749694824219, 19.05792236328125, 19.873306274414062, 4.6089019775390625, 28.37071990966797, 14.197845458984375, 5.535377502441406, 17.849990844726562, 35.82276916503906, 43.55542755126953, 45.027896881103516, 25.59990692138672, 15.751686096191406, 3.7019805908203125, -4.618316650390625, 12.935714721679688, 3.172901153564453, 22.585067749023438, 6.73931884765625, 19.12744903564453, 38.06578063964844, 41.680267333984375, -6.020965576171875, -9.127328872680664, 15.16646957397461, 5.88641357421875, 11.185539245605469, -10.851325988769531, 0.6965179443359375, 7.500099182128906, 15.766096115112305, 9.581886291503906, 29.181060791015625, 4.7265167236328125, 18.622726440429688, 21.535064697265625, 13.481246948242188, 16.017608642578125, 15.640043258666992, -4.962852478027344, 41.617713928222656, 9.698265075683594, -0.6466503143310547, 17.1849365234375, 5.507083892822266, 26.76244354248047, 13.243961334228516, 24.865386962890625, 31.37023162841797, 0.8610305786132812, 33.703857421875, 38.06703186035156, 18.92261505126953, 2.7296485900878906, -13.410018920898438, 8.627647399902344, 13.103157043457031, 19.326324462890625, 39.765438079833984, 22.94623565673828, 41.72639465332031, 4.5732421875, -0.7847900390625, 16.01495361328125, 17.458221435546875, 31.198043823242188, 19.32171630859375, 13.314453125, 28.900901794433594, 63.55241012573242, 9.919349670410156, 4.557708740234375, 42.191871643066406, 5.023712158203125, -13.373306274414062, 23.759872436523438, 18.62347412109375, 67.33211517333984, 24.91241455078125, -1.53118896484375, 38.15684509277344, 36.483612060546875, 34.102874755859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000334.npy"}
{"epoch": 0.9823529411764705, "step": 335, "batch_size": 128, "mean": 19.943267822265625, "std": 18.3511962890625, "min": -15.789581298828125, "p10": -1.2563968658447262, "median": 16.10102081298828, "p90": 42.254011154174805, "max": 96.62901306152344, "pos_frac": 0.890625, "sample": [22.419044494628906, 4.041540145874023, -5.909734725952148, 33.400596618652344, -6.700428009033203, 49.22056198120117, 11.381900787353516, 27.45551300048828, 30.810237884521484, 7.364536285400391, 6.109458923339844, 3.3647003173828125, 7.614532470703125, 27.135162353515625, 27.105865478515625, 10.917964935302734, 24.378456115722656, 61.950897216796875, 67.2326889038086, 3.9777050018310547, 1.1448974609375, 1.3008003234863281, 23.34473419189453, 25.203948974609375, 16.93163299560547, 13.125686645507812, 24.575393676757812, 41.179779052734375, 11.536056518554688, 11.304229736328125, 17.454803466796875, 28.667625427246094, 20.20188331604004, -2.225942611694336, 96.62901306152344, -1.4183845520019531, 14.010040283203125, 25.41573715209961, -6.409736633300781, 39.458343505859375, 31.429534912109375, 36.59944152832031, 32.64868927001953, 47.49859619140625, 19.17525863647461, 15.175674438476562, 28.323932647705078, 39.084266662597656, 4.418891906738281, -1.1869735717773438, 22.447471618652344, 13.196195602416992, -15.464523315429688, 42.5094108581543, 4.467781066894531, 18.31949234008789, -4.067790985107422, 25.55799102783203, 34.18684387207031, 34.563072204589844, -2.0667724609375, 9.721076965332031, 6.981174468994141, 35.77320861816406, 8.656818389892578, 29.688098907470703, 45.9089469909668, 10.612625122070312, 15.887863159179688, 6.759632110595703, -15.789581298828125, 8.821998596191406, 37.00862121582031, 16.221778869628906, 23.130496978759766, 1.36163330078125, 14.179840087890625, 1.6468505859375, 24.972145080566406, 14.71783447265625, 9.835723876953125, 13.113967895507812, 14.377029418945312, 65.76321411132812, 35.10768127441406, -5.825874328613281, 31.74493408203125, 43.39893341064453, -7.956382751464844, 2.8801918029785156, 8.52239990234375, 58.04370880126953, 12.829742431640625, 26.857635498046875, 9.370372772216797, 2.819581985473633, 42.144554138183594, 37.451988220214844, 35.953826904296875, 0.87420654296875, -5.016775131225586, 20.72442626953125, 29.541481018066406, 14.060659408569336, 14.150161743164062, 43.044776916503906, 13.528972625732422, 4.394859313964844, 30.0582275390625, 9.736709594726562, 23.667556762695312, 44.346500396728516, -4.0851898193359375, 40.74652099609375, 0.207672119140625, 6.299072265625, 61.19618225097656, 29.963851928710938, 18.292373657226562, 30.052658081054688, 29.369712829589844, 13.825698852539062, 15.980262756347656, 41.97187423706055, 9.484481811523438, 10.439056396484375, 20.94701385498047, 10.722869873046875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000335.npy"}
{"epoch": 0.9852941176470589, "step": 336, "batch_size": 128, "mean": 16.757282257080078, "std": 18.84595489501953, "min": -21.196258544921875, "p10": -2.610861587524413, "median": 12.515907287597656, "p90": 39.6929344177246, "max": 74.74935913085938, "pos_frac": 0.8671875, "sample": [6.959983825683594, 17.323631286621094, 10.284997940063477, 31.708026885986328, -13.34915542602539, 54.17828369140625, 11.954635620117188, 6.9884490966796875, -0.042919158935546875, 18.983924865722656, 41.778594970703125, 34.61090087890625, -14.234016418457031, 38.12106704711914, -12.97052001953125, 53.50286865234375, 11.778261184692383, -3.466259002685547, 25.279521942138672, 5.81492805480957, 21.69951629638672, 7.364982604980469, 32.790985107421875, 21.4732666015625, -6.6322021484375, 7.5050506591796875, 32.73644256591797, 15.671165466308594, -0.7364692687988281, 18.73542022705078, 11.677154541015625, 11.935352325439453, 16.585372924804688, 38.79907989501953, 28.747406005859375, 17.882301330566406, 14.763046264648438, 11.845649719238281, 10.462835311889648, 20.935134887695312, 44.72474670410156, 9.463193893432617, 35.81556701660156, 15.046825408935547, 16.255558013916016, 37.033447265625, 4.04377555847168, 7.151771545410156, 24.631134033203125, -16.64102554321289, 18.188438415527344, 2.2996292114257812, 2.043212890625, 28.5509033203125, 9.806583404541016, 71.59112548828125, -6.633567810058594, 0.18289947509765625, 9.196371078491211, 31.378433227539062, -2.2442626953125, 70.68876647949219, 33.82374572753906, 16.94335174560547, 20.950927734375, 2.338766098022461, 4.605014801025391, -4.859046936035156, 7.0334930419921875, -4.521701812744141, 9.805076599121094, 30.191680908203125, 16.818161010742188, 8.243408203125, 13.372871398925781, 68.28697204589844, 8.61578369140625, 22.04621124267578, 0.5216007232666016, 2.1515045166015625, 74.74935913085938, 18.457229614257812, 45.36561584472656, 13.077178955078125, 11.496452331542969, 9.799911499023438, 8.648918151855469, 38.19438171386719, 7.686088562011719, 13.351068496704102, 1.5150985717773438, 9.579635620117188, 1.733367919921875, 20.772369384765625, 4.633464813232422, -1.06072998046875, 72.57831573486328, 4.994529724121094, -7.248512268066406, 0.05147552490234375, -21.196258544921875, 17.175689697265625, 7.617176055908203, 3.9047164916992188, 6.933769226074219, -4.09228515625, 53.59944152832031, 2.3478260040283203, 29.85394859313965, 34.13816833496094, -7.914958953857422, 26.272811889648438, 18.707435607910156, 3.0603599548339844, 3.7073497772216797, 17.0213623046875, 29.817707061767578, 21.65963363647461, 42.894989013671875, 18.06890869140625, 0.7596359252929688, 20.346309661865234, 32.179237365722656, 3.4132614135742188, 23.2764892578125, 35.408782958984375, 48.80902862548828, 0.398162841796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000336.npy"}
{"epoch": 0.9882352941176471, "step": 337, "batch_size": 128, "mean": 17.653575897216797, "std": 16.464893341064453, "min": -18.13654899597168, "p10": -1.0054925918579096, "median": 13.257560729980469, "p90": 39.1697509765625, "max": 65.42552185058594, "pos_frac": 0.8828125, "sample": [5.6114501953125, -7.832723617553711, 7.267978668212891, 12.285659790039062, -4.8338470458984375, 58.35920715332031, 19.00225067138672, 12.610275268554688, -0.8300037384033203, 62.04991149902344, 38.2940673828125, 3.8062496185302734, -10.384681701660156, 28.963211059570312, 23.207277297973633, -1.8633842468261719, 32.045921325683594, 14.409004211425781, 6.218997955322266, 29.127334594726562, 4.544460296630859, 11.919685363769531, 33.49107360839844, 21.132465362548828, 2.1854705810546875, 44.84474182128906, 24.54912757873535, 8.698305130004883, 38.933494567871094, 26.98784637451172, -6.9216766357421875, 13.80727767944336, 62.767051696777344, 27.35944366455078, -0.7030563354492188, 28.912643432617188, 5.14447021484375, 18.513595581054688, 10.907970428466797, 19.66766357421875, 26.176254272460938, -1.4149665832519531, 6.412322998046875, 30.589492797851562, -1.6565208435058594, 2.9434890747070312, 31.888870239257812, 21.232954025268555, 9.68667984008789, 4.0699920654296875, 26.412757873535156, 35.35368347167969, 33.010650634765625, 4.559654235839844, 19.134328842163086, 3.1058197021484375, 6.788883209228516, 28.8707275390625, 11.367713928222656, 12.771087646484375, 6.085136413574219, 17.45816421508789, 33.16632080078125, 10.217689514160156, 21.42809295654297, 0.240234375, 1.2336311340332031, 22.261871337890625, 6.475776672363281, 41.20610046386719, 8.678691864013672, 32.663291931152344, 4.505003929138184, 44.49085235595703, 11.400733947753906, 46.079437255859375, 12.862110137939453, 13.653011322021484, -4.277189254760742, 26.227401733398438, 0.5326461791992188, 9.133672714233398, 33.93053436279297, 45.866485595703125, -2.4050216674804688, 6.558387756347656, 16.35448455810547, 39.72101593017578, -3.2370071411132812, 23.266677856445312, 26.49396514892578, -2.249715805053711, 46.17110061645508, -3.9269866943359375, 31.304786682128906, 4.377357482910156, -18.13654899597168, 23.69118881225586, 0.7174453735351562, 47.49504089355469, 2.0613346099853516, 36.44673156738281, 6.352813720703125, 6.161495208740234, 28.595748901367188, 4.099739074707031, 29.839614868164062, 11.476669311523438, 15.212623596191406, 7.015838623046875, 34.99237060546875, 65.42552185058594, 3.7562332153320312, 7.342338562011719, 23.75122833251953, 7.788799285888672, 12.622989654541016, 22.854084014892578, 35.78216552734375, 44.32705307006836, 12.854454040527344, 22.162399291992188, 9.6788330078125, 29.62830352783203, 26.643489837646484, 0.8313140869140625, 25.803592681884766, 8.874191284179688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000337.npy"}
{"epoch": 0.9911764705882353, "step": 338, "batch_size": 128, "mean": 19.39845085144043, "std": 15.964556694030762, "min": -21.347328186035156, "p10": 0.48985900878906347, "median": 17.9994535446167, "p90": 41.247721862792964, "max": 57.20220184326172, "pos_frac": 0.8984375, "sample": [4.319389343261719, 25.917449951171875, 15.000152587890625, 14.849493026733398, 9.763011932373047, 13.842098236083984, 43.296058654785156, -1.4774093627929688, 1.0193939208984375, 24.371734619140625, 27.822959899902344, -15.269554138183594, 53.27275848388672, -2.1050453186035156, 55.62501525878906, 39.17927932739258, 18.305213928222656, 22.260238647460938, 1.3391265869140625, 33.807861328125, 27.952049255371094, -3.5312652587890625, 40.630775451660156, 42.680511474609375, 29.48967933654785, 21.93204116821289, 15.727924346923828, 44.103790283203125, 28.046112060546875, 7.7360992431640625, 18.460250854492188, 13.28581428527832, 8.808391571044922, 38.26006317138672, 27.180160522460938, -5.657989501953125, 36.125267028808594, 21.12554931640625, 16.524974822998047, 2.0814590454101562, 16.40453338623047, 10.885889053344727, 9.809598922729492, 33.7025260925293, 22.732969284057617, -6.8304595947265625, 19.168594360351562, 48.60987854003906, 22.37712860107422, 13.127838134765625, 10.29977798461914, 29.57070541381836, 1.6202392578125, 6.383472442626953, -7.686279296875, 37.77485656738281, 18.159414291381836, 13.280693054199219, 30.096534729003906, 40.85901641845703, 25.182817459106445, 26.181556701660156, 26.966552734375, 42.378665924072266, 45.324317932128906, 42.154701232910156, 31.523839950561523, 14.902175903320312, 16.24640655517578, 7.286500930786133, 34.9013671875, 5.129180908203125, 29.941932678222656, 4.73797607421875, -1.5465278625488281, -0.1500701904296875, -5.723489761352539, 6.7628631591796875, 24.898696899414062, 15.867332458496094, 22.06845474243164, 6.481941223144531, 25.272109985351562, 10.026512145996094, 37.66184997558594, 44.01527404785156, 10.693408966064453, 36.70246887207031, 7.7244720458984375, 14.523216247558594, 54.287086486816406, 36.641387939453125, 21.6865234375, 17.839492797851562, 13.430135726928711, 0.7641143798828125, -2.0160350799560547, 11.619026184082031, -21.347328186035156, 21.569129943847656, 13.2484130859375, 8.374755859375, 35.31095886230469, 22.680740356445312, 20.777503967285156, 11.774826049804688, 15.370231628417969, 4.994659423828125, 37.260581970214844, 32.82106018066406, 18.412261962890625, 1.2077407836914062, 5.0707550048828125, 3.585906982421875, 52.38525390625, 15.677356719970703, 28.00487518310547, 7.258392333984375, -9.450775146484375, 13.284416198730469, 57.20220184326172, 10.283851623535156, 31.08270263671875, 2.0569381713867188, 21.83675765991211, 38.720130920410156, 12.463775634765625, 22.24761199951172], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000338.npy"}
{"epoch": 0.9941176470588236, "step": 339, "batch_size": 128, "mean": 18.662097930908203, "std": 17.174728393554688, "min": -12.287612915039062, "p10": -1.6795213699340814, "median": 15.215606689453125, "p90": 42.50077819824219, "max": 61.83720397949219, "pos_frac": 0.875, "sample": [3.1477813720703125, 23.342422485351562, 13.12539291381836, 19.012176513671875, 16.05101776123047, 33.85221862792969, -6.612209320068359, 7.9532012939453125, 15.985958099365234, 4.5921478271484375, 23.55577850341797, 34.780792236328125, 31.97898292541504, 37.709476470947266, 25.395645141601562, 9.825096130371094, -1.522054672241211, 16.853059768676758, 24.478164672851562, 12.599807739257812, 50.01033020019531, 20.854642868041992, 7.160106658935547, 31.420883178710938, 11.493011474609375, 4.8486480712890625, 35.895904541015625, -3.3155593872070312, 0.26556396484375, 22.002506256103516, 31.745433807373047, 1.7897834777832031, 29.844589233398438, 3.580951690673828, 12.620635986328125, 28.834083557128906, -12.287612915039062, 38.86532211303711, 0.2815399169921875, 26.73540496826172, 11.66839599609375, 13.644271850585938, 59.814247131347656, 9.723024368286133, 15.090011596679688, 38.05480194091797, 10.781620025634766, 19.097015380859375, 16.66626739501953, -6.845516204833984, 22.97747039794922, 55.32408905029297, -0.24968719482421875, 24.812896728515625, 13.066963195800781, 14.254945755004883, 1.0541839599609375, 42.48468780517578, 35.8244514465332, 42.53832244873047, 23.26892852783203, -4.998039245605469, 37.6276741027832, 5.2060546875, 9.028938293457031, 61.83720397949219, 31.857337951660156, 35.41578674316406, 9.865715026855469, 9.635799407958984, 32.74597930908203, -6.2037353515625, 38.0025634765625, -3.56591796875, 2.52044677734375, 46.49847412109375, 24.340171813964844, 6.634468078613281, -5.462635040283203, 25.777297973632812, -2.478271484375, 50.980224609375, 4.540060043334961, 21.714197158813477, 10.662300109863281, 36.685455322265625, 7.446262359619141, 18.90412139892578, 2.504302978515625, 23.42255401611328, 14.126701354980469, -2.9528846740722656, 8.667741775512695, -2.0469436645507812, 4.774810791015625, 0.4975547790527344, 5.633588790893555, 11.306343078613281, 12.316482543945312, 43.70035171508789, 16.056617736816406, 58.900516510009766, 12.424720764160156, 32.98564147949219, 6.373138427734375, 53.34882354736328, 3.475494384765625, 3.8166427612304688, -0.9815654754638672, 37.741661071777344, 15.341201782226562, 4.395469665527344, -3.967243194580078, 12.592243194580078, 22.631256103515625, 56.11582946777344, 18.394550323486328, 2.535909652709961, 33.17718505859375, 51.386207580566406, 3.456541061401367, 52.8695068359375, 16.761085510253906, 27.91938018798828, 26.976055145263672, -5.128013610839844, 40.15355682373047, 10.025264739990234], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000339.npy"}
{"epoch": 0.9970588235294118, "step": 340, "batch_size": 128, "mean": 18.328603744506836, "std": 17.864362716674805, "min": -36.68048095703125, "p10": 0.049100494384768045, "median": 14.363204956054688, "p90": 41.6159309387207, "max": 89.79541015625, "pos_frac": 0.8984375, "sample": [33.78314208984375, 18.96003532409668, 5.3181304931640625, 49.27869415283203, 27.077957153320312, 27.155929565429688, 5.672016143798828, 46.56437683105469, 0.7256317138671875, 4.210273742675781, 18.24518585205078, 27.660675048828125, 25.619049072265625, 44.86836624145508, 8.555099487304688, 16.80813217163086, -12.5816650390625, 43.663047790527344, 12.205123901367188, 10.748992919921875, 8.807418823242188, 25.667682647705078, 1.0138130187988281, 2.5829620361328125, 12.253986358642578, 9.913921356201172, 39.77217102050781, 6.9266357421875, 3.543018341064453, 9.551799774169922, 2.9746322631835938, 7.825828552246094, 41.60155487060547, 27.36810302734375, 9.758598327636719, 54.667213439941406, 42.340492248535156, 10.21980094909668, 49.73759460449219, 15.569347381591797, 7.5304107666015625, -36.68048095703125, 10.486618041992188, 5.781333923339844, 14.778284072875977, 9.482803344726562, 21.399417877197266, 21.275466918945312, -5.989471435546875, -6.1027984619140625, 14.483078002929688, 13.598747253417969, 35.525108337402344, 34.15284729003906, -3.8701400756835938, 1.4287796020507812, 30.878053665161133, 3.8669052124023438, 20.553241729736328, 69.56594848632812, 8.673625946044922, 31.637603759765625, 27.318260192871094, 9.765569686889648, 31.641387939453125, -15.682559967041016, -3.851970672607422, 22.298965454101562, 11.506919860839844, 37.44095993041992, 37.24840545654297, 12.021209716796875, 6.034339904785156, 4.779449462890625, -3.2736053466796875, 1.5705337524414062, 29.80170440673828, 22.89825439453125, 6.315164566040039, 17.513324737548828, 18.66836929321289, 17.47995376586914, 22.154518127441406, 26.407241821289062, 12.635665893554688, 25.5821533203125, 13.531730651855469, -13.576019287109375, 5.643688201904297, 51.088890075683594, 7.355865478515625, 89.79541015625, 12.612091064453125, 17.573318481445312, 61.25054931640625, 29.778289794921875, 22.441650390625, 13.54827880859375, -5.855926513671875, 12.743087768554688, -4.732660293579102, 24.986648559570312, 33.923709869384766, 41.64947509765625, 11.427898406982422, 34.789337158203125, 13.88790512084961, 6.640190124511719, 30.76226806640625, 35.548858642578125, 6.262144088745117, 43.960227966308594, 32.416717529296875, 39.570648193359375, 15.504188537597656, 3.6173782348632812, 10.483299255371094, -1.5294723510742188, 11.585418701171875, -3.6306800842285156, 26.0045166015625, 39.66150665283203, 8.022235870361328, 19.098995208740234, 8.92791748046875, 24.768211364746094, 22.91583251953125, 14.243331909179688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs/step_0000340.npy"}