Files
ModelHub XC 3958945219 初始化项目,由ModelHub XC社区提供模型
Model: W-61/llama3-hh-helpful-qt045-b0p5-20260429-085449
Source: Original Platform
2026-05-22 21:14:21 +08:00

682 lines
1.1 MiB

{"epoch": 0.0, "step": 1, "batch_size": 64, "mean": -0.02287048101425171, "std": 0.42023447155952454, "min": -1.4034271240234375, "p10": -0.46674575805664065, "median": 0.04234886169433594, "p90": 0.4323463439941407, "max": 0.89263916015625, "pos_frac": 0.53125, "sample": [-0.06523895263671875, 0.436798095703125, 0.27811431884765625, -0.9194221496582031, 0.018890380859375, 0.20587158203125, 0.18878173828125, -0.3968696594238281, 0.26206207275390625, 0.2470550537109375, -0.040912628173828125, 0.4394989013671875, -0.44133758544921875, -0.39148712158203125, 0.2764854431152344, 0.89263916015625, -0.42584991455078125, -0.46125030517578125, -0.8638992309570312, -0.3508758544921875, 0.371368408203125, 0.887847900390625, -0.382904052734375, 0.36145782470703125, -0.4890003204345703, 0.052455902099609375, -0.036136627197265625, 0.23079299926757812, 0.2469482421875, 0.1643218994140625, -0.07129669189453125, 0.2790794372558594, 0.3637123107910156, -0.8916168212890625, 0.03298759460449219, -0.2790107727050781, -0.17860984802246094, 0.23892593383789062, 0.05171012878417969, -0.2564239501953125, -0.14655303955078125, 0.27777862548828125, 0.0810394287109375, -1.4034271240234375, -0.28739166259765625, -0.1489429473876953, 0.44918060302734375, 0.1693286895751953, 0.10933303833007812, -0.14766693115234375, -0.40944671630859375, -0.18532562255859375, 0.6261310577392578, -0.20856857299804688, 0.602569580078125, 0.05538177490234375, 0.1505279541015625, 0.1313800811767578, -0.006317138671875, 0.42195892333984375, -0.29936981201171875, -0.4691009521484375, 0.16705322265625, -0.5789260864257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000001.npy"}
{"epoch": 0.0014684287812041115, "step": 2, "batch_size": 64, "mean": -0.06572240591049194, "std": 0.3523969054222107, "min": -0.9291305541992188, "p10": -0.46334152221679686, "median": -0.05502510070800781, "p90": 0.3672500610351563, "max": 1.0444793701171875, "pos_frac": 0.4375, "sample": [-0.2829437255859375, 0.3027191162109375, -0.19867706298828125, -0.3062286376953125, 0.10318756103515625, 0.20131683349609375, -0.34906005859375, 0.2802886962890625, 0.1914520263671875, -0.31072998046875, 0.08922195434570312, 0.10284614562988281, -0.03655242919921875, -0.0604095458984375, -0.06208038330078125, 0.32562255859375, -0.37982177734375, 0.2746162414550781, -0.049640655517578125, 0.3752174377441406, -0.103973388671875, 0.0699462890625, 0.36417388916015625, -0.033428192138671875, 0.37265777587890625, -0.3787078857421875, -0.6610565185546875, 0.4720420837402344, 0.47701263427734375, -0.27928924560546875, -0.44719696044921875, -0.0965118408203125, -0.7628555297851562, 0.046764373779296875, 0.06670379638671875, -0.9291305541992188, -0.7122802734375, -0.16554832458496094, 0.1485595703125, -0.07539939880371094, 0.2588920593261719, 0.039890289306640625, 0.201690673828125, 0.0623016357421875, 1.0444793701171875, -0.37696075439453125, -0.02794647216796875, -0.223297119140625, -0.35730743408203125, -0.1309051513671875, -0.3106689453125, -0.11409187316894531, -0.1669769287109375, 0.131317138671875, -0.2361297607421875, 0.4093780517578125, -0.6485977172851562, 0.36856842041015625, -0.1951904296875, -0.4702606201171875, -0.7624168395996094, 0.008928298950195312, -0.31630706787109375, 0.022550582885742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000002.npy"}
{"epoch": 0.002936857562408223, "step": 3, "batch_size": 64, "mean": 0.06905469298362732, "std": 0.40016984939575195, "min": -0.804901123046875, "p10": -0.35855560302734374, "median": 0.051817893981933594, "p90": 0.5267044067382813, "max": 1.18548583984375, "pos_frac": 0.5625, "sample": [-0.21624755859375, -0.10370254516601562, 0.391082763671875, -0.05510711669921875, -0.3452949523925781, 0.17584228515625, 0.257598876953125, 0.22792816162109375, 0.1298370361328125, 0.09908294677734375, 0.3015594482421875, 0.16221237182617188, 0.5388946533203125, -0.3406982421875, -0.06643486022949219, 0.5764846801757812, -0.1379241943359375, -0.2915077209472656, 0.29923248291015625, -0.1660175323486328, -0.5831222534179688, 0.331298828125, -0.804901123046875, -0.170440673828125, -0.02797698974609375, -0.03792381286621094, 0.02301025390625, 0.06266212463378906, 0.10911941528320312, -0.4740791320800781, 0.972381591796875, 1.0604934692382812, 0.019342422485351562, 1.18548583984375, 0.1018524169921875, -0.2600250244140625, -0.26861572265625, -0.2392101287841797, 0.2829437255859375, 0.29195594787597656, -0.45505523681640625, 0.064788818359375, 0.1895751953125, 0.14105224609375, -0.07660675048828125, -0.1813812255859375, 0.30052947998046875, 0.7047462463378906, -0.3642387390136719, -0.11333465576171875, 0.26354026794433594, -0.1661357879638672, 0.1231689453125, 0.3535308837890625, 0.0026111602783203125, 1.1845283508300781, 0.2089996337890625, -0.28641510009765625, 0.2151031494140625, 0.498260498046875, -0.1604595184326172, -0.674163818359375, -0.40518951416015625, 0.040973663330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000003.npy"}
{"epoch": 0.004405286343612335, "step": 4, "batch_size": 64, "mean": -0.02125033736228943, "std": 0.3466644883155823, "min": -0.6687850952148438, "p10": -0.47548522949218747, "median": -0.07352828979492188, "p90": 0.4227813720703127, "max": 1.2737350463867188, "pos_frac": 0.421875, "sample": [0.15477752685546875, -0.5248489379882812, -0.1560821533203125, 0.10869598388671875, -0.10616302490234375, 0.44034576416015625, 0.57501220703125, 0.009521484375, -0.4143562316894531, -0.0028533935546875, -0.3737373352050781, 0.3240699768066406, -0.19890594482421875, -0.11342620849609375, -0.2062225341796875, -0.10839462280273438, 0.126861572265625, -0.2784576416015625, -0.21390151977539062, -0.08125877380371094, -0.4974212646484375, -0.5638961791992188, -0.2659454345703125, -0.383026123046875, -0.096343994140625, -0.0595855712890625, 0.2916107177734375, 0.21562957763671875, -0.21507644653320312, -0.611724853515625, 0.014041900634765625, -0.023233413696289062, -0.167205810546875, 0.04286956787109375, -0.5092754364013672, 0.18294334411621094, -0.6687850952148438, -0.071929931640625, -0.07512664794921875, 0.1259002685546875, -0.14809417724609375, -0.19214820861816406, 0.38179779052734375, 0.59210205078125, 0.15331268310546875, 0.10776901245117188, 0.329803466796875, 0.5581207275390625, -0.12898826599121094, -0.49985504150390625, -0.12280654907226562, 0.24433517456054688, 0.2951202392578125, -0.4243011474609375, 0.18082618713378906, 0.46535301208496094, 1.2737350463867188, -0.16167449951171875, -0.05783843994140625, 0.096649169921875, 0.4747161865234375, -0.22119712829589844, -0.41827392578125, 0.236419677734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000004.npy"}
{"epoch": 0.005873715124816446, "step": 5, "batch_size": 64, "mean": 0.03655460476875305, "std": 0.36775702238082886, "min": -1.0894775390625, "p10": -0.42081298828124997, "median": 0.04675102233886719, "p90": 0.49896697998046885, "max": 0.9008712768554688, "pos_frac": 0.546875, "sample": [0.0397491455078125, 0.1982421875, 0.132476806640625, -0.13463592529296875, 0.3111724853515625, 0.395782470703125, 0.7168006896972656, -0.2298736572265625, -0.06490516662597656, 0.3406524658203125, 0.14237213134765625, 0.541900634765625, 0.07331275939941406, 0.09097671508789062, -0.06625747680664062, -0.0108489990234375, 0.06243133544921875, 0.04116249084472656, -0.062099456787109375, -0.1951141357421875, 0.04395294189453125, 0.5097198486328125, 0.16313552856445312, -0.14658355712890625, 0.157073974609375, -0.29199981689453125, 0.34751129150390625, -0.370697021484375, -1.0894775390625, 0.550872802734375, -0.2722434997558594, 0.049549102783203125, 0.7149658203125, -0.11156272888183594, -0.18854904174804688, 0.20159912109375, 0.304229736328125, 0.46976470947265625, 0.3984260559082031, 0.524871826171875, 0.2529945373535156, 0.148681640625, -0.00342559814453125, -0.4779205322265625, 0.9008712768554688, -0.8629150390625, -0.01839447021484375, -0.5131072998046875, -0.38372802734375, -0.43670654296875, 0.24180030822753906, -0.04369354248046875, -0.16539764404296875, -0.2628440856933594, -0.09454345703125, 0.16192626953125, 0.473876953125, -0.29352569580078125, -0.43929290771484375, 0.19496917724609375, -0.6793594360351562, 0.23276519775390625, -0.0495758056640625, 0.168182373046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000005.npy"}
{"epoch": 0.007342143906020558, "step": 6, "batch_size": 64, "mean": -0.04002311825752258, "std": 0.4169080853462219, "min": -1.043914794921875, "p10": -0.5135873794555664, "median": -0.0203094482421875, "p90": 0.5216491699218752, "max": 0.8243865966796875, "pos_frac": 0.46875, "sample": [-0.9399490356445312, -0.08995246887207031, -0.06512641906738281, 0.0873565673828125, -0.31976318359375, 0.674652099609375, -0.4846916198730469, -0.4577369689941406, -0.4047698974609375, -1.043914794921875, 0.2895622253417969, 0.8243865966796875, -0.9969482421875, 0.3756847381591797, -0.3685417175292969, -0.1401214599609375, -0.00951385498046875, -0.12459754943847656, 0.013851165771484375, 0.184173583984375, -0.7579193115234375, -0.49339866638183594, 0.237030029296875, 0.003902435302734375, -0.2505836486816406, 0.08646392822265625, 0.2991828918457031, 0.8018569946289062, -0.03110504150390625, -0.2967720031738281, -0.20084381103515625, 0.33360862731933594, 0.6303482055664062, -0.12396240234375, -0.04131317138671875, 0.3746795654296875, -0.763885498046875, -0.00054168701171875, -0.21443939208984375, -0.37354278564453125, -0.200042724609375, 0.0389862060546875, -0.5222396850585938, 0.11528968811035156, 0.4557628631591797, 0.04302787780761719, 0.2043914794921875, 0.11734580993652344, 0.5416259765625, -0.41457366943359375, 0.00936126708984375, -0.12725067138671875, 0.18966293334960938, 0.10530662536621094, 0.5838775634765625, 0.1183013916015625, -0.10812759399414062, -0.6804656982421875, 0.47503662109375, -0.27386474609375, -0.1077880859375, -0.10727691650390625, 0.662139892578125, 0.09722900390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000006.npy"}
{"epoch": 0.00881057268722467, "step": 7, "batch_size": 64, "mean": 0.03685298562049866, "std": 0.39603114128112793, "min": -1.09197998046875, "p10": -0.3415199279785156, "median": 0.04515838623046875, "p90": 0.43000030517578125, "max": 1.148834228515625, "pos_frac": 0.578125, "sample": [0.17153549194335938, -0.7578048706054688, -0.15599632263183594, -0.22876739501953125, -1.0454483032226562, 0.13387298583984375, -0.45716094970703125, -0.01141357421875, -0.35170745849609375, 0.8918838500976562, -1.09197998046875, 0.3156280517578125, -0.215667724609375, 0.27007293701171875, 0.0590667724609375, 0.1145172119140625, 0.030172348022460938, -0.23369216918945312, -0.03732109069824219, 0.019306182861328125, 0.13045501708984375, 0.757476806640625, 0.43082427978515625, 0.27620697021484375, 0.1888427734375, 0.034912109375, -0.1998271942138672, 0.08728790283203125, -0.20958328247070312, -0.1233673095703125, 0.13110733032226562, -0.07454681396484375, 0.22499465942382812, -0.210205078125, 0.08056640625, 0.42807769775390625, -0.012958526611328125, 0.0554046630859375, -0.158355712890625, -0.2365875244140625, 0.407806396484375, -0.5453033447265625, 0.3098602294921875, -0.03264617919921875, 0.015655517578125, 0.022918701171875, 0.4537506103515625, -0.3177490234375, 0.2466583251953125, 0.06764984130859375, 1.148834228515625, -0.6785507202148438, -0.0209503173828125, 0.34632110595703125, -0.06939697265625, 0.535247802734375, -0.25147247314453125, 0.11383056640625, 0.9949932098388672, 0.22313308715820312, 0.2706298828125, 0.0997772216796875, -0.06049346923828125, 0.05826568603515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000007.npy"}
{"epoch": 0.010279001468428781, "step": 8, "batch_size": 64, "mean": -0.05733811855316162, "std": 0.3448963165283203, "min": -1.4411468505859375, "p10": -0.48493270874023436, "median": -0.011095046997070312, "p90": 0.3349519729614259, "max": 0.62738037109375, "pos_frac": 0.5, "sample": [0.3493194580078125, 0.3563079833984375, -0.4286041259765625, -0.10817718505859375, -0.48711395263671875, -0.051074981689453125, 0.07484626770019531, -0.06769561767578125, -0.04156494140625, -0.023731231689453125, -0.10165023803710938, 0.16172027587890625, -0.20542144775390625, 0.12717247009277344, 0.38275146484375, -0.36318206787109375, 0.14105606079101562, -0.1526947021484375, 0.166900634765625, -0.7697677612304688, 0.39801788330078125, -0.599365234375, 0.2248687744140625, 0.047149658203125, -0.024181365966796875, -0.55621337890625, -0.35065460205078125, 0.2282428741455078, 0.067230224609375, -0.4704551696777344, -0.36162567138671875, -0.3374481201171875, -0.023956298828125, 0.62738037109375, -0.03778839111328125, 0.108551025390625, 0.07010650634765625, 0.11507225036621094, 0.0478057861328125, -0.2046966552734375, 0.16254425048828125, -0.40816497802734375, 0.13079833984375, -0.619659423828125, 0.22029495239257812, -0.493988037109375, -1.4411468505859375, 0.2999725341796875, 0.09362220764160156, 0.24395751953125, 0.0015411376953125, -0.3343658447265625, -0.09130859375, -0.10546112060546875, 0.4631805419921875, 0.34806060791015625, -0.4798431396484375, 0.11028289794921875, 0.0623016357421875, -0.2927398681640625, -0.06087303161621094, 0.3043651580810547, 0.012285232543945312, 0.2772674560546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000008.npy"}
{"epoch": 0.011747430249632892, "step": 9, "batch_size": 64, "mean": 0.010491013526916504, "std": 0.4288538098335266, "min": -1.1603164672851562, "p10": -0.5315208435058593, "median": 0.0066967010498046875, "p90": 0.5592741012573242, "max": 1.184783935546875, "pos_frac": 0.5, "sample": [-0.002349853515625, 0.0631256103515625, 0.5856990814208984, 0.5285568237304688, 0.05889129638671875, -0.3079071044921875, -0.3601837158203125, 0.42855072021484375, 0.717315673828125, 0.4852943420410156, -0.5639266967773438, -0.15277099609375, -0.5597763061523438, -1.1603164672851562, -0.4655914306640625, -0.70703125, -0.1242523193359375, -0.004062652587890625, -0.1411590576171875, -0.3009529113769531, -1.0293121337890625, 0.018331527709960938, -0.038055419921875, 0.1282196044921875, -0.46068572998046875, 0.06626129150390625, 0.17092514038085938, 0.2177734375, 0.015743255615234375, 0.5599288940429688, 0.06674957275390625, 0.09561538696289062, 0.2195281982421875, 0.6021881103515625, -0.09443092346191406, -0.34069061279296875, -0.20111083984375, 0.146759033203125, 0.5581512451171875, 0.3494110107421875, -0.00347900390625, 0.0662994384765625, -0.116973876953125, -0.09171676635742188, -0.07898139953613281, -0.258819580078125, -0.133026123046875, 0.279693603515625, -0.23926544189453125, 0.03704261779785156, -0.1736602783203125, -0.16157913208007812, 0.24445343017578125, 0.5579509735107422, 0.5597553253173828, 0.2486114501953125, 0.4446563720703125, 0.6092376708984375, -0.29039955139160156, 1.184783935546875, -0.5751876831054688, 0.40840911865234375, -0.8969268798828125, -0.01790618896484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000009.npy"}
{"epoch": 0.013215859030837005, "step": 10, "batch_size": 64, "mean": -0.015163183212280273, "std": 0.3706546127796173, "min": -0.944732666015625, "p10": -0.4676921844482422, "median": -0.04345703125, "p90": 0.4301151275634766, "max": 0.955169677734375, "pos_frac": 0.453125, "sample": [-0.0544281005859375, -0.5283889770507812, 0.06861686706542969, 0.01398468017578125, -0.42329978942871094, 0.4275970458984375, -0.06527519226074219, -0.0302581787109375, 0.27394866943359375, -0.12749481201171875, 0.15576553344726562, 0.4225921630859375, -0.872283935546875, 0.271759033203125, -0.6533966064453125, 0.2159423828125, -0.30381011962890625, -0.17956161499023438, 0.023847579956054688, 0.35688018798828125, 0.274444580078125, -0.06953048706054688, 0.21259117126464844, 0.25125885009765625, -0.141021728515625, -0.22128677368164062, -0.28675079345703125, -0.0324859619140625, 0.4450874328613281, 0.20842361450195312, 0.601043701171875, -0.1962432861328125, -0.0034770965576171875, -0.10068511962890625, -0.4756927490234375, -0.5381202697753906, -0.944732666015625, 0.4311943054199219, 0.16281509399414062, -0.13874244689941406, -0.4810619354248047, -0.08788299560546875, -0.16117477416992188, 0.83538818359375, 0.01239013671875, 0.31293487548828125, 0.41623687744140625, -0.29511260986328125, -0.07083511352539062, 0.1755523681640625, -0.315032958984375, -0.16355514526367188, -0.23836517333984375, 0.004550933837890625, -0.44341278076171875, -0.4490242004394531, -0.1312255859375, 0.11463165283203125, 0.46099090576171875, 0.955169677734375, 0.5910263061523438, -0.43738555908203125, 0.12314224243164062, -0.12921524047851562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000010.npy"}
{"epoch": 0.014684287812041116, "step": 11, "batch_size": 64, "mean": 0.024578243494033813, "std": 0.36094382405281067, "min": -1.34063720703125, "p10": -0.37055854797363275, "median": 0.00807952880859375, "p90": 0.42927398681640627, "max": 1.1585922241210938, "pos_frac": 0.515625, "sample": [-0.046848297119140625, 0.3947906494140625, 0.43877410888671875, -0.07598876953125, -0.2633056640625, -0.19207000732421875, 0.18349456787109375, 0.16388320922851562, -0.06622886657714844, 0.3453369140625, 0.21259117126464844, -0.0736541748046875, 0.13243484497070312, -0.02309417724609375, -0.3908843994140625, -0.26111602783203125, 0.2804107666015625, -0.3231315612792969, 0.35040283203125, 0.16260528564453125, 0.4237518310546875, 0.00084686279296875, -0.42234039306640625, 0.30315399169921875, -0.21710205078125, 0.431640625, -0.2574005126953125, -0.14827728271484375, -0.0072021484375, -0.21186447143554688, 0.0864715576171875, -0.0588836669921875, 0.2151031494140625, 0.4415283203125, 0.4045372009277344, 0.327789306640625, 0.077117919921875, 1.1585922241210938, 0.16107177734375, -0.03583717346191406, 0.08475112915039062, 0.2829856872558594, 0.01531219482421875, -0.0082855224609375, 0.12360382080078125, 0.2314910888671875, 0.0336456298828125, -0.14336013793945312, -1.34063720703125, 0.4790496826171875, -0.13177490234375, -0.6788177490234375, 0.5181732177734375, -0.4339790344238281, 0.4646339416503906, -0.5093116760253906, -0.14192962646484375, -0.5736961364746094, -0.31841278076171875, -0.2347259521484375, 0.21511077880859375, 0.39038848876953125, -0.14275360107421875, -0.22955322265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000011.npy"}
{"epoch": 0.016152716593245228, "step": 12, "batch_size": 64, "mean": 0.005901157855987549, "std": 0.4333747327327728, "min": -1.20166015625, "p10": -0.5622680664062499, "median": 0.029298782348632812, "p90": 0.5754165649414062, "max": 0.8854827880859375, "pos_frac": 0.53125, "sample": [0.492279052734375, 0.26385498046875, 0.1423187255859375, -1.20166015625, -0.12641143798828125, -0.3177642822265625, 0.07976531982421875, -0.14760208129882812, 0.6872711181640625, 0.7110366821289062, 0.08893585205078125, -0.244903564453125, -0.6677169799804688, 0.474365234375, -0.27910614013671875, 0.8854827880859375, -0.15653228759765625, 0.61834716796875, -0.02398681640625, 0.16069412231445312, -1.120697021484375, -0.33226776123046875, 0.70654296875, -0.0976715087890625, 0.022769927978515625, -0.26482391357421875, -0.288360595703125, 0.5640411376953125, -0.2644004821777344, -0.571624755859375, -0.540435791015625, 0.745849609375, 0.1737518310546875, -0.1004486083984375, -0.8997268676757812, 0.2420024871826172, -0.04316139221191406, -0.226898193359375, -0.3194580078125, -0.09997749328613281, -0.19348907470703125, 0.16186904907226562, -0.6494178771972656, 0.17780685424804688, -0.8262786865234375, 0.11347198486328125, 0.48638916015625, -0.08006668090820312, 0.3527851104736328, 0.45659637451171875, 0.14620208740234375, 0.13780784606933594, 0.03582763671875, 0.1669788360595703, 0.2756805419921875, -0.19403076171875, 0.580291748046875, 0.1027679443359375, -0.03559112548828125, 0.010402679443359375, 0.12530136108398438, -0.06374359130859375, 0.272613525390625, 0.0938262939453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000012.npy"}
{"epoch": 0.01762114537444934, "step": 13, "batch_size": 64, "mean": 0.009646564722061157, "std": 0.4301586151123047, "min": -1.637359619140625, "p10": -0.3507537841796875, "median": 0.00809478759765625, "p90": 0.4263500213623048, "max": 1.6751632690429688, "pos_frac": 0.515625, "sample": [-0.2741241455078125, -0.052059173583984375, 0.11940765380859375, -0.44556427001953125, 0.28227996826171875, 0.2332763671875, 0.10430145263671875, 0.21355438232421875, 0.8230056762695312, -0.3305206298828125, 0.11138153076171875, 0.6058120727539062, 0.4803009033203125, -0.08669281005859375, 0.013095855712890625, 1.6751632690429688, -0.33232879638671875, -0.083282470703125, -0.16844558715820312, -0.22289276123046875, 0.018259048461914062, -0.03186798095703125, 0.10190582275390625, 0.08144760131835938, -0.10750198364257812, -0.133544921875, 0.09861373901367188, -0.08642387390136719, -0.3276214599609375, -0.32843017578125, -0.45703887939453125, 0.4378204345703125, 0.11523056030273438, 0.3995857238769531, -1.637359619140625, -0.8367767333984375, -0.20478057861328125, -0.20548057556152344, -0.08886528015136719, -0.297576904296875, 0.2574653625488281, 0.30125999450683594, 0.004119873046875, 0.5090866088867188, 0.0120697021484375, 0.08625030517578125, -0.17089462280273438, -0.08344650268554688, 0.2252044677734375, 0.0472259521484375, -0.5036849975585938, -0.05437469482421875, -0.241363525390625, 0.08892822265625, -0.35865020751953125, 0.16922760009765625, 0.30454254150390625, 0.0879058837890625, 1.00634765625, -0.524810791015625, -0.0253143310546875, 0.12322235107421875, 0.34747314453125, -0.16567230224609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000013.npy"}
{"epoch": 0.01908957415565345, "step": 14, "batch_size": 64, "mean": 0.0010128915309906006, "std": 0.4283331632614136, "min": -1.479461669921875, "p10": -0.4663551330566406, "median": -0.012849807739257812, "p90": 0.4951553344726564, "max": 1.1710586547851562, "pos_frac": 0.46875, "sample": [-0.47235870361328125, 0.28043365478515625, 0.06099700927734375, 0.024158477783203125, 0.668853759765625, 0.5144500732421875, -0.625152587890625, -0.2615966796875, 0.4221954345703125, 0.04989433288574219, -0.13574981689453125, -0.027278900146484375, 0.9397125244140625, 1.1710586547851562, -0.3539009094238281, -0.108642578125, -0.3332405090332031, -0.00858306884765625, -0.3509101867675781, 0.07254981994628906, 0.21436309814453125, 0.036319732666015625, -0.123931884765625, 0.45013427734375, -0.030029296875, -0.5933799743652344, -0.6619873046875, 0.2104644775390625, 0.067474365234375, 0.36624908447265625, -0.26128387451171875, -0.023212432861328125, -0.285675048828125, 0.9023399353027344, -0.1327037811279297, 0.25653839111328125, -0.017116546630859375, 0.721954345703125, 0.24257850646972656, 0.03504371643066406, -0.4678955078125, 0.17490386962890625, -0.7355499267578125, -0.02863311767578125, -0.12773513793945312, 0.08661651611328125, 0.4225883483886719, 0.12268829345703125, 0.060211181640625, -0.0175323486328125, -1.479461669921875, -0.23123931884765625, 0.05445098876953125, -0.2818717956542969, -0.21527862548828125, 0.6582717895507812, -0.005706787109375, 0.33038330078125, -0.46276092529296875, -0.42305755615234375, -0.4161567687988281, 0.2556037902832031, -0.03408050537109375, -0.07496261596679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000014.npy"}
{"epoch": 0.020558002936857563, "step": 15, "batch_size": 64, "mean": 0.029320329427719116, "std": 0.3721500337123871, "min": -1.3077239990234375, "p10": -0.3649717330932617, "median": 0.045134544372558594, "p90": 0.4791122436523438, "max": 1.141876220703125, "pos_frac": 0.578125, "sample": [-0.07243156433105469, 0.901947021484375, 0.024091720581054688, -0.029165267944335938, 0.22891998291015625, 0.14734268188476562, 0.001201629638671875, 0.19197845458984375, -0.2300567626953125, 0.25641632080078125, -0.13161468505859375, -0.09997749328613281, 0.31307220458984375, -0.6687393188476562, 0.4200096130371094, -0.0316619873046875, 0.030094146728515625, -0.10471343994140625, 0.549285888671875, 0.245391845703125, 0.0937042236328125, -1.3077239990234375, -0.3080902099609375, -0.09282684326171875, -0.46903228759765625, 0.1169281005859375, 0.11470794677734375, -0.32163238525390625, 0.16933441162109375, 0.0630645751953125, -0.168212890625, -0.434539794921875, 0.03924560546875, -0.0020046234130859375, 0.6167449951171875, 0.4800872802734375, -0.5722808837890625, 0.6367950439453125, 0.1488189697265625, -0.3723011016845703, -0.19353675842285156, 0.027833938598632812, 0.05102348327636719, 0.07137680053710938, 0.1047210693359375, -0.13193130493164062, 0.1528644561767578, -0.1832561492919922, 0.48014068603515625, 0.17539596557617188, 0.476837158203125, -0.1295928955078125, 0.05683135986328125, 0.11561965942382812, -0.28636932373046875, 0.25127410888671875, 0.18218994140625, -0.347869873046875, 0.083892822265625, -0.181854248046875, -0.22945404052734375, -0.56634521484375, 0.3826560974121094, 1.141876220703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000015.npy"}
{"epoch": 0.022026431718061675, "step": 16, "batch_size": 64, "mean": 0.06042572855949402, "std": 0.3054898679256439, "min": -0.633697509765625, "p10": -0.32482070922851564, "median": 0.022003173828125, "p90": 0.4334480285644533, "max": 0.7891464233398438, "pos_frac": 0.5625, "sample": [-0.026041030883789062, 0.3798828125, -0.17888641357421875, 0.02234649658203125, 0.14044952392578125, 0.22310638427734375, -0.01763916015625, -0.04574775695800781, 0.13165283203125, 0.5761489868164062, -0.486480712890625, 0.117095947265625, 0.7262191772460938, 0.5210952758789062, 0.016569137573242188, -0.157440185546875, -0.044078826904296875, -0.41613006591796875, -0.3293914794921875, 0.02165985107421875, -0.579986572265625, 0.21640968322753906, 0.18822860717773438, -0.33319091796875, 0.2689208984375, -0.20293617248535156, -0.633697509765625, 0.11115455627441406, -0.031154632568359375, -0.06329154968261719, -0.2885246276855469, 0.5588951110839844, 0.007022857666015625, -0.2329254150390625, -0.21125030517578125, 0.2392425537109375, 0.2328033447265625, 0.3532257080078125, -0.07256317138671875, -0.05965423583984375, 0.36260223388671875, -0.18358612060546875, 0.17421340942382812, 0.171875, 0.7891464233398438, 0.38692474365234375, -0.37490272521972656, -0.2892436981201172, 0.10639190673828125, -0.112548828125, 0.2019329071044922, -0.11588287353515625, 0.0155487060546875, 0.19721031188964844, -0.08812713623046875, 0.0269775390625, 0.23016738891601562, 0.3284912109375, 0.5762901306152344, 0.3927154541015625, -0.085662841796875, 0.45090484619140625, 0.37884521484375, -0.31415557861328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000016.npy"}
{"epoch": 0.023494860499265784, "step": 17, "batch_size": 64, "mean": 0.10754308104515076, "std": 0.3486311137676239, "min": -0.734466552734375, "p10": -0.2747003555297851, "median": 0.11061859130859375, "p90": 0.5492942810058594, "max": 1.1995849609375, "pos_frac": 0.59375, "sample": [-0.5289115905761719, 0.09411048889160156, 0.33838653564453125, -0.08798980712890625, -0.352752685546875, -0.08141326904296875, -0.09307861328125, -0.734466552734375, 0.083953857421875, -0.154327392578125, 0.17893218994140625, 0.5125579833984375, 0.426422119140625, -0.1830596923828125, -0.7002544403076172, 0.0629119873046875, -0.14076995849609375, 0.13175582885742188, -0.00936126708984375, 0.7049026489257812, 0.26924896240234375, 0.18049240112304688, 0.21676254272460938, 0.103729248046875, -0.03455352783203125, 0.333099365234375, 0.17018890380859375, 0.195556640625, 0.4893798828125, 0.2562408447265625, 0.16187286376953125, 0.7891159057617188, 0.1175079345703125, 0.332733154296875, -0.03486061096191406, 0.22969818115234375, -0.062175750732421875, -0.1985034942626953, -0.2923717498779297, 0.25238037109375, 0.5595779418945312, 0.06863021850585938, 0.4790077209472656, 0.24691200256347656, -0.43544769287109375, -0.025604248046875, -0.19278335571289062, 0.6278438568115234, 0.2541961669921875, -0.1522064208984375, -0.089111328125, -0.049968719482421875, 0.1182098388671875, -0.13085174560546875, 0.29383087158203125, 0.525299072265625, 1.1995849609375, 0.6122627258300781, 0.61431884765625, -0.39846038818359375, -0.17499160766601562, 0.0375518798828125, 0.185333251953125, -0.23346710205078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000017.npy"}
{"epoch": 0.024963289280469897, "step": 18, "batch_size": 64, "mean": 0.0317995548248291, "std": 0.36544162034988403, "min": -0.9456634521484375, "p10": -0.3904548645019531, "median": 0.035816192626953125, "p90": 0.49973163604736337, "max": 0.872283935546875, "pos_frac": 0.53125, "sample": [0.15006256103515625, -0.9212875366210938, 0.08345794677734375, 0.01496124267578125, 0.2623252868652344, 0.03221893310546875, 0.13051414489746094, -0.648834228515625, -0.0827178955078125, 0.47240257263183594, 0.4317474365234375, -0.1723003387451172, 0.29035186767578125, 0.7525177001953125, -0.2287750244140625, -0.08542633056640625, 0.19806480407714844, 0.24301719665527344, -0.020061492919921875, -0.3933868408203125, -0.00543975830078125, -0.001445770263671875, 0.511444091796875, -0.081634521484375, 0.5651359558105469, 0.3323020935058594, 0.5580062866210938, 0.3588714599609375, -0.2858409881591797, 0.33957672119140625, 0.3498420715332031, 0.1511821746826172, 0.26827239990234375, -0.34039306640625, 0.52069091796875, -0.4714508056640625, -0.07143402099609375, 0.0394134521484375, -0.38361358642578125, -0.0751190185546875, 0.5347709655761719, 0.872283935546875, -0.890716552734375, 0.14020538330078125, 0.4238433837890625, 0.10968780517578125, 0.10106658935546875, -0.3210334777832031, -0.2531280517578125, 0.07212448120117188, 0.10580825805664062, -0.093994140625, -0.41595458984375, -0.05971527099609375, 0.11084747314453125, -0.155242919921875, 0.2698822021484375, -0.07549285888671875, -0.9456634521484375, 0.09850502014160156, -0.014814376831054688, -0.123504638671875, -0.12859344482421875, -0.11321830749511719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000018.npy"}
{"epoch": 0.02643171806167401, "step": 19, "batch_size": 64, "mean": 0.11113619804382324, "std": 0.3500584363937378, "min": -1.1013031005859375, "p10": -0.2136514663696289, "median": 0.06773662567138672, "p90": 0.5630462646484378, "max": 1.4053955078125, "pos_frac": 0.609375, "sample": [0.1874675750732422, -0.1949901580810547, -0.32861328125, -0.17777633666992188, 0.37270355224609375, -0.221649169921875, -0.1204071044921875, 0.15423202514648438, 0.04536247253417969, 0.6990737915039062, 0.589569091796875, -0.02425384521484375, 0.659027099609375, 0.40985870361328125, 1.4053955078125, -1.1013031005859375, 0.1618499755859375, 0.06277847290039062, -0.0384979248046875, 0.20379638671875, -0.4046630859375, 0.0354766845703125, -0.03929710388183594, 0.13861083984375, -0.038028717041015625, 0.063873291015625, 0.274688720703125, 0.1753692626953125, 0.11197662353515625, -0.2338714599609375, -0.182098388671875, 0.07159996032714844, 0.2809162139892578, 0.03300666809082031, -0.194732666015625, 0.11509895324707031, -0.22167205810546875, -0.284637451171875, 0.50115966796875, 0.712493896484375, 0.16522979736328125, 0.4188690185546875, -0.15973663330078125, -0.0269012451171875, -0.00225067138671875, 0.7743301391601562, 0.0973968505859375, 0.449127197265625, -0.1306915283203125, 0.2668647766113281, 0.31606292724609375, 0.20215225219726562, -0.008544921875, -0.026628494262695312, 0.19464111328125, 0.16596031188964844, -0.17488861083984375, 0.2631950378417969, 0.0322418212890625, 0.01702880859375, 0.7530670166015625, 0.14934158325195312, -0.17995452880859375, -0.10208892822265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000019.npy"}
{"epoch": 0.027900146842878122, "step": 20, "batch_size": 64, "mean": 0.07542020082473755, "std": 0.31793731451034546, "min": -0.8017425537109375, "p10": -0.23335704803466795, "median": 0.028299331665039062, "p90": 0.4594173431396486, "max": 0.9977035522460938, "pos_frac": 0.609375, "sample": [-0.013490676879882812, 0.17618942260742188, -0.042087554931640625, 0.4130859375, -0.165740966796875, 0.415771484375, 0.0328216552734375, 0.3463630676269531, 0.3111114501953125, 0.0747222900390625, -0.8017425537109375, 0.4811859130859375, 0.2582206726074219, 0.01532745361328125, 0.01204681396484375, -0.04012298583984375, -0.14083480834960938, 0.2744407653808594, -0.30699920654296875, 0.529083251953125, 0.2016448974609375, -0.7068023681640625, 0.13829803466796875, 0.03916168212890625, -0.083648681640625, -0.15311622619628906, -0.6802444458007812, -0.036113739013671875, 0.19498062133789062, 0.01218414306640625, -0.059047698974609375, -0.2969169616699219, -0.48925018310546875, 0.0029087066650390625, 0.022006988525390625, -0.1164093017578125, 0.219268798828125, -0.19548797607421875, -0.08725738525390625, -0.20418548583984375, -0.11554718017578125, 0.496673583984375, 0.33452606201171875, 0.34918975830078125, 0.4781227111816406, -0.048503875732421875, 0.1889801025390625, 0.2194061279296875, 0.9977035522460938, -0.07479476928710938, -0.0116729736328125, 0.1735992431640625, 0.16977882385253906, 0.39810943603515625, 0.742095947265625, -0.24585914611816406, 0.011322021484375, 0.2678070068359375, 0.09984016418457031, 0.06274032592773438, -0.09959793090820312, 0.6706771850585938, 0.18719482421875, 0.023777008056640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000020.npy"}
{"epoch": 0.02936857562408223, "step": 21, "batch_size": 64, "mean": 0.06414888799190521, "std": 0.3082134425640106, "min": -0.5070648193359375, "p10": -0.3398246765136719, "median": 0.06994056701660156, "p90": 0.3462173461914063, "max": 1.2077178955078125, "pos_frac": 0.625, "sample": [0.00807952880859375, 0.012805938720703125, -0.08246612548828125, 0.1129302978515625, -0.12456512451171875, 0.004872322082519531, -0.12888717651367188, 0.18958473205566406, 0.262725830078125, 0.3550567626953125, 0.41510009765625, 0.09514617919921875, -0.06507682800292969, 0.3600196838378906, 0.2075653076171875, 0.73577880859375, 0.1009063720703125, 0.08041954040527344, -0.14782142639160156, 0.06701278686523438, -0.3309326171875, 0.1608734130859375, -0.34363555908203125, -0.06576156616210938, 0.07088851928710938, 0.8012161254882812, 0.137176513671875, -0.398193359375, -0.15203857421875, 0.6372604370117188, 0.1448516845703125, 0.06899261474609375, -0.2605400085449219, 0.24495887756347656, -0.3999481201171875, -0.12329864501953125, -0.5000495910644531, 0.2352466583251953, 1.2077178955078125, -0.5070648193359375, 0.137786865234375, 0.17507362365722656, -0.009063720703125, -0.03182792663574219, 0.17751693725585938, -0.1423358917236328, -0.014347076416015625, -0.11165618896484375, 0.30391693115234375, -0.26503753662109375, 0.2410125732421875, 0.19426727294921875, 0.31543731689453125, 0.046417236328125, 0.03778266906738281, -0.20795059204101562, 0.1862945556640625, -0.4647674560546875, 0.17533111572265625, 0.31396484375, -0.4893798828125, 0.10086441040039062, 0.325592041015625, 0.023729324340820312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000021.npy"}
{"epoch": 0.030837004405286344, "step": 22, "batch_size": 64, "mean": 0.206741064786911, "std": 0.4434663951396942, "min": -0.6882171630859375, "p10": -0.3468109130859375, "median": 0.21599674224853516, "p90": 0.8212585449218751, "max": 1.3379440307617188, "pos_frac": 0.6875, "sample": [0.13161087036132812, 0.872528076171875, -0.32906341552734375, 0.21917152404785156, 0.430389404296875, 0.2746429443359375, -0.092071533203125, 0.011600494384765625, 0.32904624938964844, -0.10809326171875, 1.3379440307617188, 0.609344482421875, -0.35318756103515625, 0.9714279174804688, -0.15939712524414062, 0.6170578002929688, 0.21282196044921875, 0.0484771728515625, -0.240570068359375, -0.48065185546875, 0.09432220458984375, 0.5374603271484375, -0.6182861328125, 0.7970123291015625, -0.19370269775390625, 0.1927967071533203, 0.9144973754882812, 0.39037322998046875, 0.30341339111328125, 0.058319091796875, 0.024629592895507812, 0.714599609375, -0.23854827880859375, -0.21891021728515625, 0.4943809509277344, 0.6719818115234375, 0.37210845947265625, -0.33193206787109375, -0.04540252685546875, 0.3915863037109375, 0.23773193359375, 0.39939117431640625, 0.20778656005859375, 0.019016265869140625, 0.018550872802734375, 0.7652206420898438, -0.1911163330078125, -0.6882171630859375, 1.056610107421875, 0.2322845458984375, 0.31436920166015625, 0.02530670166015625, -0.2707328796386719, -0.4319419860839844, -0.4151611328125, 0.242706298828125, 0.5021743774414062, -0.19402122497558594, 0.3004302978515625, 0.96002197265625, 0.36627197265625, 0.8316497802734375, 0.682647705078125, -0.35327911376953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000022.npy"}
{"epoch": 0.032305433186490456, "step": 23, "batch_size": 64, "mean": 0.2705477774143219, "std": 0.4138149321079254, "min": -0.85565185546875, "p10": -0.11949157714843749, "median": 0.20740795135498047, "p90": 0.7874473571777345, "max": 1.4178848266601562, "pos_frac": 0.78125, "sample": [-0.0405731201171875, -0.048610687255859375, 0.8022079467773438, 0.040477752685546875, 0.18039703369140625, 0.3389625549316406, 0.39803314208984375, 0.0228118896484375, -0.3590526580810547, -0.85565185546875, 0.1730194091796875, 1.4178848266601562, 0.306793212890625, 0.09051513671875, 0.36936187744140625, 0.48082733154296875, 0.49271583557128906, 0.04036712646484375, 0.2262725830078125, -0.0640869140625, 0.7095413208007812, 0.24301528930664062, 1.3955841064453125, 0.31696319580078125, -0.02065277099609375, 0.1766815185546875, -0.47829437255859375, -0.5174369812011719, 0.7173614501953125, 1.1053009033203125, 0.028949737548828125, 0.9972991943359375, 0.06673431396484375, 0.6245994567871094, 0.03386497497558594, 0.1037139892578125, -0.104400634765625, 0.2798614501953125, 0.18854331970214844, 0.4280834197998047, 0.27816009521484375, 0.17059707641601562, 0.25727081298828125, 0.6846046447753906, 0.13358306884765625, 0.93463134765625, 0.07613754272460938, 0.166351318359375, 0.086395263671875, -0.1259002685546875, 0.4784507751464844, 0.5290470123291016, 0.15288543701171875, -0.17963218688964844, -0.013311386108398438, 0.6668853759765625, 0.460723876953125, 0.8756103515625, -0.1452465057373047, -0.1045379638671875, 0.25107574462890625, 0.31915283203125, 0.3011322021484375, 0.7530059814453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000023.npy"}
{"epoch": 0.033773861967694566, "step": 24, "batch_size": 64, "mean": 0.24319320917129517, "std": 0.3789023756980896, "min": -0.5788421630859375, "p10": -0.21066436767578126, "median": 0.25905323028564453, "p90": 0.7156667709350588, "max": 1.468292236328125, "pos_frac": 0.734375, "sample": [0.5647048950195312, 0.22069549560546875, -0.163970947265625, -0.15216064453125, 0.7410526275634766, -0.2100982666015625, 0.28823089599609375, 0.115936279296875, 0.9733428955078125, 0.07723236083984375, 0.2868614196777344, -0.09291458129882812, -0.05219841003417969, 0.34140777587890625, 0.6547966003417969, -0.14048004150390625, -0.00745391845703125, 0.03632545471191406, 0.40465545654296875, -0.25708770751953125, 0.7495765686035156, -0.3822479248046875, -0.1859111785888672, 0.44640350341796875, 0.8257904052734375, 0.4961509704589844, 0.2424793243408203, -0.5788421630859375, 0.422393798828125, 0.27562713623046875, 1.468292236328125, 0.458587646484375, 0.4291191101074219, -0.19873809814453125, 0.6384429931640625, -0.12103271484375, 0.1874847412109375, 0.548095703125, -0.4881744384765625, 0.1422882080078125, 0.76904296875, 0.12451362609863281, -0.4048347473144531, 0.49160003662109375, 0.20836639404296875, 0.5399856567382812, -0.210906982421875, 0.12822914123535156, 0.65643310546875, 0.2815074920654297, 0.327484130859375, 0.18404006958007812, 0.17240524291992188, 0.24210166931152344, 0.4122161865234375, 0.3589935302734375, 0.5300865173339844, 0.0412445068359375, 0.35106849670410156, -0.32421112060546875, 0.30876922607421875, 0.1848602294921875, 0.36224365234375, 0.824462890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000024.npy"}
{"epoch": 0.03524229074889868, "step": 25, "batch_size": 64, "mean": 0.2395840287208557, "std": 0.493831992149353, "min": -0.912506103515625, "p10": -0.3890365600585937, "median": 0.21311187744140625, "p90": 0.8702682495117188, "max": 1.4177093505859375, "pos_frac": 0.671875, "sample": [0.4282035827636719, 0.1508636474609375, 0.27056884765625, 0.32250213623046875, -0.166473388671875, -0.017641067504882812, -0.912506103515625, 0.786651611328125, -0.40380859375, 0.04109954833984375, 0.1556549072265625, 0.903656005859375, -0.045574188232421875, 0.57855224609375, 1.4177093505859375, -0.3545684814453125, -0.46321678161621094, 1.0084228515625, 0.5165786743164062, 0.449951171875, -0.2298431396484375, -0.231658935546875, 0.5328960418701172, 0.760711669921875, 0.2902984619140625, 0.37060546875, -0.2667083740234375, 0.9017181396484375, -0.7278594970703125, -0.022212982177734375, 0.4969329833984375, -0.01554107666015625, 0.2938346862792969, 0.8257904052734375, 0.41091156005859375, 0.13558197021484375, 0.155609130859375, 0.8778076171875, -0.8358917236328125, 0.8526763916015625, 0.30106544494628906, 0.7777862548828125, 0.942169189453125, 0.3220558166503906, -0.13441085815429688, -0.19231414794921875, -0.02127838134765625, 0.1370391845703125, 0.7448196411132812, 0.3130226135253906, 1.0805816650390625, 0.032642364501953125, 0.3144569396972656, -0.7196044921875, 0.5967330932617188, 0.06573104858398438, -0.4397735595703125, 0.8284988403320312, -0.11540603637695312, 0.14252090454101562, 0.14324569702148438, 0.8358001708984375, -0.00139617919921875, 0.13710784912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000025.npy"}
{"epoch": 0.03671071953010279, "step": 26, "batch_size": 64, "mean": 0.37870919704437256, "std": 0.5413480997085571, "min": -0.4564056396484375, "p10": -0.181884765625, "median": 0.30806541442871094, "p90": 1.0313491821289062, "max": 1.99169921875, "pos_frac": 0.765625, "sample": [0.7156524658203125, 0.08474540710449219, 0.3180961608886719, 0.04680824279785156, 1.4901123046875, 0.09639739990234375, 0.5727310180664062, 1.151947021484375, 0.137176513671875, -0.06720161437988281, -0.29937744140625, 0.08972930908203125, 1.81317138671875, 0.38822174072265625, 0.092681884765625, 0.754974365234375, 0.6745529174804688, -0.163543701171875, 0.420867919921875, -0.21583938598632812, 0.21643829345703125, 0.011690139770507812, 0.43784332275390625, 0.4549827575683594, 0.29803466796875, 0.9150161743164062, -0.18547821044921875, -0.3322601318359375, 0.40361785888671875, 0.5408096313476562, -0.04695892333984375, -0.4564056396484375, 0.25511741638183594, 0.4009723663330078, -0.11382675170898438, 0.4036865234375, 1.667724609375, -0.147369384765625, 0.25574493408203125, 0.8127288818359375, 0.19910812377929688, 0.3804779052734375, 1.99169921875, 0.5758819580078125, 0.7429313659667969, 1.0417633056640625, -0.3347587585449219, 0.41339111328125, 0.10787773132324219, 0.06578254699707031, -0.4409294128417969, -0.10100936889648438, -0.17350006103515625, 0.40538787841796875, 0.136138916015625, 0.515411376953125, 0.8547134399414062, 1.7485733032226562, 1.007049560546875, 0.08044815063476562, 0.2202911376953125, -0.09522628784179688, 0.4923248291015625, 0.5095481872558594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000026.npy"}
{"epoch": 0.0381791483113069, "step": 27, "batch_size": 64, "mean": 0.5219472646713257, "std": 0.5141395330429077, "min": -0.261474609375, "p10": -0.05240001678466792, "median": 0.41822052001953125, "p90": 1.1893386840820315, "max": 1.9866104125976562, "pos_frac": 0.875, "sample": [0.18301963806152344, 0.5727996826171875, 0.502960205078125, 0.44484710693359375, 0.6270980834960938, 1.1485366821289062, 0.23211669921875, 0.688323974609375, 1.7204818725585938, 0.2648200988769531, 0.6349105834960938, 0.0105743408203125, 1.1391754150390625, -0.07242202758789062, 0.396697998046875, 0.4168701171875, 1.9866104125976562, 0.21929931640625, 0.3165740966796875, 1.0002059936523438, 0.4692535400390625, 0.0068817138671875, 0.18283843994140625, -0.0056819915771484375, 0.25646209716796875, 0.3414459228515625, 0.04593849182128906, 0.08858299255371094, 0.5111427307128906, 1.8701324462890625, -0.16314697265625, -0.1167144775390625, -0.10153961181640625, 1.88555908203125, 0.8954086303710938, 0.22186279296875, 0.6445770263671875, 0.7666473388671875, 0.32796478271484375, 0.3065948486328125, 0.057262420654296875, 0.4559478759765625, 0.141632080078125, 1.0400772094726562, 0.47699737548828125, 0.900390625, -0.11808013916015625, 0.3351116180419922, 0.9842758178710938, 0.4867401123046875, 1.23193359375, 0.15697860717773438, 0.675537109375, 0.26186370849609375, 0.4195709228515625, 1.2068252563476562, 1.3327178955078125, -0.11540794372558594, 0.24570083618164062, -0.261474609375, 0.5312728881835938, 0.9332504272460938, 0.307830810546875, 0.8499603271484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000027.npy"}
{"epoch": 0.039647577092511016, "step": 28, "batch_size": 64, "mean": 0.36013028025627136, "std": 0.6014600396156311, "min": -1.4525146484375, "p10": -0.292889404296875, "median": 0.35738372802734375, "p90": 1.028032684326172, "max": 2.2101898193359375, "pos_frac": 0.75, "sample": [1.1276779174804688, 0.1740570068359375, -0.14539337158203125, 0.13084983825683594, 0.4761180877685547, -0.1380462646484375, -0.273834228515625, 0.3813934326171875, -0.18882369995117188, 0.20379257202148438, 0.36760711669921875, 0.662445068359375, 1.0475311279296875, 0.657684326171875, 0.36078643798828125, -0.12270355224609375, 0.5693016052246094, 0.2404327392578125, 0.3319854736328125, 0.600921630859375, 0.4873695373535156, -0.9485855102539062, 0.03804779052734375, 1.473724365234375, 0.08940887451171875, 0.6173324584960938, 0.08278656005859375, 0.8543167114257812, 1.0550079345703125, 0.9044952392578125, 0.7363128662109375, 0.03229522705078125, -0.09845352172851562, -0.301055908203125, 0.8462104797363281, 0.3365764617919922, 0.4647216796875, 0.8416366577148438, 0.3279914855957031, 0.467315673828125, 0.52099609375, 0.5603160858154297, 0.11356544494628906, 0.84283447265625, 1.782806396484375, 0.2657623291015625, 0.9825363159179688, -0.12276649475097656, 0.943084716796875, -0.314727783203125, 0.9324588775634766, -0.7049636840820312, -0.3025779724121094, 0.52734375, -0.17371749877929688, 0.40218353271484375, 0.35398101806640625, 1.2928924560546875, 2.2101898193359375, -0.10295486450195312, 0.04470062255859375, -1.4525146484375, -0.34185791015625, 0.017528533935546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000028.npy"}
{"epoch": 0.041116005873715125, "step": 29, "batch_size": 64, "mean": 0.5567758679389954, "std": 0.5686749815940857, "min": -0.3724250793457031, "p10": -0.09239253997802732, "median": 0.5233612060546875, "p90": 1.1527282714843752, "max": 2.991455078125, "pos_frac": 0.84375, "sample": [0.7795867919921875, 0.03495025634765625, 0.3055877685546875, 0.6639289855957031, -0.07665252685546875, 0.32082176208496094, 0.1028594970703125, 0.6999282836914062, 0.6386260986328125, 0.17673492431640625, 0.4615516662597656, -0.028141021728515625, 0.9139480590820312, 0.6131820678710938, 0.1831817626953125, 0.8812255859375, 0.9775238037109375, -0.13516998291015625, 0.10093116760253906, 1.3250503540039062, -0.1638641357421875, 0.355926513671875, 0.614501953125, 0.4335155487060547, 0.8181228637695312, 0.7964401245117188, -0.3724250793457031, 0.2644920349121094, 0.560516357421875, 0.752288818359375, -0.11136436462402344, 1.17559814453125, 2.991455078125, -0.2067718505859375, 0.493316650390625, 0.17224502563476562, 0.1274566650390625, 0.12610626220703125, 1.7411651611328125, 1.099365234375, 0.884429931640625, 1.038604736328125, 0.3768119812011719, 0.55340576171875, 0.5759372711181641, -0.09913825988769531, 0.8807373046875, 0.2929840087890625, 0.7023582458496094, 0.12822723388671875, 0.379150390625, 0.13332366943359375, 0.709808349609375, 0.37822723388671875, 0.8341865539550781, 1.2412872314453125, -0.0535125732421875, 1.6932754516601562, 0.5987815856933594, 0.4742164611816406, 1.7395858764648438, 0.93817138671875, -0.281341552734375, 0.9063949584960938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000029.npy"}
{"epoch": 0.042584434654919234, "step": 30, "batch_size": 64, "mean": 0.7079058289527893, "std": 0.6552006006240845, "min": -0.41600799560546875, "p10": -0.08536376953125, "median": 0.6320343017578125, "p90": 1.5072158813476564, "max": 3.364990234375, "pos_frac": 0.875, "sample": [0.6613540649414062, 0.5860214233398438, -0.09296226501464844, 0.9230728149414062, 1.7312469482421875, -0.29601287841796875, 0.09174346923828125, 0.4889068603515625, 0.12222862243652344, 0.7850494384765625, 0.9357376098632812, 1.1751327514648438, 0.7069473266601562, 0.9803199768066406, 1.479156494140625, 1.5400238037109375, 0.7650909423828125, 0.8964157104492188, -0.08731842041015625, 1.5192413330078125, 0.9484481811523438, 0.2890625, 3.364990234375, 0.6027145385742188, 0.9537506103515625, 0.5631027221679688, 0.545379638671875, 0.47145843505859375, -0.27263641357421875, 0.19425201416015625, 0.5032119750976562, 0.07378005981445312, 0.884429931640625, 0.21383094787597656, 0.15954971313476562, 0.4063873291015625, 1.414520263671875, 1.658233642578125, 1.2607574462890625, 0.7636394500732422, 0.1987152099609375, 1.37628173828125, 0.8248882293701172, 0.34645843505859375, -0.3920936584472656, 1.2135086059570312, 1.1814727783203125, 0.2867774963378906, 0.8070449829101562, 0.957550048828125, 1.4445037841796875, 1.7040023803710938, 0.4357337951660156, -0.08080291748046875, 0.026742935180664062, 1.2188262939453125, 0.5783882141113281, 0.015819549560546875, 0.5782890319824219, -0.09870338439941406, -0.41600799560546875, 1.8297119140625, 0.80023193359375, 0.5583744049072266], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000030.npy"}
{"epoch": 0.04405286343612335, "step": 31, "batch_size": 64, "mean": 0.492476224899292, "std": 0.6538965106010437, "min": -0.8985137939453125, "p10": -0.19391326904296874, "median": 0.3832082748413086, "p90": 1.2969055175781252, "max": 2.550933837890625, "pos_frac": 0.796875, "sample": [0.03210639953613281, 1.246917724609375, -0.049072265625, 0.765594482421875, 1.0937576293945312, 0.424591064453125, -0.198486328125, -0.31035614013671875, 0.4679985046386719, -0.5715179443359375, 2.550933837890625, -0.06760406494140625, 0.18831634521484375, 0.5584335327148438, 0.2774810791015625, 0.020040512084960938, 1.3154220581054688, -0.11706161499023438, 1.6423873901367188, 0.6742305755615234, 0.3929901123046875, 0.0730743408203125, 0.26781463623046875, 0.090911865234375, 0.1865997314453125, 0.2905254364013672, 0.9868888854980469, 0.46042633056640625, 0.6913032531738281, 0.099212646484375, 2.016357421875, 0.5092544555664062, 1.90411376953125, -0.362060546875, 0.3687477111816406, -0.3096923828125, 0.221923828125, 0.5246047973632812, -0.15528106689453125, 0.9146499633789062, 1.0303573608398438, 1.2537002563476562, -0.29877471923828125, 0.3734264373779297, -0.1832427978515625, -0.8985137939453125, 0.3673858642578125, 0.819976806640625, 0.1413421630859375, -0.03934669494628906, 0.491302490234375, 1.3676223754882812, 0.8547515869140625, 0.010101318359375, 0.05279541015625, 0.24622344970703125, 0.18230056762695312, 0.53582763671875, 0.5784988403320312, 0.5501174926757812, 0.8727874755859375, 1.9662246704101562, 1.196380615234375, 0.930755615234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000031.npy"}
{"epoch": 0.04552129221732746, "step": 32, "batch_size": 64, "mean": 0.7637800574302673, "std": 0.9055874943733215, "min": -1.0298004150390625, "p10": -0.16612014770507813, "median": 0.5863561630249023, "p90": 1.9382125854492191, "max": 4.1165771484375, "pos_frac": 0.84375, "sample": [0.571624755859375, 0.2105865478515625, 1.3085556030273438, 1.0286941528320312, 0.7469482421875, 1.018280029296875, -0.1695098876953125, 0.71099853515625, 0.4255790710449219, 1.0358123779296875, 1.3278961181640625, -0.45655059814453125, 0.0524749755859375, 0.11553573608398438, -1.0298004150390625, 1.4357376098632812, -0.42998504638671875, 1.0793037414550781, -0.3780670166015625, 2.661224365234375, 0.42897796630859375, 1.4805374145507812, 1.9758453369140625, 0.4795379638671875, 0.7602386474609375, 0.9092254638671875, 0.17176055908203125, 0.5377273559570312, -0.0128936767578125, 0.5675582885742188, 0.31037139892578125, 0.09965896606445312, 0.6010875701904297, 2.6692352294921875, 1.5270233154296875, 0.2898712158203125, 0.956756591796875, 0.7275428771972656, -0.12489700317382812, 0.4359245300292969, 2.4787750244140625, 0.99957275390625, -0.15821075439453125, 0.1635894775390625, 4.1165771484375, 1.218536376953125, 0.31813812255859375, 1.85040283203125, 2.790374755859375, -0.28269386291503906, 0.03200531005859375, 0.10419464111328125, 2.2996902465820312, 1.3449211120605469, -0.2529792785644531, 0.12990379333496094, 0.8092803955078125, 1.2014007568359375, 0.2222614288330078, 0.96807861328125, 0.19114303588867188, 0.7467422485351562, 1.1757659912109375, 0.35802459716796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000032.npy"}
{"epoch": 0.04698972099853157, "step": 33, "batch_size": 64, "mean": 0.5975916385650635, "std": 0.6344035267829895, "min": -0.6681900024414062, "p10": -0.17468910217285144, "median": 0.46619606018066406, "p90": 1.3964073181152343, "max": 2.4553565979003906, "pos_frac": 0.828125, "sample": [-0.3941001892089844, 0.4609565734863281, 0.6709785461425781, 0.368560791015625, 1.6577911376953125, -0.01065826416015625, -0.252716064453125, 0.2698974609375, 0.471435546875, 1.0429840087890625, 0.362884521484375, 1.38787841796875, 1.132598876953125, 0.9759368896484375, 1.064056396484375, 0.33119964599609375, 0.5120010375976562, -0.6681900024414062, 0.3195343017578125, -0.609954833984375, 0.25414276123046875, 1.245941162109375, 0.8640975952148438, 2.0506973266601562, 0.8238601684570312, 0.4588165283203125, 1.6281089782714844, 0.02960205078125, 0.1194305419921875, 1.2283935546875, -0.037403106689453125, -0.36666107177734375, 2.4553565979003906, 0.011121749877929688, -0.04669952392578125, 0.7975387573242188, 0.46063995361328125, 0.5451812744140625, 1.2674369812011719, 0.8716354370117188, 0.2017974853515625, 1.0537796020507812, 0.37824249267578125, 0.36540985107421875, 0.37165069580078125, 1.4000625610351562, 0.43708038330078125, 1.4958877563476562, 0.8171863555908203, -0.009380340576171875, 0.3582611083984375, -0.22954177856445312, 1.5256156921386719, 0.7006340026855469, 0.08355712890625, 0.7782440185546875, 1.191650390625, -0.6423416137695312, 0.42957305908203125, 0.611846923828125, 0.7012939453125, 0.393585205078125, 0.9938926696777344, 1.0835647583007812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000033.npy"}
{"epoch": 0.048458149779735685, "step": 34, "batch_size": 64, "mean": 0.8158169984817505, "std": 0.8475502133369446, "min": -0.570037841796875, "p10": -0.10389842987060544, "median": 0.5883846282958984, "p90": 2.1682266235351566, "max": 3.139617919921875, "pos_frac": 0.84375, "sample": [2.2894744873046875, -0.34139251708984375, 0.5287857055664062, 0.13504409790039062, 2.24273681640625, 0.3976917266845703, 1.7216033935546875, 1.5234832763671875, 0.2183685302734375, -0.29949951171875, 0.380126953125, -0.07632637023925781, 1.111114501953125, 0.4144287109375, 2.2198715209960938, 2.1872100830078125, 0.13805770874023438, -0.22551345825195312, 1.840972900390625, 0.46686553955078125, 1.0957412719726562, 0.06060028076171875, 0.9476871490478516, 1.8420753479003906, 1.16778564453125, 1.638214111328125, 1.018218994140625, -0.11571502685546875, 0.500335693359375, 0.9079666137695312, -0.4199066162109375, 0.604766845703125, 0.7183952331542969, 0.8590660095214844, 2.123931884765625, 0.8700942993164062, 0.8598556518554688, 3.139617919921875, 0.30150604248046875, 0.17729568481445312, 0.4556312561035156, 0.23285484313964844, 1.244110107421875, 1.3095932006835938, 0.8804550170898438, 1.1653518676757812, 0.11310195922851562, 0.013111114501953125, 2.2249298095703125, -0.035121917724609375, 0.3112144470214844, -0.570037841796875, 0.8693466186523438, 0.7063674926757812, 0.4048919677734375, 0.2136096954345703, 2.0475387573242188, -0.043941497802734375, -0.28910064697265625, 0.34104156494140625, 2.0918731689453125, 2.49334716796875, 0.28948211669921875, 0.5720024108886719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000034.npy"}
{"epoch": 0.049926578560939794, "step": 35, "batch_size": 64, "mean": 1.1769659519195557, "std": 1.1176694631576538, "min": -0.42818450927734375, "p10": 0.15845642089843756, "median": 0.9615554809570312, "p90": 2.318426513671875, "max": 6.7027587890625, "pos_frac": 0.921875, "sample": [1.1796035766601562, 0.4361305236816406, 2.03363037109375, 1.6453895568847656, 0.2536773681640625, 0.7318611145019531, 0.7703857421875, 0.1362457275390625, -0.125091552734375, 1.9095611572265625, 2.3313446044921875, 0.2102813720703125, 1.0933609008789062, 0.6141853332519531, 0.23268890380859375, 0.7049560546875, 2.0897369384765625, 3.4026107788085938, 0.5116806030273438, 0.2807121276855469, 0.7914371490478516, 1.8934783935546875, 2.9918365478515625, 0.309051513671875, -0.15047454833984375, 0.9729080200195312, -0.14479637145996094, 1.3530654907226562, 0.5731582641601562, 0.9502029418945312, 6.7027587890625, 1.3162994384765625, 0.4862060546875, 1.56744384765625, 0.45010948181152344, 2.93072509765625, 0.3933563232421875, 1.4089202880859375, 2.0173492431640625, 2.6710205078125, 0.24666213989257812, 2.83648681640625, 1.1510848999023438, 0.9969558715820312, 0.24260711669921875, -0.05158805847167969, 2.07550048828125, 2.2882843017578125, 1.7300872802734375, 0.57928466796875, 1.094614028930664, 0.26233673095703125, 0.7071113586425781, 1.9844512939453125, 1.2466278076171875, 0.8244571685791016, 1.246612548828125, 0.03357124328613281, 1.53485107421875, -0.42818450927734375, 0.7237300872802734, 1.8063201904296875, 1.7924652099609375, 0.4744873046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000035.npy"}
{"epoch": 0.0513950073421439, "step": 36, "batch_size": 64, "mean": 1.094736099243164, "std": 1.2846930027008057, "min": -2.7100677490234375, "p10": -0.19489250183105458, "median": 0.8944025039672852, "p90": 2.8227348327636723, "max": 4.15069580078125, "pos_frac": 0.796875, "sample": [1.4486923217773438, -0.07552337646484375, 0.8630847930908203, 0.8051300048828125, 1.3740463256835938, 0.7127742767333984, 1.54144287109375, -0.5506439208984375, -0.0069141387939453125, 2.852752685546875, 2.3326950073242188, -0.06914710998535156, -0.4393196105957031, 0.4741344451904297, 0.0074863433837890625, 1.33428955078125, 0.07972145080566406, 1.3417205810546875, 1.24639892578125, 0.04227447509765625, -0.2370147705078125, -0.6694736480712891, -0.05329132080078125, 0.1721954345703125, 3.644439697265625, 4.047554016113281, 1.4942741394042969, -0.30400848388671875, 3.08416748046875, 0.7273330688476562, 0.7232646942138672, 1.1945724487304688, -0.09660720825195312, 3.4634170532226562, 0.29773712158203125, -0.06018829345703125, 2.20465087890625, 0.34783172607421875, 1.9979171752929688, 0.7754173278808594, 2.3607254028320312, 1.2041816711425781, 2.259246826171875, 0.9783840179443359, 2.352874755859375, 1.1391677856445312, 0.5956916809082031, 4.15069580078125, 0.7542209625244141, -2.7100677490234375, 2.7246551513671875, 0.061248779296875, 1.5126609802246094, 0.92572021484375, 2.7526931762695312, 1.3528518676757812, -0.2884674072265625, 0.4950828552246094, 2.1005859375, 0.4364356994628906, 1.6637420654296875, 3.7472991943359375, 1.0804290771484375, 0.3437690734863281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000036.npy"}
{"epoch": 0.05286343612334802, "step": 37, "batch_size": 64, "mean": 1.0146114826202393, "std": 1.3858215808868408, "min": -2.61865234375, "p10": -0.2255735397338866, "median": 0.7293777465820312, "p90": 2.7518539428710946, "max": 6.3740234375, "pos_frac": 0.796875, "sample": [1.6860122680664062, -0.013946533203125, 1.3075485229492188, 0.6056442260742188, 0.15875244140625, -0.0401611328125, -0.40863037109375, 0.6845779418945312, 1.3842544555664062, -0.076873779296875, 0.9497528076171875, 3.047760009765625, 0.24517822265625, 0.272491455078125, 0.208587646484375, 0.38120269775390625, 0.8408775329589844, 0.11485862731933594, 1.868703842163086, 6.3740234375, 0.7741775512695312, 4.427337646484375, 1.4219131469726562, 0.9518394470214844, 1.4612350463867188, 1.2358169555664062, 0.9848709106445312, 0.17833709716796875, 1.5712738037109375, 1.0789012908935547, 0.4520683288574219, -0.06163787841796875, 0.10485267639160156, -1.0080337524414062, 0.5343704223632812, 0.2805442810058594, 0.27812957763671875, 2.190765380859375, 2.5041885375976562, 2.8578033447265625, -0.12848854064941406, 3.0472564697265625, 0.42543792724609375, 1.498260498046875, -0.5338211059570312, 0.8113918304443359, 0.016271591186523438, -0.27880859375, 0.79241943359375, 1.7046737670898438, 2.3525466918945312, -0.267181396484375, 2.4035110473632812, 2.504638671875, 0.3589973449707031, 3.3080902099609375, 1.6216964721679688, -2.61865234375, 0.4853973388671875, -0.431121826171875, 2.3571510314941406, 3.6165771484375, -0.09886360168457031, 0.17838668823242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000037.npy"}
{"epoch": 0.05433186490455213, "step": 38, "batch_size": 64, "mean": 1.344200849533081, "std": 1.6346410512924194, "min": -1.7084808349609375, "p10": -0.054677581787109325, "median": 0.8669166564941406, "p90": 3.2927429199218756, "max": 7.29541015625, "pos_frac": 0.875, "sample": [0.09067535400390625, 7.29541015625, 4.1405029296875, 1.4813518524169922, 0.23117446899414062, 0.04170799255371094, 1.4351882934570312, -0.40483856201171875, 1.2051239013671875, 2.57257080078125, 1.471771240234375, 0.8087615966796875, 0.1119384765625, 2.7542724609375, 1.174346923828125, -0.3412132263183594, 0.5314788818359375, 0.27643585205078125, -0.00142669677734375, 2.2769393920898438, 0.8171844482421875, 2.0788421630859375, 2.5112152099609375, 0.1966400146484375, 3.133209228515625, 0.11271286010742188, 1.5365371704101562, 2.7201385498046875, 1.7891502380371094, 0.3577079772949219, 0.562774658203125, 3.361114501953125, -1.7084808349609375, 0.9166488647460938, 0.6838169097900391, 0.5190811157226562, 1.22900390625, 5.6529693603515625, 0.6211433410644531, 0.39383697509765625, 3.3665695190429688, 5.878448486328125, 1.7239990234375, 0.6677398681640625, 0.491790771484375, -0.875701904296875, 0.18028640747070312, 0.19033432006835938, 0.8056221008300781, 1.350677490234375, -0.0774993896484375, 0.46661376953125, -0.3891735076904297, 1.8696937561035156, 1.3590259552001953, 2.24560546875, 1.30908203125, 5.151222229003906, 1.241546630859375, 0.19795989990234375, 1.8208541870117188, -0.4308032989501953, 0.7528228759765625, 2.0947227478027344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000038.npy"}
{"epoch": 0.055800293685756244, "step": 39, "batch_size": 64, "mean": 1.5921775102615356, "std": 1.3956255912780762, "min": -2.6716461181640625, "p10": 0.23551158905029304, "median": 1.2817916870117188, "p90": 3.6658027648925784, "max": 4.9209747314453125, "pos_frac": 0.921875, "sample": [2.2532119750976562, 2.0909271240234375, 4.9209747314453125, 2.2091140747070312, 1.242462158203125, -0.6747360229492188, 2.9158706665039062, 2.438648223876953, 1.393239974975586, 2.814115524291992, 0.787445068359375, 3.508026123046875, 1.6200714111328125, 1.169149398803711, 3.873882293701172, 1.0582046508789062, 0.505889892578125, 0.3256072998046875, -0.0616455078125, 0.782073974609375, 0.20563507080078125, 1.8240509033203125, 2.3686141967773438, 0.9167976379394531, 1.0498847961425781, 2.5280838012695312, -0.07294464111328125, 1.513803482055664, 4.27587890625, 0.4831695556640625, 0.5949859619140625, 0.3052234649658203, 2.9181671142578125, 1.9554061889648438, -2.6716461181640625, 1.9434432983398438, 0.3068504333496094, 1.5413227081298828, 3.744874954223633, 4.1374664306640625, 1.0471763610839844, 2.4693603515625, 4.187339782714844, 0.5455474853515625, 1.3211212158203125, 0.7453231811523438, -0.21112060546875, 0.5110740661621094, 1.23193359375, 0.9262008666992188, 0.9416732788085938, 3.6057052612304688, 2.6007766723632812, 2.8060302734375, 0.4246559143066406, 3.691558837890625, 2.9618988037109375, 0.9895992279052734, 2.0414810180664062, 0.35285377502441406, 0.7514209747314453, 0.1039581298828125, 0.4931468963623047, 2.3190441131591797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000039.npy"}
{"epoch": 0.05726872246696035, "step": 40, "batch_size": 64, "mean": 1.5351676940917969, "std": 1.656908631324768, "min": -1.090057373046875, "p10": -0.21006011962890622, "median": 1.1917686462402344, "p90": 3.84886932373047, "max": 7.1263427734375, "pos_frac": 0.859375, "sample": [-0.68304443359375, 0.233123779296875, 0.6062393188476562, 0.8682975769042969, 1.5935821533203125, 0.7240753173828125, 0.4557647705078125, 0.44641876220703125, 0.23168182373046875, 2.001401901245117, 6.260101318359375, 3.567474365234375, 0.9337997436523438, 3.384918212890625, 1.879974365234375, -0.6501274108886719, 2.1688079833984375, 2.687530517578125, 0.1897430419921875, -1.090057373046875, 2.9409332275390625, -0.2541484832763672, 1.25225830078125, -0.220245361328125, 2.1590347290039062, 1.88836669921875, 0.039707183837890625, 1.1799850463867188, 0.2627410888671875, -0.1862945556640625, 4.100311279296875, -0.24281692504882812, 3.3308029174804688, 0.1363677978515625, 0.3930339813232422, 4.3996734619140625, 0.1065826416015625, 3.9694671630859375, 0.9090347290039062, 1.2267341613769531, 4.762664794921875, 1.0427932739257812, 0.8771591186523438, 7.1263427734375, -0.376251220703125, 1.9193954467773438, -0.09644317626953125, 2.03680419921875, 2.84393310546875, 2.087696075439453, 1.429229736328125, 2.3803024291992188, 3.3391342163085938, 1.3389148712158203, 1.1044464111328125, 1.20355224609375, 4.4152374267578125, 0.43686676025390625, 2.0682144165039062, 0.4227714538574219, 0.35817718505859375, 1.3747482299804688, 2.0938720703125, 0.8599357604980469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000040.npy"}
{"epoch": 0.05873715124816446, "step": 41, "batch_size": 64, "mean": 1.8554792404174805, "std": 2.001645088195801, "min": -3.066680908203125, "p10": -0.02576808929443339, "median": 1.3504142761230469, "p90": 4.417221069335938, "max": 8.768112182617188, "pos_frac": 0.890625, "sample": [0.865814208984375, 0.8433380126953125, 3.5187530517578125, 1.6478958129882812, 0.571807861328125, 2.2460098266601562, 3.8543853759765625, 2.128826141357422, -0.959075927734375, 3.1564865112304688, 5.6116485595703125, 1.2997589111328125, 2.4298858642578125, 8.768112182617188, 0.1785717010498047, 0.428131103515625, 0.8908233642578125, 1.2938156127929688, 0.3207893371582031, -1.64703369140625, 1.3644485473632812, 3.122180938720703, 4.8681793212890625, 1.0261554718017578, 6.58343505859375, 1.6158447265625, 4.4842376708984375, 4.8438720703125, 0.6357841491699219, 0.26424407958984375, 2.531524658203125, 0.653228759765625, 6.764434814453125, 0.6491641998291016, 1.2193470001220703, 3.7293701171875, 2.1214675903320312, 2.1517791748046875, -0.3547821044921875, 2.5264053344726562, 4.028953552246094, 1.5901165008544922, -3.066680908203125, 2.6518707275390625, 0.5278701782226562, 1.20977783203125, 1.6205596923828125, 1.7341651916503906, 2.4970550537109375, 0.8030643463134766, 1.409423828125, 0.874755859375, 1.11724853515625, 1.3363800048828125, 0.8642444610595703, 0.9762172698974609, -0.11334228515625, 3.4408721923828125, 1.0046920776367188, 0.45551109313964844, -0.5531463623046875, -0.21187591552734375, 2.0430259704589844, 4.2608489990234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000041.npy"}
{"epoch": 0.06020558002936858, "step": 42, "batch_size": 64, "mean": 2.4679267406463623, "std": 2.316767692565918, "min": -1.4112701416015625, "p10": 0.06012077331542984, "median": 1.8595638275146484, "p90": 5.852120208740236, "max": 10.449981689453125, "pos_frac": 0.890625, "sample": [1.7782058715820312, -0.2077484130859375, 2.4191226959228516, 8.935623168945312, 10.449981689453125, 2.4389419555664062, 0.284881591796875, 6.110107421875, 1.8187103271484375, 2.465068817138672, 1.8514480590820312, 5.978858947753906, 2.4932861328125, 7.089263916015625, 0.9021492004394531, 1.6646194458007812, 2.56158447265625, -0.00433349609375, 2.2998428344726562, 3.2196578979492188, 1.6710739135742188, 1.404672622680664, 3.8101577758789062, 0.59735107421875, 1.1770057678222656, 3.1149635314941406, -0.8181610107421875, 3.009033203125, 1.6869735717773438, -0.5399169921875, 2.273967742919922, 5.35772705078125, 0.38789939880371094, 5.556396484375, 2.2232666015625, 1.8650360107421875, 0.9240322113037109, 1.4047679901123047, 1.946533203125, -0.8748016357421875, 3.56719970703125, -0.03021240234375, 1.8540916442871094, 6.3135833740234375, 1.5797157287597656, 4.1817779541015625, 0.9949760437011719, 0.3075294494628906, 0.8494663238525391, 1.7783355712890625, 0.9699478149414062, 6.195892333984375, -1.4112701416015625, 0.7137508392333984, 2.1836013793945312, 2.9486656188964844, 0.21051406860351562, 1.53387451171875, 4.904197692871094, 2.9354400634765625, 4.8314208984375, 4.9960479736328125, 0.7461719512939453, 4.065338134765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000042.npy"}
{"epoch": 0.06167400881057269, "step": 43, "batch_size": 64, "mean": 2.342538356781006, "std": 1.8794914484024048, "min": -1.098297119140625, "p10": 0.20966148376464847, "median": 2.1711063385009766, "p90": 4.45673065185547, "max": 10.042739868164062, "pos_frac": 0.96875, "sample": [-0.3757209777832031, 0.0064697265625, 0.1991558074951172, 3.1932449340820312, 3.5842361450195312, 1.550811767578125, 3.4396705627441406, 2.2703399658203125, 2.534219741821289, 6.2111968994140625, 1.07061767578125, 1.7060432434082031, 0.69158935546875, 1.991546630859375, 2.743968963623047, 0.9206619262695312, 3.18023681640625, 5.205848693847656, 1.7826004028320312, 0.15467453002929688, 6.055328369140625, 3.2957077026367188, 5.058494567871094, 1.0917205810546875, 2.778148651123047, 2.1277236938476562, 0.5776481628417969, 2.349773406982422, 0.05035972595214844, 3.99285888671875, 1.5546646118164062, 2.5247802734375, 3.891143798828125, 3.399688720703125, 2.58343505859375, 3.799774169921875, 0.5721282958984375, 2.2508697509765625, 1.7410697937011719, 0.3658599853515625, 2.2133102416992188, 1.2766342163085938, 6.016754150390625, 3.3783645629882812, 1.4598617553710938, -1.098297119140625, 1.085855484008789, 2.751373291015625, 1.52947998046875, 3.3118133544921875, 1.9890861511230469, 3.6936416625976562, 2.1289024353027344, 0.4359931945800781, 0.8514633178710938, 10.042739868164062, 3.541717529296875, 1.629659652709961, 0.023042678833007812, 0.6433944702148438, 3.1348876953125, 4.6555328369140625, 2.8704833984375, 0.2341747283935547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000043.npy"}
{"epoch": 0.0631424375917768, "step": 44, "batch_size": 64, "mean": 2.71984601020813, "std": 2.155487537384033, "min": -0.7050457000732422, "p10": 0.37867565155029304, "median": 2.30275821685791, "p90": 5.687815856933594, "max": 9.21075439453125, "pos_frac": 0.953125, "sample": [3.7109909057617188, 2.492706298828125, 1.8139820098876953, 0.5608272552490234, 6.735160827636719, 2.4648818969726562, 2.7541961669921875, 1.0667037963867188, 2.093181610107422, 5.1909332275390625, 6.662017822265625, 0.6707496643066406, 4.186927795410156, 5.24114990234375, 4.78680419921875, 3.2060012817382812, 0.9451103210449219, 1.788726806640625, 2.2134876251220703, 5.6090850830078125, 0.3241119384765625, 2.789215087890625, 2.7850799560546875, 7.8972930908203125, 1.597747802734375, 3.0890350341796875, 0.446136474609375, 1.5945281982421875, 4.722007751464844, 1.9735794067382812, 2.8925552368164062, -0.27831077575683594, 1.6798954010009766, 1.3552284240722656, 1.7098464965820312, 3.0664749145507812, 1.0781478881835938, 1.593597412109375, 5.7215576171875, 2.39202880859375, 0.7303924560546875, 2.148040771484375, 4.088592529296875, 3.2839508056640625, 1.174530029296875, 3.5687026977539062, 2.56414794921875, 2.5831298828125, 0.5827541351318359, 0.29680442810058594, 6.2164154052734375, 4.284332275390625, 1.9590606689453125, 3.232025146484375, 1.295064926147461, 9.21075439453125, 0.16848373413085938, -0.42462158203125, 3.625030517578125, 1.5662040710449219, -0.7050457000732422, 0.3497638702392578, 1.3307151794433594, 8.317543029785156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000044.npy"}
{"epoch": 0.06461086637298091, "step": 45, "batch_size": 64, "mean": 2.208528518676758, "std": 2.53650164604187, "min": -1.0955734252929688, "p10": -0.1773731231689452, "median": 1.5621986389160156, "p90": 5.6249755859375, "max": 13.0001220703125, "pos_frac": 0.859375, "sample": [3.642730712890625, 1.501617431640625, 0.7557830810546875, 3.1797561645507812, 0.16960525512695312, 1.8144073486328125, 3.5316238403320312, 5.930419921875, 5.6456298828125, 2.1600189208984375, 0.9093093872070312, 7.428199768066406, 3.222074508666992, 0.08140182495117188, -0.060733795166015625, 1.6033248901367188, -0.0621795654296875, 1.8177299499511719, 0.04630470275878906, 5.5767822265625, 0.12868499755859375, 0.23797988891601562, 2.940826416015625, 0.8514137268066406, 0.8080062866210938, 5.257246017456055, 6.9957275390625, 3.6768112182617188, 0.13054275512695312, 0.48044776916503906, 4.6272125244140625, 2.50677490234375, 5.5270538330078125, 0.03694915771484375, -0.32305145263671875, 1.5210723876953125, 5.8154449462890625, 3.488109588623047, -0.22674179077148438, 1.6270904541015625, 0.9040336608886719, 0.230987548828125, -1.0955734252929688, -0.44844818115234375, 1.274169921875, 13.0001220703125, 1.2187652587890625, 6.458229064941406, 2.6542282104492188, -0.6807403564453125, 2.214689254760742, 1.3556289672851562, 1.8692646026611328, 0.33403968811035156, 0.9720268249511719, 5.4557342529296875, 0.8685760498046875, 1.993988037109375, 2.3193893432617188, 2.2345428466796875, -0.3471641540527344, 3.955097198486328, 0.6522464752197266, -1.0494117736816406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000045.npy"}
{"epoch": 0.06607929515418502, "step": 46, "batch_size": 64, "mean": 2.753678798675537, "std": 2.966536283493042, "min": -2.3441009521484375, "p10": -0.17053642272949218, "median": 2.0067310333251953, "p90": 6.049882888793945, "max": 13.966522216796875, "pos_frac": 0.875, "sample": [0.40608787536621094, -0.2232666015625, 5.242279052734375, 6.939697265625, 3.7248306274414062, -2.3441009521484375, 1.7165565490722656, 1.7524852752685547, 5.8242950439453125, 3.5635128021240234, 6.037906646728516, 1.406982421875, 0.2205352783203125, 2.5891857147216797, 5.26153564453125, 0.0846405029296875, -0.6981658935546875, 0.317962646484375, 4.389305114746094, 9.746429443359375, -0.38933563232421875, 0.31103515625, 0.4578742980957031, 4.0304412841796875, 0.2629890441894531, 4.493255615234375, 4.1727294921875, 1.0541725158691406, 3.7199440002441406, 2.254039764404297, 4.043548583984375, 0.6693286895751953, -2.177734375, 0.8164863586425781, 2.3774871826171875, 5.508083343505859, 5.217655181884766, -0.17590713500976562, 7.8423004150390625, -0.1580047607421875, 0.2981452941894531, 3.5992279052734375, 5.148712158203125, 0.45896148681640625, 13.966522216796875, 0.314483642578125, 6.9065093994140625, 5.628715515136719, 0.7417640686035156, 2.1596832275390625, 1.2835426330566406, 7.9680023193359375, 2.9505462646484375, 0.20934295654296875, 1.8537788391113281, 6.055015563964844, 1.1712417602539062, 2.414287567138672, 3.711395263671875, 5.655128479003906, 1.4641380310058594, 0.8498897552490234, 1.4539222717285156, -0.31658935546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000046.npy"}
{"epoch": 0.06754772393538913, "step": 47, "batch_size": 64, "mean": 2.8084940910339355, "std": 2.5952467918395996, "min": -2.314098358154297, "p10": 0.1304063796997071, "median": 2.227280616760254, "p90": 6.219530487060547, "max": 10.015106201171875, "pos_frac": 0.90625, "sample": [1.4976348876953125, 2.8017959594726562, 0.7344512939453125, 1.0090999603271484, 5.144756317138672, 1.3113059997558594, 1.4425506591796875, 1.6918411254882812, 6.684013366699219, 3.050748825073242, -0.19408416748046875, 6.431938171386719, 6.270751953125, 1.4975204467773438, 3.3701553344726562, 2.382457733154297, 0.16996002197265625, 1.5924148559570312, 1.677337646484375, 5.361991882324219, 6.100013732910156, 1.3227691650390625, 0.8214511871337891, 2.948272705078125, 3.3046798706054688, 1.1883659362792969, -0.5001602172851562, 5.9267730712890625, 6.738189697265625, 2.396047592163086, -0.35321998596191406, 3.2825145721435547, 5.808036804199219, 4.3163604736328125, -2.314098358154297, 0.11345481872558594, 0.2802886962890625, 1.0931167602539062, 1.116485595703125, 9.87750244140625, 5.8236236572265625, 2.1422157287597656, 1.1955242156982422, 2.794677734375, 8.660232543945312, 5.55548095703125, -0.7502517700195312, 3.2030868530273438, 10.015106201171875, -1.09423828125, 1.1379013061523438, 1.8871307373046875, 0.1934051513671875, 1.3147335052490234, 2.9491729736328125, 0.7071342468261719, 5.778228759765625, 1.817352294921875, 5.1494903564453125, 1.646484375, 3.690643310546875, 3.85064697265625, 2.3680152893066406, 2.312345504760742], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000047.npy"}
{"epoch": 0.06901615271659324, "step": 48, "batch_size": 64, "mean": 2.720108985900879, "std": 2.682295799255371, "min": -6.2974395751953125, "p10": 0.01788673400878909, "median": 2.2589492797851562, "p90": 6.938107299804687, "max": 9.83245849609375, "pos_frac": 0.90625, "sample": [3.7593536376953125, 0.6459312438964844, 6.875579833984375, 6.96490478515625, 5.780731201171875, 3.1298751831054688, -1.1116485595703125, 2.3195724487304688, 5.0488128662109375, 3.8452987670898438, 1.5587615966796875, 5.140708923339844, 2.0462493896484375, 0.007354736328125, -0.14864349365234375, 1.3589439392089844, 7.07867431640625, 0.06800270080566406, 1.3714218139648438, -0.35608673095703125, 1.1412239074707031, 7.655754089355469, -0.5316276550292969, 2.1919403076171875, 5.765281677246094, 1.4915618896484375, 2.3039169311523438, 3.0126285552978516, 0.042461395263671875, 3.1278152465820312, 1.8955726623535156, 1.6675586700439453, 2.8143157958984375, 1.8392658233642578, 7.191314697265625, 3.9081897735595703, 4.108253479003906, 8.627822875976562, 3.5085296630859375, 0.8557052612304688, 1.1421775817871094, 1.4503116607666016, 1.4158935546875, 2.465038299560547, 1.188140869140625, 3.8537464141845703, 2.4843597412109375, 1.8206329345703125, -1.31243896484375, 7.660484313964844, 2.45208740234375, 2.8612194061279297, 2.109283447265625, 9.83245849609375, 4.8221282958984375, -6.2974395751953125, 5.086376190185547, 1.5745086669921875, 0.7411994934082031, 0.475006103515625, 3.624053955078125, 2.0584030151367188, 2.364093780517578, 2.2139816284179688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000048.npy"}
{"epoch": 0.07048458149779736, "step": 49, "batch_size": 64, "mean": 3.2672815322875977, "std": 3.4105348587036133, "min": -3.291961669921875, "p10": 0.048761177062988326, "median": 2.7687034606933594, "p90": 7.769887542724612, "max": 14.187881469726562, "pos_frac": 0.90625, "sample": [3.061359405517578, 0.5331039428710938, 0.35956382751464844, 2.4762039184570312, 2.2668399810791016, 5.526363372802734, 0.8236236572265625, 2.415384292602539, -1.0635986328125, 0.787200927734375, 0.08919334411621094, 1.8609161376953125, 2.6810760498046875, 8.257591247558594, 7.280525207519531, 2.0377960205078125, 3.704692840576172, 1.7175464630126953, 2.8563308715820312, 1.1740188598632812, 1.8127365112304688, -2.9268970489501953, 3.942413330078125, 0.11493110656738281, 11.3323974609375, 9.07684326171875, 1.13873291015625, 5.346580505371094, 4.1695709228515625, 0.7188701629638672, 6.6278228759765625, 14.187881469726562, 3.6599807739257812, 0.822235107421875, -0.7567138671875, -3.291961669921875, 3.0712814331054688, 3.5664215087890625, 3.7712020874023438, 4.057548522949219, 2.6110572814941406, 0.03143310546875, -0.503692626953125, 3.8523025512695312, 4.999977111816406, 12.69439697265625, 2.5288772583007812, 2.0280914306640625, 7.9796142578125, 5.737579345703125, 5.766941070556641, 3.0269813537597656, 3.5719337463378906, 4.695518493652344, 11.772003173828125, 0.18674659729003906, -0.9648895263671875, 2.8778533935546875, 1.7580833435058594, 3.6411819458007812, 4.0842132568359375, 2.207988739013672, 4.065155029296875, 1.1690673828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000049.npy"}
{"epoch": 0.07195301027900147, "step": 50, "batch_size": 64, "mean": 3.5516457557678223, "std": 3.4186770915985107, "min": -2.6736907958984375, "p10": -0.5345260620117187, "median": 2.7652015686035156, "p90": 8.545244598388674, "max": 13.205413818359375, "pos_frac": 0.828125, "sample": [5.324737548828125, -0.0602874755859375, -1.230621337890625, 8.859371185302734, 0.7726936340332031, 1.7912368774414062, 6.938873291015625, 1.8265724182128906, 2.6888389587402344, 1.7584123611450195, -0.5196151733398438, 6.3728790283203125, 2.9356231689453125, 12.702041625976562, 0.3504180908203125, 2.407684326171875, 6.6142120361328125, 2.9626731872558594, 8.70587158203125, 3.5991058349609375, 0.9705066680908203, 5.0768585205078125, 8.170448303222656, 2.1046981811523438, 1.8286399841308594, 4.410486221313477, -0.26572418212890625, -0.5409164428710938, 2.2174949645996094, 9.036117553710938, 5.90325927734375, 4.879451751708984, 9.077255249023438, 13.205413818359375, -2.6736907958984375, 0.652191162109375, 0.9032135009765625, 2.1703033447265625, -0.07048797607421875, 3.70196533203125, 2.47637939453125, 1.2619190216064453, 5.818965911865234, 3.9515151977539062, 6.353031158447266, 6.48406982421875, -0.6119384765625, 1.7945518493652344, 5.217506408691406, -0.5818023681640625, 5.590667724609375, 2.841564178466797, 4.383033752441406, 5.029426574707031, 2.6112327575683594, 5.590766906738281, 6.798408508300781, 2.4178619384765625, 9.373298645019531, -0.9618301391601562, 5.847389221191406, 1.6566238403320312, -1.716400146484375, 0.120880126953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000050.npy"}
{"epoch": 0.07342143906020558, "step": 51, "batch_size": 64, "mean": 3.5724036693573, "std": 4.458898067474365, "min": -5.367889404296875, "p10": -0.28134651184082027, "median": 2.4499435424804688, "p90": 9.279871368408203, "max": 17.663986206054688, "pos_frac": 0.828125, "sample": [8.98614501953125, 3.723236083984375, 0.8275508880615234, -0.22011947631835938, 0.7900161743164062, 0.31085205078125, 8.512184143066406, -0.7489814758300781, 1.7327499389648438, 0.4948883056640625, 10.198211669921875, 7.169902801513672, 1.8056831359863281, 3.9162673950195312, 2.165803909301758, 4.8417510986328125, 2.982837677001953, -0.7646541595458984, 8.5361328125, 5.068553924560547, 3.8701438903808594, -0.1195526123046875, -0.13894271850585938, 3.430461883544922, 1.8848419189453125, 7.347873687744141, 0.3943023681640625, 4.998847961425781, 0.7295722961425781, 4.332496643066406, 17.663986206054688, 2.5602951049804688, 0.6959075927734375, 5.2451171875, 2.968332290649414, 13.0499267578125, 1.434844970703125, 13.100814819335938, 5.11627197265625, 1.7819099426269531, 2.375885009765625, 0.9989814758300781, 3.2911529541015625, 9.405754089355469, 1.1664962768554688, 0.09983062744140625, 4.6843109130859375, -3.8662872314453125, -0.307586669921875, 16.278640747070312, 13.229293823242188, 6.122871398925781, 6.653621673583984, -0.8066482543945312, 1.4365043640136719, 5.605552673339844, 0.06493377685546875, 0.7290267944335938, 2.005645751953125, -5.367889404296875, 2.5240020751953125, -1.1453094482421875, 2.818195343017578, -0.039608001708984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000051.npy"}
{"epoch": 0.07488986784140969, "step": 52, "batch_size": 64, "mean": 5.411087989807129, "std": 4.423210620880127, "min": -2.4843597412109375, "p10": 0.6714458465576173, "median": 4.760313034057617, "p90": 11.151004028320314, "max": 19.1807861328125, "pos_frac": 0.9375, "sample": [1.4709701538085938, 10.752174377441406, 4.098533630371094, 11.1961669921875, 7.149314880371094, 8.209945678710938, 3.052276611328125, 6.502227783203125, 8.13458251953125, 12.117050170898438, 0.03101348876953125, 11.455886840820312, 4.8309783935546875, 5.005653381347656, 8.64564323425293, 7.610250473022461, 2.531879425048828, 4.9181976318359375, 15.269485473632812, 9.26434326171875, 3.1796398162841797, 0.6301498413085938, 11.045623779296875, 2.741016387939453, 0.044933319091796875, 6.729957580566406, 2.360321044921875, 1.8827590942382812, 7.953386306762695, 19.1807861328125, 8.867889404296875, 2.0815200805664062, 8.888664245605469, 4.689647674560547, 3.7131195068359375, 12.473556518554688, 4.04852294921875, -0.6497955322265625, 4.193572998046875, -2.4843597412109375, -2.0307464599609375, 1.9537277221679688, 6.55573844909668, 5.507743835449219, 1.3355598449707031, 8.824005126953125, 3.9224853515625, 10.144622802734375, 6.7519683837890625, 1.6406116485595703, 0.7678031921386719, 2.703094482421875, 3.2714004516601562, 5.061363220214844, 3.05010986328125, 8.991798400878906, 5.662750244140625, 1.5444107055664062, 15.90618896484375, -1.4978866577148438, 5.502738952636719, 3.6881675720214844, 2.3110694885253906, 0.9234333038330078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000052.npy"}
{"epoch": 0.0763582966226138, "step": 53, "batch_size": 64, "mean": 5.484249114990234, "std": 5.811614036560059, "min": -2.352142333984375, "p10": 0.47873916625976565, "median": 3.8004379272460938, "p90": 13.809008789062503, "max": 26.019927978515625, "pos_frac": 0.921875, "sample": [-1.4273433685302734, 3.27252197265625, 3.9656143188476562, 2.420166015625, 0.8875503540039062, 3.6748085021972656, 12.339614868164062, 0.19185638427734375, 7.262626647949219, -1.4532623291015625, 1.85955810546875, 25.00970458984375, -2.352142333984375, 1.1058273315429688, 5.780357360839844, 7.105533599853516, 2.2113189697265625, 9.91180419921875, 2.7956619262695312, 10.590629577636719, 3.788066864013672, 5.6917877197265625, 11.827133178710938, 6.554847717285156, 14.027252197265625, 0.5918960571289062, 4.7135772705078125, 0.47104644775390625, 5.5041656494140625, 1.7671642303466797, 15.37982177734375, 7.7661590576171875, 6.291431427001953, 8.006172180175781, 3.8128089904785156, 0.4966888427734375, 0.7252769470214844, 3.9423789978027344, 1.2129936218261719, 4.026679992675781, 2.8493118286132812, 6.967519760131836, 1.8590927124023438, 13.299774169921875, -2.0651931762695312, 15.057098388671875, 16.906341552734375, 3.1031265258789062, 4.0637359619140625, 2.961627960205078, 1.7830657958984375, 3.916400909423828, 2.52435302734375, 1.77301025390625, 3.7868576049804688, 4.076770782470703, 2.9311981201171875, 26.019927978515625, 2.5110397338867188, 13.1112060546875, 4.6240081787109375, 3.2169418334960938, 14.584831237792969, -0.6198463439941406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000053.npy"}
{"epoch": 0.07782672540381791, "step": 54, "batch_size": 64, "mean": 4.783502578735352, "std": 4.476994514465332, "min": -0.97015380859375, "p10": 0.5653587341308594, "median": 3.8231658935546875, "p90": 10.56023941040039, "max": 20.3184814453125, "pos_frac": 0.921875, "sample": [7.164070129394531, 4.217742919921875, 7.493171691894531, 0.7285346984863281, 0.9668502807617188, 2.4121856689453125, 1.177642822265625, 2.475862503051758, 5.447265625, 7.287700653076172, 0.8489246368408203, 5.186004638671875, 10.610641479492188, 2.285341262817383, 3.6014251708984375, 10.172882080078125, -0.07115554809570312, 4.364337921142578, 4.732706069946289, 5.235176086425781, -0.014942169189453125, 1.6258010864257812, 14.520408630371094, 0.594940185546875, 7.219917297363281, 0.472259521484375, 4.169559478759766, 4.406303405761719, 5.5215606689453125, 5.675178527832031, 1.3898601531982422, 0.5526809692382812, 7.126472473144531, -0.15752792358398438, 0.8302974700927734, 3.136371612548828, 11.967903137207031, 3.2358970642089844, 5.024085998535156, 12.652740478515625, 1.9077701568603516, 2.5778961181640625, 20.3184814453125, 10.442634582519531, -0.97015380859375, 2.597240447998047, -0.020965576171875, 11.736907958984375, 0.6072711944580078, 4.0449066162109375, 1.3527908325195312, 4.195613861083984, 9.81988525390625, 1.0833663940429688, 2.28338623046875, 16.652374267578125, 0.8555660247802734, 4.110101699829102, 2.697052001953125, 9.575332641601562, 2.3955631256103516, 10.374053955078125, 1.2194194793701172, 10.000572204589844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000054.npy"}
{"epoch": 0.07929515418502203, "step": 55, "batch_size": 64, "mean": 5.739418983459473, "std": 6.457626819610596, "min": -9.5828857421875, "p10": -1.1265609741210936, "median": 5.1821746826171875, "p90": 14.347319793701171, "max": 25.52313232421875, "pos_frac": 0.828125, "sample": [7.91351318359375, 9.039871215820312, 4.770195007324219, -3.070587158203125, 22.28424072265625, -2.3770980834960938, 6.729541778564453, 4.156253814697266, 14.883338928222656, -0.38945770263671875, 5.943992614746094, 5.475894927978516, 1.9337081909179688, 9.757278442382812, 17.430831909179688, 8.569992065429688, 4.184661865234375, 8.759857177734375, 25.52313232421875, 3.9289817810058594, 11.23297119140625, -1.3271484375, 4.222129821777344, 7.405891418457031, 21.137741088867188, 0.26033592224121094, 14.308349609375, 0.28499603271484375, 0.4656658172607422, 7.336353302001953, 3.857391357421875, -1.6635284423828125, 5.788948059082031, 0.22574806213378906, 10.841033935546875, 3.605010986328125, -1.0082511901855469, 0.5089569091796875, 0.24942970275878906, -4.2782135009765625, 8.52878189086914, 3.0333786010742188, 1.927154541015625, 13.700439453125, 6.665218353271484, -1.1772651672363281, 0.8115310668945312, 9.667327880859375, 5.450159072875977, -9.5828857421875, 2.3686161041259766, 14.364021301269531, 5.689056396484375, 3.5127792358398438, 7.100055694580078, 5.1398773193359375, -0.5732002258300781, 8.1822509765625, 7.228240966796875, 12.413406372070312, 5.2244720458984375, -0.3330726623535156, 4.42266845703125, 14.657852172851562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000055.npy"}
{"epoch": 0.08076358296622614, "step": 56, "batch_size": 64, "mean": 5.461906433105469, "std": 6.40669584274292, "min": -8.48746109008789, "p10": -1.4576095581054687, "median": 5.224672317504883, "p90": 13.871589660644537, "max": 24.472976684570312, "pos_frac": 0.828125, "sample": [-1.3777999877929688, 6.9513092041015625, -2.3592987060546875, 7.457000732421875, -0.37066078186035156, -8.48746109008789, 9.905662536621094, -1.4918136596679688, 3.786266326904297, -3.5986862182617188, 3.3298492431640625, 2.1512680053710938, 6.523643493652344, 14.384490966796875, 2.0059032440185547, 0.09862518310546875, -0.3867301940917969, 9.147956848144531, 8.978130340576172, 6.920463562011719, -2.083179473876953, 9.494552612304688, 2.2523422241210938, 20.047332763671875, 7.106964111328125, 14.7088623046875, 1.115732192993164, 0.041637420654296875, 9.104230880737305, 2.4046707153320312, 6.8402862548828125, 6.758392333984375, 8.32904052734375, 1.438690185546875, 10.207725524902344, 11.227928161621094, 0.6791191101074219, 17.196044921875, 6.986064910888672, 5.711761474609375, 5.667640686035156, 4.319568634033203, 3.069852828979492, 18.947235107421875, 7.011077880859375, 11.761062622070312, 5.758886337280273, 9.886947631835938, 0.8617820739746094, 24.472976684570312, 1.0197925567626953, 0.02703094482421875, 4.066307067871094, 12.674819946289062, -5.9561767578125, 2.6937923431396484, -0.6662445068359375, 20.260414123535156, 4.894275665283203, -3.2195205688476562, 3.2938461303710938, 2.0112533569335938, 5.5550689697265625, 8.013999938964844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000056.npy"}
{"epoch": 0.08223201174743025, "step": 57, "batch_size": 64, "mean": 6.123730182647705, "std": 5.664114475250244, "min": -8.65020751953125, "p10": 0.1048212051391603, "median": 5.812950134277344, "p90": 13.373243331909181, "max": 20.917526245117188, "pos_frac": 0.921875, "sample": [7.745433807373047, 3.007070541381836, 0.6629714965820312, -2.4545135498046875, 9.749099731445312, -8.65020751953125, 10.878250122070312, 0.2445507049560547, 11.765350341796875, 5.9770355224609375, 15.719955444335938, 3.2582054138183594, 20.917526245117188, 2.8892059326171875, 6.953834533691406, 5.64886474609375, -0.7502517700195312, 3.5164527893066406, 6.454963684082031, 18.106170654296875, 10.064544677734375, 15.747634887695312, 10.547439575195312, 3.635639190673828, 6.404937744140625, 1.5121746063232422, 13.076587677001953, 4.741899490356445, 3.916980743408203, 2.5631637573242188, 1.20489501953125, 4.208000183105469, 2.9806365966796875, 0.02935028076171875, 6.730560302734375, 6.7663421630859375, 0.0449371337890625, 1.6090202331542969, 2.4732894897460938, 8.289390563964844, 9.888656616210938, 1.9785919189453125, 9.457084655761719, 4.842567443847656, 8.198226928710938, -2.2121620178222656, 8.48061752319336, 6.724266052246094, 8.611968994140625, 3.0862808227539062, 2.134449005126953, 16.942581176757812, 5.412410736083984, 2.7899627685546875, 7.1648406982421875, 4.258369445800781, 0.6883907318115234, 11.2506103515625, 11.649147033691406, -4.9671630859375, 13.500381469726562, 18.69219970703125, 7.147520065307617, 8.011543273925781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000057.npy"}
{"epoch": 0.08370044052863436, "step": 58, "batch_size": 64, "mean": 6.960675239562988, "std": 7.283979892730713, "min": -8.165863037109375, "p10": -0.15657005310058586, "median": 4.8431396484375, "p90": 17.01222686767579, "max": 26.385833740234375, "pos_frac": 0.875, "sample": [14.90240478515625, -1.6394309997558594, 0.7983589172363281, 4.182563781738281, 3.5980072021484375, 4.165802001953125, 19.116683959960938, 2.1693649291992188, 14.039878845214844, 2.7689285278320312, 5.495014190673828, 6.4330902099609375, 7.8250885009765625, 3.705904006958008, 2.7046279907226562, 12.3720703125, 10.735305786132812, 8.766242980957031, 4.191265106201172, 2.8262081146240234, 7.040924072265625, 13.865676879882812, 13.243804931640625, -1.6291351318359375, 11.291770935058594, 1.6610641479492188, 9.061782836914062, 6.723325729370117, 21.51044464111328, 3.7341251373291016, 26.385833740234375, 8.068038940429688, 6.619495391845703, -0.07796478271484375, 14.702957153320312, 0.8828525543212891, 13.767471313476562, 3.548694610595703, 2.70098876953125, -2.149993896484375, 1.5605487823486328, 2.270893096923828, 21.597991943359375, -1.9931564331054688, 10.025894165039062, -0.19025802612304688, 1.4699535369873047, 7.37237548828125, -4.228546142578125, 23.3475341796875, 2.078348159790039, 6.268451690673828, 0.8422393798828125, 14.359081268310547, 10.621341705322266, -8.165863037109375, 22.395309448242188, 1.39453125, 2.2394180297851562, 6.60980224609375, 15.345657348632812, 1.7395401000976562, 0.6861000061035156, 17.726470947265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000058.npy"}
{"epoch": 0.08516886930983847, "step": 59, "batch_size": 64, "mean": 7.362071514129639, "std": 7.624090671539307, "min": -5.909423828125, "p10": -1.2778446197509765, "median": 6.645444869995117, "p90": 18.62580108642578, "max": 32.22602844238281, "pos_frac": 0.875, "sample": [8.322715759277344, 7.982383728027344, 14.810272216796875, 19.03460693359375, 15.112266540527344, 4.192024230957031, 32.22602844238281, 20.020721435546875, 7.333362579345703, 15.899154663085938, 0.7549095153808594, 6.779186248779297, -3.72528076171875, 2.388568878173828, 21.0130615234375, -1.3303718566894531, 2.4608755111694336, 13.576919555664062, 7.2828369140625, 19.564468383789062, 0.9286270141601562, 2.2508392333984375, 0.9340591430664062, 1.0956382751464844, 3.7268218994140625, 24.861679077148438, 2.731578826904297, 1.9985675811767578, 7.909767150878906, -5.909423828125, 9.510711669921875, -2.449634552001953, 7.833000183105469, 8.2703857421875, 8.685600280761719, 17.53736114501953, 2.3358535766601562, 11.472755432128906, 6.840293884277344, 3.7067184448242188, -5.557014465332031, -3.1932601928710938, 6.032297134399414, -1.1552810668945312, 4.066526412963867, 9.750473022460938, 7.062152862548828, 3.212127685546875, 7.299468994140625, 2.8349266052246094, 2.2858657836914062, 5.761741638183594, 2.775146484375, 8.898868560791016, 5.416900634765625, 14.567001342773438, 9.305381774902344, 3.4055557250976562, 2.5939178466796875, -2.2538681030273438, 16.506366729736328, 18.317916870117188, 6.5117034912109375, 18.75775146484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000059.npy"}
{"epoch": 0.08663729809104258, "step": 60, "batch_size": 64, "mean": 5.937168121337891, "std": 6.694998741149902, "min": -14.41754150390625, "p10": -1.6272411346435547, "median": 6.3509016036987305, "p90": 12.099561309814453, "max": 25.782875061035156, "pos_frac": 0.828125, "sample": [7.500698089599609, -3.3667755126953125, 1.581705093383789, 11.620353698730469, 0.5745086669921875, 2.9387454986572266, 10.821216583251953, 8.26580810546875, 2.0553207397460938, -14.41754150390625, 6.664552688598633, -1.7320938110351562, 1.936594009399414, 3.869688034057617, 10.527778625488281, 5.880584716796875, 4.3164215087890625, 10.505821228027344, 3.6941757202148438, 10.327621459960938, 13.380477905273438, 11.97528076171875, 17.6448974609375, 8.543083190917969, 8.590179443359375, 7.445274353027344, -0.6603069305419922, 6.229988098144531, 10.095550537109375, 0.14325904846191406, 8.882671356201172, 18.407608032226562, 3.5217552185058594, 6.099315643310547, 8.155887603759766, -1.6035423278808594, 9.683059692382812, 9.058761596679688, 2.0871658325195312, -4.818183898925781, 10.9786376953125, 2.9487838745117188, 5.357078552246094, 8.242759704589844, 4.5932159423828125, -0.9718132019042969, -10.16384506225586, 15.839736938476562, 19.643096923828125, 11.896820068359375, 12.152824401855469, 2.51812744140625, -1.6976051330566406, -1.6373977661132812, 10.220975875854492, 6.47181510925293, 25.782875061035156, 9.069938659667969, 1.4972763061523438, 8.223104476928711, -0.7335968017578125, 7.9198455810546875, 4.283702850341797, 1.1150646209716797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000060.npy"}
{"epoch": 0.0881057268722467, "step": 61, "batch_size": 64, "mean": 7.258552074432373, "std": 9.79314136505127, "min": -9.294906616210938, "p10": -1.1030864715576172, "median": 5.387714385986328, "p90": 17.715953826904297, "max": 54.39056396484375, "pos_frac": 0.828125, "sample": [2.7423458099365234, 1.3120002746582031, -1.2756805419921875, 1.4614791870117188, 14.113899230957031, 0.17702293395996094, 0.28493309020996094, 7.842464447021484, -0.948028564453125, 26.841354370117188, 21.229515075683594, 19.409393310546875, 27.158187866210938, 12.469512939453125, 6.9866180419921875, 8.771507263183594, -2.032745361328125, 1.4881629943847656, 11.604541778564453, 11.085708618164062, 3.9116134643554688, 5.074520111083984, 14.546234130859375, 8.368968963623047, 6.021919250488281, 6.71684455871582, 5.318733215332031, 5.734458923339844, 8.763816833496094, 8.254745483398438, -1.0676116943359375, 0.5185546875, 12.605278015136719, 16.178802490234375, 15.777069091796875, 21.008316040039062, 17.367828369140625, 1.2127456665039062, -0.862060546875, 13.628585815429688, 3.008533477783203, 0.13791465759277344, 11.220367431640625, 2.577709197998047, 1.2829647064208984, 17.865150451660156, 1.243011474609375, 3.273883819580078, 54.39056396484375, 12.775321960449219, 12.202255249023438, -0.746429443359375, -9.294906616210938, 11.730125427246094, 2.2157974243164062, -4.637901306152344, 5.456695556640625, 3.7058181762695312, -7.4734039306640625, 2.815998077392578, 7.031524658203125, -6.0870208740234375, -1.1182899475097656, 1.1700859069824219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000061.npy"}
{"epoch": 0.08957415565345081, "step": 62, "batch_size": 64, "mean": 5.524896621704102, "std": 7.01943826675415, "min": -8.648681640625, "p10": -2.8673089981079096, "median": 5.220463752746582, "p90": 14.511708068847659, "max": 28.301368713378906, "pos_frac": 0.859375, "sample": [8.556774139404297, 1.4826736450195312, 0.5866317749023438, 10.502082824707031, 5.376943588256836, 11.299179077148438, 6.89094352722168, 0.893157958984375, 9.608695983886719, -5.395050048828125, 14.721908569335938, 5.772529602050781, 1.8425979614257812, 0.02208709716796875, 9.159660339355469, 3.412769317626953, 9.539306640625, 5.300506591796875, 0.1614990234375, 5.224334716796875, 6.887947082519531, 7.154689788818359, 1.810333251953125, 4.908092498779297, -4.0428619384765625, 8.532257080078125, 4.133296966552734, 7.954936981201172, -0.4099903106689453, 28.301368713378906, 17.204498291015625, 8.036201477050781, 5.81524658203125, 2.339447021484375, 1.8145751953125, 5.6868743896484375, 1.7879791259765625, 17.094467163085938, -8.648681640625, -3.7052078247070312, -5.955780029296875, 0.8525276184082031, 1.4433937072753906, 0.8811454772949219, 1.1666717529296875, 8.793445587158203, 16.363758087158203, 9.478034973144531, -3.085966110229492, 14.021240234375, 8.469799041748047, 5.216592788696289, -6.192962646484375, 3.390169143676758, 12.961551666259766, 3.0258102416992188, 3.1953697204589844, 4.0953521728515625, 25.843276977539062, 19.538742065429688, 7.03864860534668, 1.2868156433105469, -2.3571090698242188, 6.508148193359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000062.npy"}
{"epoch": 0.09104258443465492, "step": 63, "batch_size": 64, "mean": 7.485637664794922, "std": 7.910577297210693, "min": -12.992572784423828, "p10": -0.36663284301757765, "median": 6.831754684448242, "p90": 16.559085083007815, "max": 31.837738037109375, "pos_frac": 0.890625, "sample": [24.701812744140625, 8.00262451171875, 4.646566390991211, 9.524723052978516, 2.452972412109375, 0.8351669311523438, 4.597587585449219, -6.8447265625, 6.341087341308594, 6.634616851806641, -12.992572784423828, 16.7130126953125, 9.349456787109375, 7.028892517089844, 2.8032760620117188, -1.0459423065185547, 16.199920654296875, 4.150690078735352, 11.928848266601562, 20.87340545654297, 11.963973999023438, 13.688453674316406, 5.877349853515625, 0.10333251953125, -0.5680465698242188, 12.045129776000977, -4.482612609863281, 0.27256011962890625, 5.009727478027344, 3.407114028930664, 11.423141479492188, 8.130226135253906, 8.062568664550781, 9.187259674072266, 10.536247253417969, 12.284347534179688, 13.659744262695312, 13.286476135253906, 5.106895446777344, 2.5273971557617188, 0.9323577880859375, 13.57330322265625, 2.401548385620117, 6.107929229736328, -10.49920654296875, 13.512014389038086, 7.1820526123046875, 1.295074462890625, 1.8016815185546875, 5.695953369140625, 2.9959716796875, 13.827400207519531, 17.533096313476562, 8.13079833984375, 17.700721740722656, 0.6702651977539062, 8.380697250366211, 31.837738037109375, -3.1706466674804688, 14.441234588623047, 4.8003082275390625, 25.1739501953125, 5.801856994628906, 11.532005310058594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000063.npy"}
{"epoch": 0.09251101321585903, "step": 64, "batch_size": 64, "mean": 8.758337020874023, "std": 9.909212112426758, "min": -16.666961669921875, "p10": -0.7858884811401364, "median": 7.340604782104492, "p90": 21.024116516113285, "max": 46.760498046875, "pos_frac": 0.8125, "sample": [1.7029495239257812, 17.394744873046875, 26.986373901367188, 7.556148529052734, 4.84834098815918, 12.217491149902344, 3.240081787109375, 16.438087463378906, -0.2341899871826172, 6.52379035949707, 2.000164031982422, 10.43023681640625, 0.7709579467773438, 4.849945068359375, 12.24481201171875, 29.34869384765625, 4.596305847167969, -1.8371562957763672, 16.990097045898438, 7.519325256347656, 11.466598510742188, -3.2168426513671875, 27.998794555664062, 8.008865356445312, -0.28986358642578125, 6.444843292236328, -0.9086475372314453, 6.595989227294922, -16.666961669921875, -0.4697990417480469, 7.607288360595703, -0.49945068359375, 46.760498046875, 5.519100189208984, 2.229524612426758, 10.540725708007812, 17.08013916015625, 19.957015991210938, 10.358917236328125, 5.928302764892578, 5.753530502319336, 23.05016326904297, 11.707023620605469, 8.441276550292969, -1.2991485595703125, -9.410514831542969, 23.4808349609375, 3.2678070068359375, 21.4814453125, 0.2619476318359375, 15.372871398925781, 13.166122436523438, 11.671928405761719, -1.3052825927734375, 12.812713623046875, -0.3718414306640625, 4.7322998046875, 7.161884307861328, 13.292129516601562, 12.827590942382812, 1.6077804565429688, 11.4898681640625, 18.425689697265625, 4.883197784423828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000064.npy"}
{"epoch": 0.09397944199706314, "step": 65, "batch_size": 64, "mean": 8.849407196044922, "std": 9.720344543457031, "min": -22.24115753173828, "p10": -0.616684341430664, "median": 7.15633487701416, "p90": 22.604815673828128, "max": 30.037887573242188, "pos_frac": 0.875, "sample": [30.037887573242188, 24.27740478515625, 16.600662231445312, 12.517730712890625, 5.549161911010742, 9.339881896972656, 0.6753616333007812, 12.766765594482422, 3.490215301513672, 20.16883087158203, 16.233963012695312, 1.4378623962402344, -0.47119903564453125, 23.068603515625, 28.056396484375, 0.23386001586914062, 9.99068832397461, 2.3719959259033203, -4.0348052978515625, 4.855934143066406, 1.1144046783447266, 17.731201171875, 5.912864685058594, -22.24115753173828, 16.193878173828125, 7.87310791015625, -4.2362518310546875, 7.204387664794922, 14.011909484863281, 5.026287078857422, 5.945426940917969, 5.681266784667969, -1.7587242126464844, 5.230152130126953, 28.34454345703125, 6.930576324462891, 21.52264404296875, 15.279064178466797, 24.92792510986328, 7.509119033813477, 0.1097259521484375, -10.611259460449219, 8.892425537109375, 2.1167469024658203, 6.340789794921875, 18.404815673828125, 23.627357482910156, -1.3054580688476562, 6.765960693359375, 19.720993041992188, 11.239700317382812, 3.920684814453125, 20.308441162109375, -0.6790351867675781, 12.578826904296875, 7.5952911376953125, 5.3663482666015625, 2.5346832275390625, 17.45258331298828, 11.152618408203125, 0.24939727783203125, 8.008529663085938, 0.09377861022949219, 7.108282089233398], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000065.npy"}
{"epoch": 0.09544787077826726, "step": 66, "batch_size": 64, "mean": 8.239856719970703, "std": 9.795476913452148, "min": -8.536376953125, "p10": -0.7811008453369136, "median": 6.427303314208984, "p90": 18.879657363891603, "max": 46.99446105957031, "pos_frac": 0.859375, "sample": [7.446739196777344, 46.99446105957031, 0.69781494140625, 14.779182434082031, 13.737640380859375, 12.121978759765625, 2.3297290802001953, -0.34705352783203125, 10.384958267211914, 6.5466461181640625, -8.536376953125, 2.409055709838867, 6.189533233642578, -0.9671211242675781, 12.045799255371094, 3.2483367919921875, 4.649658203125, 0.15288162231445312, 2.3184738159179688, 18.743240356445312, 7.328575134277344, 10.483057022094727, 0.6119880676269531, 10.560997009277344, 17.495582580566406, 3.2617645263671875, 4.3423614501953125, -2.15765380859375, -3.9257144927978516, -7.635650634765625, 10.27835464477539, 17.51862335205078, 18.938121795654297, 7.420280456542969, 3.6919097900390625, 11.419296264648438, 6.307960510253906, -0.34487342834472656, 2.2108497619628906, 14.130935668945312, 11.209854125976562, 3.8775711059570312, 0.097747802734375, 6.8609619140625, 5.931392669677734, -8.328559875488281, 10.151687622070312, 1.6986961364746094, -4.067878723144531, 13.638031005859375, 4.436820983886719, 19.330322265625, 26.950469970703125, 22.435874938964844, 4.2880859375, 16.779541015625, 2.0994033813476562, 8.301555633544922, 15.577911376953125, 2.657928466796875, 31.894668579101562, 1.0135688781738281, 30.129486083984375, 13.503372192382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000066.npy"}
{"epoch": 0.09691629955947137, "step": 67, "batch_size": 64, "mean": 8.440168380737305, "std": 9.773744583129883, "min": -11.538116455078125, "p10": -0.9826553344726556, "median": 5.99215030670166, "p90": 20.833009338378908, "max": 45.8328857421875, "pos_frac": 0.859375, "sample": [22.053115844726562, 3.5303192138671875, 21.105140686035156, 0.27748870849609375, 2.002655029296875, 0.0134429931640625, 15.638816833496094, 5.243406295776367, -6.343666076660156, 2.1799678802490234, 17.065093994140625, 10.198875427246094, 4.2890472412109375, 15.862052917480469, -0.33405303955078125, -5.024806976318359, 9.773651123046875, 9.390731811523438, 1.317291259765625, -11.538116455078125, 20.691558837890625, 8.04644775390625, 0.37191009521484375, -0.068145751953125, 5.874244689941406, 2.6580429077148438, 20.893630981445312, 6.110055923461914, -1.2606277465820312, 6.6395416259765625, 3.280242919921875, 14.205333709716797, 6.703330993652344, 2.8117923736572266, 2.1319503784179688, 9.574592590332031, 15.078279495239258, 5.571245193481445, 1.400360107421875, -5.0413055419921875, 5.37445068359375, 17.976181030273438, 4.3790283203125, 3.7125587463378906, 25.875823974609375, 14.81829833984375, 6.519815444946289, 18.109481811523438, 14.259056091308594, 14.192840576171875, 12.496116638183594, 5.069122314453125, 11.747991561889648, 31.72967529296875, 5.748252868652344, 0.8353595733642578, 45.8328857421875, 3.6195106506347656, -2.589202880859375, 25.036956787109375, 20.061782836914062, 7.139835357666016, -2.9527435302734375, 8.804765701293945], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000067.npy"}
{"epoch": 0.09838472834067548, "step": 68, "batch_size": 64, "mean": 8.181711196899414, "std": 8.226612091064453, "min": -6.4801788330078125, "p10": 0.2630773544311526, "median": 5.993472099304199, "p90": 20.47516555786133, "max": 36.070159912109375, "pos_frac": 0.90625, "sample": [-4.660896301269531, 3.0965652465820312, 25.06597900390625, 8.180770874023438, 5.702083587646484, -6.33831787109375, 8.2642822265625, 22.491111755371094, 1.3354873657226562, 0.5248737335205078, 13.677902221679688, 15.566146850585938, 17.488449096679688, 11.472824096679688, 10.068267822265625, 0.15087890625, 6.338932037353516, 13.031963348388672, 3.6750411987304688, 7.7253570556640625, -1.0388031005859375, 4.799352645874023, 4.926670074462891, 2.35675048828125, 7.128276824951172, 13.114128112792969, 18.738021850585938, 8.574783325195312, 8.776847839355469, 19.65337371826172, 20.827362060546875, 21.914066314697266, 1.8187637329101562, 6.2587127685546875, 9.936996459960938, 1.7198238372802734, -0.1806468963623047, 5.728231430053711, 2.08880615234375, 5.1177520751953125, 16.231204986572266, 2.8135986328125, 10.758533477783203, -6.4801788330078125, 7.416450500488281, 5.405097961425781, 3.840362548828125, 21.382164001464844, 3.316986083984375, 1.4008255004882812, 1.455038070678711, 36.070159912109375, 24.68366241455078, 17.396804809570312, 13.250251770019531, 1.0840072631835938, 2.075084686279297, 5.48979377746582, 12.173721313476562, -2.308746337890625, 5.6685638427734375, 2.424091339111328, 5.417217254638672, 7.54779052734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000068.npy"}
{"epoch": 0.09985315712187959, "step": 69, "batch_size": 64, "mean": 9.668987274169922, "std": 9.481657981872559, "min": -4.6898651123046875, "p10": -0.2681427001953124, "median": 6.995128631591797, "p90": 20.486595153808597, "max": 43.473358154296875, "pos_frac": 0.875, "sample": [4.22479248046875, 6.584953308105469, 30.43212890625, 8.732917785644531, 12.065399169921875, 15.789024353027344, -4.606292724609375, -1.25531005859375, -0.153106689453125, 3.9714126586914062, 5.66339111328125, 8.839855194091797, -1.23516845703125, 8.99905014038086, 7.469135284423828, 14.870624542236328, 26.1024169921875, 6.525505065917969, 16.641555786132812, 15.577392578125, -0.8164253234863281, 24.494300842285156, 13.535736083984375, 10.456008911132812, 18.947113037109375, 14.155471801757812, 4.2408599853515625, 15.102287292480469, 12.615745544433594, 5.587717056274414, 0.8584079742431641, 0.8925514221191406, 18.994728088378906, 43.473358154296875, 4.637676239013672, 19.877395629882812, 5.499603271484375, 15.211700439453125, 16.66968536376953, 0.853851318359375, 2.0217666625976562, 16.598175048828125, 10.993392944335938, 1.339813232421875, -0.8395919799804688, 14.10635757446289, 6.904396057128906, 36.209983825683594, 3.7904510498046875, 7.897584915161133, 6.302925109863281, -4.6898651123046875, 5.93914794921875, 4.150398254394531, 2.4200973510742188, 7.0858612060546875, 20.79571533203125, 20.7476806640625, 1.0400390625, 18.15606689453125, 1.5331897735595703, 1.2922515869140625, 4.809314727783203, -0.31744384765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000069.npy"}
{"epoch": 0.1013215859030837, "step": 70, "batch_size": 64, "mean": 10.476768493652344, "std": 10.976624488830566, "min": -9.244384765625, "p10": -1.228683471679687, "median": 9.992218017578125, "p90": 25.026464843750006, "max": 40.26800537109375, "pos_frac": 0.84375, "sample": [6.441108703613281, 1.5677947998046875, 11.09823989868164, 19.70343780517578, 23.553619384765625, 12.04942512512207, -7.907249450683594, -1.54833984375, 12.464736938476562, 4.033483505249023, -1.4087448120117188, 13.460517883300781, -2.6362037658691406, 8.061737060546875, 37.533721923828125, 1.8637752532958984, 11.282363891601562, 25.657684326171875, 11.371973037719727, 40.26800537109375, 11.261734008789062, 9.779525756835938, 2.2384567260742188, 28.52496337890625, 0.41889190673828125, 17.516014099121094, 19.245513916015625, 13.286500930786133, -2.052854537963867, 7.263051986694336, 5.847587585449219, 27.09345245361328, 1.9497642517089844, 0.6972236633300781, 1.1364669799804688, -0.15876388549804688, 14.081958770751953, 2.6405563354492188, 11.83721923828125, -1.9583797454833984, 2.760955810546875, 3.8347911834716797, -0.42221832275390625, 2.9757080078125, 10.853700637817383, 2.7167205810546875, 19.020225524902344, 22.87964630126953, -9.244384765625, 5.069999694824219, 17.633987426757812, 3.3520736694335938, 1.5598392486572266, 32.932525634765625, 13.248695373535156, -0.8085403442382812, 34.567352294921875, 10.204910278320312, 22.06720733642578, 16.658992767333984, 3.5499820709228516, 22.9769287109375, 13.715957641601562, 20.84814453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000070.npy"}
{"epoch": 0.1027900146842878, "step": 71, "batch_size": 64, "mean": 12.117111206054688, "std": 11.999532699584961, "min": -9.489480972290039, "p10": 0.6195663452148453, "median": 10.725912094116211, "p90": 23.169992065429696, "max": 50.33636474609375, "pos_frac": 0.890625, "sample": [-5.564811706542969, 40.86187744140625, 6.707279205322266, 2.1231842041015625, 5.668216705322266, 12.57595443725586, 6.402133941650391, 12.336235046386719, 17.929946899414062, 5.790241241455078, 20.842910766601562, 6.893260955810547, 17.669937133789062, 4.417142868041992, 2.4780807495117188, 6.933319091796875, 12.080036163330078, 15.472259521484375, -0.1421051025390625, 2.251628875732422, 3.8969497680664062, 43.76173400878906, 7.269111633300781, 6.328147888183594, 15.89337158203125, 16.427352905273438, 5.885158538818359, 12.557579040527344, 13.061668395996094, -8.390975952148438, 4.568603515625, 13.840118408203125, 11.27471923828125, -0.1298999786376953, 5.02672004699707, 17.555374145507812, 19.37085723876953, -0.02484130859375, 21.361160278320312, 16.042015075683594, 5.174263000488281, 13.485908508300781, 6.8602752685546875, 18.009078979492188, 14.6356201171875, 18.05797576904297, 4.704418182373047, 36.288482666015625, 48.27777099609375, 50.33636474609375, 2.55859375, 23.945205688476562, -9.489480972290039, -1.0905590057373047, 13.41845703125, 6.262687683105469, 7.751972198486328, 10.881511688232422, 19.732086181640625, 16.50806427001953, 30.198394775390625, 3.0691375732421875, 6.046979904174805, 10.5703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000071.npy"}
{"epoch": 0.10425844346549193, "step": 72, "batch_size": 64, "mean": 12.761024475097656, "std": 13.074019432067871, "min": -14.089691162109375, "p10": -2.1346458435058566, "median": 10.784193992614746, "p90": 32.09220123291017, "max": 55.10345458984375, "pos_frac": 0.890625, "sample": [18.166954040527344, 14.91963005065918, 6.134056091308594, 12.610836029052734, 5.735633850097656, 3.5860671997070312, 9.27835464477539, 26.768203735351562, -14.089691162109375, 24.970001220703125, 14.753170013427734, 13.979816436767578, 7.915243148803711, 0.538787841796875, 17.253246307373047, 9.325740814208984, 12.477516174316406, 25.191864013671875, 14.612518310546875, 33.62275695800781, 39.40754699707031, -3.3268566131591797, 6.598869323730469, 10.522184371948242, 16.894493103027344, 3.74884033203125, 18.47760772705078, 10.160400390625, 11.04620361328125, 16.45575714111328, 3.1367416381835938, 5.6684722900390625, 11.808456420898438, -13.160842895507812, 35.26194763183594, 28.520904541015625, 19.380355834960938, 6.703563690185547, 22.620101928710938, -3.7938079833984375, 8.4920654296875, 40.4989013671875, 55.10345458984375, -7.617919921875, 9.725324630737305, 25.85662841796875, 7.68316650390625, 7.767719268798828, 0.9906940460205078, 34.42466735839844, -10.13427734375, 4.028022766113281, 2.5602035522460938, 18.120452880859375, 12.796112060546875, 19.796829223632812, 10.298439025878906, 15.743232727050781, 35.726165771484375, 12.13227653503418, 6.679222106933594, 3.4870986938476562, -3.2804031372070312, 1.9458427429199219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000072.npy"}
{"epoch": 0.10572687224669604, "step": 73, "batch_size": 64, "mean": 12.9429292678833, "std": 18.591745376586914, "min": -25.261825561523438, "p10": -8.75775909423828, "median": 10.025529861450195, "p90": 33.505692672729495, "max": 67.05513000488281, "pos_frac": 0.78125, "sample": [30.779083251953125, -13.465042114257812, 7.479442596435547, 14.684890747070312, 7.3932647705078125, 35.37023162841797, -6.038055419921875, 25.35833740234375, 7.54766845703125, -0.349578857421875, 28.61553955078125, 33.289241790771484, 24.188316345214844, -1.4833145141601562, 0.40594482421875, -14.64947509765625, 56.536956787109375, -1.8878402709960938, 10.461418151855469, 19.519760131835938, 16.763214111328125, 8.10769271850586, 10.465675354003906, 1.1250743865966797, 2.04345703125, 9.589641571044922, -18.188190460205078, 11.128650665283203, 29.67431640625, 15.52825927734375, 13.250381469726562, 2.9685935974121094, 57.658416748046875, -11.294479370117188, 5.201145172119141, 31.005523681640625, 31.64948272705078, -9.344442367553711, 4.341587066650391, 2.7031726837158203, -2.2555389404296875, -5.5533294677734375, -9.178421020507812, 7.101797103881836, 16.40802001953125, 15.82757568359375, 2.6898880004882812, 5.539579391479492, 21.181434631347656, 43.89390563964844, 14.91961669921875, 6.847202301025391, 22.140350341796875, 5.719429016113281, 14.644218444824219, -7.776214599609375, 14.62371826171875, 57.34288024902344, 27.878143310546875, 67.05513000488281, 3.142467498779297, 33.59845733642578, 19.68499755859375, -25.261825561523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000073.npy"}
{"epoch": 0.10719530102790015, "step": 74, "batch_size": 64, "mean": 14.029674530029297, "std": 20.74785614013672, "min": -40.30040740966797, "p10": -2.9911163330078114, "median": 10.624895095825195, "p90": 32.64582748413087, "max": 121.46435546875, "pos_frac": 0.796875, "sample": [27.40526580810547, 9.759231567382812, 12.80548095703125, 0.26739501953125, 24.703224182128906, 6.705333709716797, 10.387760162353516, 55.922706604003906, 2.189258575439453, 15.395500183105469, 15.884124755859375, 10.43515396118164, 20.376773834228516, 2.3784866333007812, 17.426902770996094, 121.46435546875, 8.73529052734375, 30.950050354003906, 15.364727020263672, 3.604278564453125, 1.7860698699951172, 50.45625305175781, 3.7391433715820312, 33.372589111328125, 26.607711791992188, 10.745979309082031, -3.735960006713867, 0.2978515625, -0.7028064727783203, -19.43968963623047, 14.969650268554688, -1.4126739501953125, -0.10745429992675781, 8.88067626953125, -40.30040740966797, 21.191146850585938, -1.8659744262695312, 26.468936920166016, 4.994794845581055, -4.373867034912109, -10.35504150390625, 11.16717529296875, 19.12664794921875, 24.84991455078125, -0.0720367431640625, 45.915802001953125, 14.120437622070312, 3.1414871215820312, 8.798599243164062, 9.094112396240234, 10.50381088256836, 36.584197998046875, 18.85223388671875, 23.592498779296875, 37.989219665527344, 16.64581298828125, 5.403657913208008, 16.619056701660156, 28.212188720703125, 27.2095947265625, -0.01448822021484375, -4.4408721923828125, -3.4733200073242188, 14.695236206054688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000074.npy"}
{"epoch": 0.10866372980910426, "step": 75, "batch_size": 64, "mean": 20.05126190185547, "std": 20.014732360839844, "min": -12.407470703125, "p10": -2.9196683883666967, "median": 16.644107818603516, "p90": 48.70598220825196, "max": 73.53080749511719, "pos_frac": 0.859375, "sample": [-0.6165943145751953, 24.211929321289062, 17.296100616455078, 14.302864074707031, 18.44481658935547, 12.457618713378906, 10.769725799560547, 6.437046051025391, 46.38215637207031, 66.73309326171875, 31.70189666748047, 14.701299667358398, -6.787406921386719, 27.810508728027344, 73.53080749511719, 15.897701263427734, 41.890838623046875, 63.022735595703125, 9.568233489990234, -0.3710460662841797, 43.047637939453125, 24.698593139648438, 15.729568481445312, 10.941200256347656, 17.254898071289062, 47.440773010253906, 49.24821472167969, 3.8868236541748047, 9.210468292236328, 31.358776092529297, 17.912063598632812, 17.075458526611328, 5.0293426513671875, 65.40013122558594, -5.0759429931640625, -3.9067001342773438, 26.135635375976562, -12.407470703125, 20.94446563720703, 20.696208953857422, 6.301486968994141, 11.851478576660156, -8.449504852294922, 8.066352844238281, -7.452968597412109, 28.255203247070312, 2.863462448120117, 12.692340850830078, 38.853485107421875, 27.013946533203125, 4.1536102294921875, -8.103500366210938, 8.84151840209961, 6.79925537109375, 16.915786743164062, 5.492227554321289, 7.3217926025390625, 23.261550903320312, 16.773109436035156, 17.408096313476562, 37.917381286621094, 57.770751953125, 60.2142333984375, 16.515106201171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000075.npy"}
{"epoch": 0.11013215859030837, "step": 76, "batch_size": 64, "mean": 13.184097290039062, "std": 15.858263969421387, "min": -43.6123046875, "p10": -2.5099504470825185, "median": 12.413437843322754, "p90": 30.95248832702637, "max": 67.09577941894531, "pos_frac": 0.84375, "sample": [13.940938949584961, 6.090381622314453, 2.5387954711914062, 25.312896728515625, 2.27423095703125, 18.777862548828125, 10.95648193359375, 27.2330322265625, 31.007572174072266, 13.922248840332031, 15.596107482910156, 22.147361755371094, 9.618995666503906, 32.328895568847656, 4.565834045410156, 5.382871627807617, 8.442024230957031, 67.09577941894531, 32.88770294189453, 6.097190856933594, -5.645240783691406, 12.555479049682617, -0.04867362976074219, 12.670097351074219, -14.970283508300781, 7.175603866577148, 5.647520065307617, 15.630531311035156, 6.7547760009765625, 8.028312683105469, 30.823959350585938, 18.428359985351562, 22.650588989257812, 9.324705123901367, 30.380828857421875, 4.11732292175293, -3.0266036987304688, 26.236618041992188, 3.2212371826171875, -10.536033630371094, 7.3252716064453125, 2.192506790161133, -6.417022705078125, -6.83697509765625, 16.872154235839844, 30.349853515625, 50.35260009765625, 28.120529174804688, 8.752742767333984, 5.9423675537109375, 12.27139663696289, -1.3044261932373047, 16.445449829101562, 16.768600463867188, 15.738624572753906, 12.95767593383789, -0.7155437469482422, 13.921806335449219, 35.34496307373047, 24.420936584472656, 23.83544921875, 33.689544677734375, 11.729759216308594, -43.6123046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000076.npy"}
{"epoch": 0.11160058737151249, "step": 77, "batch_size": 64, "mean": 21.237140655517578, "std": 22.011798858642578, "min": -40.013763427734375, "p10": 0.12546348571777421, "median": 21.783546447753906, "p90": 55.34773101806641, "max": 75.38580322265625, "pos_frac": 0.890625, "sample": [16.690895080566406, 44.33525848388672, 31.715229034423828, 41.95673751831055, 42.400543212890625, -0.20832061767578125, 59.869537353515625, 22.92263412475586, 9.246545791625977, 10.890483856201172, 6.021520614624023, 33.96440124511719, 39.50212860107422, 5.515918731689453, 4.1625213623046875, 27.113494873046875, -40.013763427734375, 22.27796173095703, 35.625953674316406, 31.366539001464844, 14.284318923950195, -2.6637229919433594, 10.032821655273438, 0.9042930603027344, 3.7216033935546875, 54.41401672363281, 3.4335880279541016, 27.840919494628906, 11.575218200683594, -5.346456527709961, 32.26118850708008, 29.61815643310547, 13.874580383300781, 64.38612365722656, 33.22541809082031, 9.622053146362305, 57.11212158203125, 75.38580322265625, 21.28913116455078, 13.186515808105469, -32.43341064453125, 7.7773590087890625, -15.09149169921875, 3.8797473907470703, 12.146612167358398, 26.32140350341797, 32.879486083984375, 24.552011489868164, 26.738021850585938, 26.05475616455078, 7.710437774658203, -6.8913421630859375, 4.9029541015625, 26.23613739013672, 28.487030029296875, 24.127853393554688, 4.22479248046875, 56.200897216796875, 68.05091857910156, 12.064796447753906, 23.691524505615234, 55.747894287109375, 21.17459487915039, 7.110107421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000077.npy"}
{"epoch": 0.1130690161527166, "step": 78, "batch_size": 64, "mean": 16.436477661132812, "std": 18.949737548828125, "min": -20.861366271972656, "p10": -2.860781860351562, "median": 16.084810256958008, "p90": 42.61825103759766, "max": 84.21044921875, "pos_frac": 0.8125, "sample": [2.2380523681640625, 1.7361736297607422, 41.60107421875, 16.675552368164062, -2.0611343383789062, 47.00437927246094, 17.08253288269043, 84.21044921875, 26.4132080078125, 22.502635955810547, 21.10049057006836, 8.506118774414062, -4.74798583984375, 14.667022705078125, 22.8466796875, 19.59946060180664, -0.6156005859375, 8.494367599487305, -6.809478759765625, 3.135894775390625, 7.020967483520508, 18.64369773864746, 27.037227630615234, 23.415733337402344, 20.001575469970703, 14.47439193725586, 17.25588607788086, 17.676544189453125, 22.8807373046875, 6.295633316040039, 43.05418395996094, 16.4633846282959, 5.548492431640625, -10.55605697631836, 25.919044494628906, -11.30621337890625, 53.988922119140625, 20.94770050048828, 44.34220886230469, -0.762786865234375, 37.67290115356445, 4.005882263183594, 50.704010009765625, 33.20476531982422, 29.540077209472656, 0.3189201354980469, 11.493392944335938, 3.579458236694336, 12.940425872802734, 7.23187255859375, 6.338102340698242, -1.1934661865234375, 15.706235885620117, -3.1302490234375, 31.72052001953125, -20.861366271972656, 52.24797058105469, 5.096515655517578, -18.28240966796875, 26.605682373046875, -2.232025146484375, 35.25750732421875, 1.9166984558105469, 26.131935119628906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000078.npy"}
{"epoch": 0.1145374449339207, "step": 79, "batch_size": 64, "mean": 18.058425903320312, "std": 23.04805564880371, "min": -20.985855102539062, "p10": -6.339723205566405, "median": 12.986186981201172, "p90": 50.54457244873049, "max": 76.422119140625, "pos_frac": 0.75, "sample": [-6.734375, 55.18012237548828, 6.059610366821289, 70.08642578125, 5.069038391113281, 0.10889434814453125, -1.841409683227539, -10.632949829101562, 44.093650817871094, 37.786529541015625, 19.25975799560547, -20.985855102539062, 24.70537567138672, 70.88215637207031, 9.372676849365234, 26.32561492919922, 73.66712951660156, 52.89973449707031, 17.692140579223633, -10.948089599609375, 76.422119140625, 17.469070434570312, -3.668243408203125, -11.30828857421875, 28.921722412109375, 0.8179702758789062, -0.196441650390625, 16.961692810058594, 14.037666320800781, 62.85395050048828, 3.1858444213867188, 8.904664993286133, 11.934707641601562, 27.88589096069336, -11.032562255859375, 10.975440979003906, -0.9833221435546875, 7.3872833251953125, 41.901397705078125, 40.20179748535156, 20.656021118164062, 31.7939453125, 34.20338439941406, 19.933746337890625, 27.876144409179688, -5.4188690185546875, 3.0119781494140625, 30.751571655273438, 36.869163513183594, 11.225030899047852, 15.544164657592773, -5.213083267211914, -1.3585357666015625, 38.96156311035156, -1.3363800048828125, 22.281784057617188, 5.160289764404297, 6.555080413818359, 0.21154022216796875, -9.271629333496094, 6.742198944091797, 45.0491943359375, -3.479419708251953, 20.27172088623047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000079.npy"}
{"epoch": 0.11600587371512482, "step": 80, "batch_size": 64, "mean": 16.79857063293457, "std": 24.100067138671875, "min": -22.36371612548828, "p10": -6.923367309570312, "median": 12.127460479736328, "p90": 43.8201530456543, "max": 98.13021850585938, "pos_frac": 0.78125, "sample": [27.088401794433594, 8.190750122070312, -6.933845520019531, 24.59442901611328, 6.012733459472656, 3.7311458587646484, -3.0190048217773438, -10.163902282714844, 31.537445068359375, 2.6567764282226562, 4.064121246337891, 16.747554779052734, -15.575565338134766, -21.242660522460938, -6.8268585205078125, 23.993087768554688, 98.13021850585938, 39.994232177734375, 83.23983764648438, 46.44843292236328, 25.744583129882812, 6.995063781738281, 41.961029052734375, 44.616920471191406, 35.22962951660156, 15.731819152832031, -1.362152099609375, 33.471954345703125, 15.236724853515625, 11.510711669921875, 3.3162155151367188, -16.729347229003906, 4.615505218505859, -6.898918151855469, 25.074813842773438, -22.36371612548828, 60.960052490234375, 31.92620086669922, -8.120147705078125, 88.57919311523438, 10.755741119384766, 2.05206298828125, 2.4326210021972656, 2.3104171752929688, 46.51476287841797, 12.744209289550781, 23.834861755371094, 29.705795288085938, 16.69032096862793, 10.433563232421875, -3.9370880126953125, -2.857006072998047, 0.5473861694335938, 6.820625305175781, 22.95025062561035, 23.001785278320312, 6.390705108642578, 3.5410690307617188, 32.69435119628906, 25.100204467773438, 13.4443359375, 38.978179931640625, -5.563032150268555, 14.358970642089844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000080.npy"}
{"epoch": 0.11747430249632893, "step": 81, "batch_size": 64, "mean": 23.490432739257812, "std": 28.395017623901367, "min": -29.920196533203125, "p10": -9.631399536132811, "median": 19.41441535949707, "p90": 59.09971313476563, "max": 120.59808349609375, "pos_frac": 0.84375, "sample": [44.15582275390625, 34.49517822265625, -29.920196533203125, 40.824859619140625, 25.822662353515625, 59.57664489746094, 20.108844757080078, -11.039936065673828, 7.726539611816406, 33.52405548095703, 32.00181579589844, -22.544387817382812, 2.3677101135253906, 25.20947265625, 87.74313354492188, 10.425409317016602, 24.006973266601562, 37.39338684082031, 59.180389404296875, -15.850982666015625, 1.3499088287353516, 10.361175537109375, 21.94696807861328, 73.28672790527344, 24.300010681152344, 12.061904907226562, 17.923919677734375, 8.287948608398438, 7.230144500732422, -8.009552001953125, 25.986427307128906, 7.55108642578125, 5.460699081420898, 13.268699645996094, 29.062896728515625, 120.59808349609375, 43.28330612182617, 18.719985961914062, 3.5670547485351562, 58.911468505859375, 43.1845703125, 50.4998779296875, 15.641006469726562, -0.6531524658203125, 4.707633972167969, -2.2108497619628906, -18.566783905029297, 15.28703498840332, 15.061851501464844, 30.524795532226562, 14.969676971435547, 13.433053970336914, 21.347267150878906, -15.622055053710938, 39.260597229003906, 45.63213348388672, 110.040771484375, 60.316619873046875, 10.165313720703125, 35.297767639160156, 2.3096847534179688, 28.722633361816406, -10.32647705078125, 34.00840759277344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000081.npy"}
{"epoch": 0.11894273127753303, "step": 82, "batch_size": 64, "mean": 18.563827514648438, "std": 30.420879364013672, "min": -59.29170227050781, "p10": -16.61303901672363, "median": 16.258251190185547, "p90": 52.664073944091804, "max": 122.32901000976562, "pos_frac": 0.796875, "sample": [-9.48160171508789, 33.14213562011719, -20.409568786621094, 18.09170150756836, 21.024600982666016, 19.07103157043457, -27.75147247314453, 15.503852844238281, 26.00525665283203, 58.94390106201172, 69.2874755859375, 13.131595611572266, -12.976119995117188, 8.917938232421875, 15.861013412475586, 105.83329772949219, 13.766487121582031, -53.06752014160156, -8.779884338378906, 33.90632629394531, 1.5623397827148438, 33.12291717529297, 53.123497009277344, 4.048330307006836, -18.17171859741211, -6.69366455078125, 3.9847335815429688, 24.116867065429688, 21.495895385742188, 13.530784606933594, 20.998329162597656, 37.77909851074219, 37.74797821044922, -0.8783702850341797, 74.86163330078125, 19.70716094970703, 51.59208679199219, 16.041439056396484, 9.591140747070312, 5.255607604980469, -1.8864059448242188, 30.477676391601562, 61.22502136230469, -18.90484619140625, 29.307235717773438, 21.668060302734375, 7.146539688110352, 48.00575637817383, 35.57952117919922, -21.947097778320312, 9.921646118164062, 7.1698455810546875, -59.29170227050781, 8.345611572265625, 32.95411682128906, 21.980043411254883, 6.1912689208984375, 27.272361755371094, 16.47506332397461, 122.32901000976562, 15.347038269042969, 18.099687576293945, 0.43483734130859375, 47.348121643066406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000082.npy"}
{"epoch": 0.12041116005873716, "step": 83, "batch_size": 64, "mean": 21.99319076538086, "std": 26.900634765625, "min": -27.345932006835938, "p10": -9.301779174804686, "median": 18.42578125, "p90": 63.717628479003935, "max": 86.50747680664062, "pos_frac": 0.8125, "sample": [21.813575744628906, 26.358020782470703, 17.884197235107422, 71.55845642089844, 45.82896423339844, 12.939697265625, -7.509674072265625, 71.78341674804688, 56.998809814453125, 4.91815185546875, 12.259567260742188, 18.43470001220703, 1.161956787109375, -5.659767150878906, 18.719573974609375, 71.40939331054688, -14.78619384765625, 86.50747680664062, 10.968093872070312, 74.7355728149414, 39.627685546875, 12.442283630371094, 50.81498718261719, -21.10797882080078, -27.345932006835938, -10.0233154296875, 56.8824462890625, -20.948883056640625, 17.378087997436523, 66.59712219238281, 12.101791381835938, 15.069580078125, 4.861322402954102, 1.528594970703125, 25.64331817626953, 12.092964172363281, 26.572792053222656, -0.08208084106445312, 40.740379333496094, 13.638683319091797, 20.899749755859375, 19.987743377685547, 45.38374328613281, 74.10519409179688, 20.49020004272461, 48.666168212890625, 5.998531341552734, -19.03119659423828, -2.5652828216552734, 2.0180282592773438, -22.87335205078125, 52.172576904296875, 9.991127014160156, 43.309814453125, 25.851852416992188, 24.03314971923828, -7.618194580078125, 7.933467864990234, 18.94710922241211, 33.365631103515625, 43.88069152832031, 18.41686248779297, 29.073089599609375, 2.319751739501953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000083.npy"}
{"epoch": 0.12187958883994127, "step": 84, "batch_size": 64, "mean": 19.20960235595703, "std": 20.670482635498047, "min": -43.95539855957031, "p10": -1.7366424560546871, "median": 16.698697090148926, "p90": 43.14859466552734, "max": 83.69926452636719, "pos_frac": 0.84375, "sample": [7.8775787353515625, 15.634567260742188, 3.6066513061523438, 5.204292297363281, 46.863128662109375, 5.5839691162109375, 5.625762939453125, 21.310317993164062, 15.926214218139648, 9.593280792236328, 30.669300079345703, 35.905738830566406, -19.9033203125, 15.193580627441406, 22.221294403076172, 30.41419219970703, 10.440967559814453, 83.69926452636719, 30.697635650634766, 3.555755615234375, 7.100395202636719, 30.888160705566406, 7.757148742675781, 6.769702911376953, -0.7314720153808594, 43.05888366699219, 26.170677185058594, 32.47575378417969, -43.95539855957031, -1.895843505859375, 54.64265441894531, 3.407562255859375, 37.07701110839844, 16.765426635742188, 19.37743377685547, 8.864166259765625, 49.14051055908203, 43.187042236328125, 30.864532470703125, 21.828590393066406, 3.1875534057617188, 32.38306427001953, -4.025386810302734, 16.631967544555664, 28.933624267578125, 29.425445556640625, -1.36517333984375, -3.7994136810302734, 34.84745788574219, 34.07536315917969, -2.7456741333007812, 41.59661102294922, 56.69146728515625, 20.019901275634766, 61.43792724609375, 16.172882080078125, 30.4410400390625, 8.451416015625, -17.71648406982422, 31.86469268798828, 12.822954177856445, 18.76384735107422, 8.689445495605469, -0.28305816650390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000084.npy"}
{"epoch": 0.12334801762114538, "step": 85, "batch_size": 64, "mean": 19.705352783203125, "std": 24.871047973632812, "min": -31.039138793945312, "p10": -5.590434265136719, "median": 17.608476638793945, "p90": 59.037527465820325, "max": 69.53416442871094, "pos_frac": 0.71875, "sample": [42.82392883300781, 13.738899230957031, 30.665077209472656, 3.6383304595947266, -2.0081100463867188, 22.941265106201172, 26.958602905273438, 2.398113250732422, 60.42682647705078, 9.504077911376953, -2.5815658569335938, 6.00994873046875, 65.944091796875, 26.745338439941406, 24.340505599975586, 20.917007446289062, -5.4244384765625, -18.712158203125, 20.45056915283203, 1.2788925170898438, -31.039138793945312, 69.53416442871094, 19.51104736328125, -1.990966796875, -5.05328369140625, 46.16838073730469, 7.4132232666015625, -8.639984130859375, 18.517662048339844, 10.939529418945312, 48.20716857910156, -5.6615753173828125, -17.146484375, -1.827718734741211, 14.990493774414062, 5.138072967529297, 64.52235412597656, 16.786518096923828, 38.35472869873047, -0.30559730529785156, -4.2920379638671875, 69.49174499511719, -1.6646785736083984, 46.261444091796875, 60.537139892578125, 15.266761779785156, 55.79582977294922, 66.85903930664062, 10.32525634765625, 5.983358383178711, 32.86060333251953, 50.674339294433594, -0.80419921875, -6.55767822265625, 18.430435180664062, 18.75883674621582, 54.58184814453125, 21.771392822265625, 30.392059326171875, 38.2334098815918, -23.281463623046875, 19.15151596069336, -1.098846435546875, 44.992637634277344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000085.npy"}
{"epoch": 0.12481644640234948, "step": 86, "batch_size": 64, "mean": 20.98587989807129, "std": 38.07899856567383, "min": -58.33819580078125, "p10": -15.392702102661131, "median": 15.565719604492188, "p90": 66.18696746826173, "max": 145.25918579101562, "pos_frac": 0.734375, "sample": [-13.439239501953125, 19.220848083496094, 69.56175231933594, -19.571372985839844, 35.34925079345703, 31.54150390625, 27.63776969909668, -1.0603103637695312, 106.477783203125, 11.910591125488281, 6.620349884033203, -4.379920959472656, 1.051116943359375, 22.69799041748047, -10.20819091796875, 19.48766326904297, 49.80975341796875, 19.300975799560547, -3.524913787841797, -53.031219482421875, 62.906219482421875, 22.346729278564453, 31.457111358642578, 10.701311111450195, 10.635292053222656, -16.229900360107422, -3.1892318725585938, 67.59300231933594, -4.792915344238281, 7.835500717163086, 37.74765396118164, 19.25519561767578, 7.959226608276367, -58.33819580078125, 47.365692138671875, 54.99072265625, 27.912628173828125, -40.507850646972656, 75.37675476074219, 2.4525699615478516, 145.25918579101562, 19.730724334716797, 25.181427001953125, 11.477588653564453, -4.125205993652344, 130.08767700195312, 6.906879425048828, 122.80120849609375, 47.7327880859375, -6.6251068115234375, -30.51428985595703, 39.024513244628906, 9.771642684936523, 34.6368408203125, 29.366500854492188, 35.32497024536133, 37.17291259765625, 5.985254287719727, 2.0624847412109375, 4.169191360473633, -4.885408401489258, -19.23529815673828, 2.8524627685546875, 20.007678985595703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000086.npy"}
{"epoch": 0.1262848751835536, "step": 87, "batch_size": 64, "mean": 24.446144104003906, "std": 33.614288330078125, "min": -77.30804443359375, "p10": -7.398130798339843, "median": 19.969839096069336, "p90": 66.31752777099611, "max": 130.2517852783203, "pos_frac": 0.8125, "sample": [23.00156593322754, 35.85700988769531, 0.12163352966308594, 9.277717590332031, 39.876312255859375, -7.605621337890625, 31.577056884765625, -0.5327644348144531, 1.7856521606445312, -4.2577667236328125, -23.447341918945312, 46.478485107421875, -6.9139862060546875, 68.50822448730469, 32.181884765625, 59.81101989746094, 53.618804931640625, 10.869504928588867, 90.19430541992188, 59.380218505859375, 20.818710327148438, 19.755508422851562, 7.146867752075195, 41.59661102294922, 40.919960021972656, 2.0842933654785156, 38.46797180175781, 130.2517852783203, 13.78399658203125, 2.6977920532226562, 14.377019882202148, 75.67086791992188, 61.17317199707031, 9.703113555908203, 57.95843505859375, 27.04499053955078, 13.44462776184082, 39.861568450927734, 19.74298095703125, 76.71699523925781, 0.8489055633544922, 81.53800964355469, 14.217727661132812, 20.174625396728516, -77.30804443359375, 11.642866134643555, 41.474327087402344, -34.884490966796875, 39.178733825683594, 70.61436462402344, -15.99062728881836, -16.47742462158203, 8.84317398071289, 19.765052795410156, -6.539520263671875, 61.205902099609375, 30.619094848632812, 33.936187744140625, -37.051116943359375, 24.39678955078125, 15.174346923828125, 0.91015625, 50.18775939941406, -4.922809600830078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000087.npy"}
{"epoch": 0.1277533039647577, "step": 88, "batch_size": 64, "mean": 18.518863677978516, "std": 32.3276481628418, "min": -60.90460205078125, "p10": -9.1239501953125, "median": 11.53247356414795, "p90": 59.70619049072269, "max": 132.062255859375, "pos_frac": 0.765625, "sample": [9.781890869140625, 10.857879638671875, 65.95783996582031, 65.04098510742188, 34.47425842285156, 48.26325988769531, 4.072231292724609, 63.05430603027344, -9.133773803710938, 13.964803695678711, 31.966827392578125, 7.220878601074219, 3.04168701171875, 46.99224853515625, -23.184951782226562, 30.015426635742188, -20.133331298828125, 15.757102966308594, -46.904052734375, -60.90460205078125, -5.569013595581055, 74.29208374023438, 38.41728973388672, -3.9531097412109375, 44.665855407714844, 5.396078109741211, 29.721221923828125, -9.101028442382812, 23.281965255737305, 23.104248046875, 40.59252166748047, 7.1824951171875, -1.8782386779785156, 28.141983032226562, 1.1656990051269531, 94.23077392578125, 10.692802429199219, 51.8939208984375, 4.81787109375, 132.062255859375, 6.267002105712891, 44.778282165527344, 4.250436782836914, 13.917375564575195, 11.378097534179688, 25.25371551513672, 1.00048828125, -3.0063095092773438, 34.11964416503906, 82.58575439453125, 34.56884002685547, 14.69805908203125, -0.5508041381835938, 10.293907165527344, -51.195709228515625, -5.7046661376953125, 10.97824478149414, -1.7250652313232422, 11.686849594116211, 15.210887908935547, 19.293394088745117, -21.077590942382812, 8.380935668945312, 40.446868896484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000088.npy"}
{"epoch": 0.12922173274596183, "step": 89, "batch_size": 64, "mean": 20.72658920288086, "std": 27.619815826416016, "min": -65.46405029296875, "p10": -2.215394592285156, "median": 17.006900787353516, "p90": 49.10557708740236, "max": 122.94154357910156, "pos_frac": 0.859375, "sample": [5.97552490234375, 1.5741710662841797, 5.675689697265625, 21.78364372253418, -9.534343719482422, 50.6715087890625, 41.19678497314453, 16.326995849609375, -9.205429077148438, 57.33374786376953, 59.533714294433594, 23.78310775756836, 22.574050903320312, 45.45173645019531, 36.515708923339844, 29.707584381103516, 13.31268310546875, 10.12799072265625, -2.969663619995117, 19.70730209350586, 15.496368408203125, 2.988466262817383, 16.73457145690918, 11.56362533569336, -9.5948486328125, 1.4166336059570312, 0.8698501586914062, 10.373872756958008, 75.69612121582031, 19.46624755859375, 28.548995971679688, 88.48104858398438, 16.54879379272461, 44.219215393066406, 21.66107940673828, 1.9947967529296875, 13.33527946472168, 122.94154357910156, 4.635349273681641, -2.299407958984375, 28.78940200805664, -0.7921600341796875, 21.622987747192383, 30.812854766845703, 15.589357376098633, 5.668315887451172, 10.307788848876953, 5.562839508056641, 4.1076202392578125, 35.63710021972656, 21.228309631347656, 41.28271484375, -2.0193634033203125, 17.574020385742188, 42.27825164794922, -65.46405029296875, -25.957672119140625, 27.305519104003906, 17.27923011779785, 23.537212371826172, 23.43218994140625, 24.063865661621094, 92.85093688964844, 7.184379577636719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000089.npy"}
{"epoch": 0.13069016152716592, "step": 90, "batch_size": 64, "mean": 28.227087020874023, "std": 41.6912841796875, "min": -50.83802795410156, "p10": -10.848527336120599, "median": 18.97040557861328, "p90": 91.36948394775392, "max": 149.06085205078125, "pos_frac": 0.796875, "sample": [68.21255493164062, -44.23893737792969, 28.843475341796875, 93.24533081054688, 22.81171417236328, 36.44408416748047, 0.7605934143066406, -4.443660736083984, 57.718658447265625, -1.5202960968017578, 45.18333435058594, 18.51980209350586, -1.0130233764648438, 12.1070556640625, 38.062889099121094, 86.42779541015625, -4.4530487060546875, 149.06085205078125, 53.41658020019531, 1.9988250732421875, 3.103872299194336, 10.719108581542969, 20.127464294433594, -13.428489685058594, 117.00640869140625, 16.8104305267334, 24.071876525878906, 120.29574584960938, 4.787773132324219, 23.577388763427734, 138.54391479492188, 22.25720977783203, -25.678848266601562, 11.41285514831543, -3.535888671875, -50.83802795410156, 27.641841888427734, 8.052757263183594, 62.30366516113281, 5.590782165527344, 13.794776916503906, 12.315261840820312, -22.195022583007812, -4.828615188598633, 21.190505981445312, 86.99250793457031, 100.15054321289062, -35.373748779296875, -30.555191040039062, 11.387619018554688, 37.041168212890625, 1.2064990997314453, 19.421009063720703, 36.70977020263672, 67.97409057617188, 32.59898376464844, 10.658933639526367, 10.556291580200195, 61.04852294921875, 17.269847869873047, 23.955711364746094, 17.317934036254883, 97.08462524414062, 40.84498596191406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000090.npy"}
{"epoch": 0.13215859030837004, "step": 91, "batch_size": 64, "mean": 23.309101104736328, "std": 34.401737213134766, "min": -34.10810089111328, "p10": -19.130560302734374, "median": 21.721559524536133, "p90": 69.64406585693364, "max": 121.14190673828125, "pos_frac": 0.75, "sample": [18.68859100341797, 53.00495910644531, -33.17607116699219, 7.826698303222656, 73.61335754394531, 0.7240982055664062, -18.5615234375, 43.82685470581055, 3.606395721435547, 21.211952209472656, 42.44843292236328, 39.93506622314453, 42.452064514160156, 12.752525329589844, -34.10810089111328, 111.38960266113281, -11.735885620117188, 2.19500732421875, 7.950653076171875, 84.97648620605469, 9.722770690917969, 32.40529251098633, 121.14190673828125, -17.43378448486328, 27.52362823486328, 54.884666442871094, -20.303077697753906, 2.9150028228759766, 22.662220001220703, -5.2898101806640625, -18.725723266601562, 22.143632888793945, 60.38238525390625, -21.4339599609375, 19.117698669433594, 21.826889038085938, 57.31395721435547, -13.791084289550781, 21.616230010986328, -4.354789733886719, 8.619178771972656, 24.504074096679688, 59.31157684326172, 2.2408599853515625, -19.304061889648438, 39.53248596191406, 24.077571868896484, 78.50019836425781, 25.702720642089844, 41.545021057128906, 10.213851928710938, -25.06537628173828, 0.39975929260253906, 29.2025146484375, -2.290342330932617, 42.88013458251953, -0.38555335998535156, 85.77998352050781, 45.415504455566406, 24.02783966064453, 51.699859619140625, -26.232322692871094, 48.10621643066406, 81.95548248291016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000091.npy"}
{"epoch": 0.13362701908957417, "step": 92, "batch_size": 64, "mean": 23.60449981689453, "std": 33.466407775878906, "min": -46.92228698730469, "p10": -10.796741485595703, "median": 17.51198101043701, "p90": 57.90043640136719, "max": 126.91476440429688, "pos_frac": 0.75, "sample": [-4.69940185546875, 5.747833251953125, 7.026275634765625, 46.67676544189453, -7.808460235595703, 6.75860595703125, -46.92228698730469, -16.28936004638672, 48.061683654785156, 24.782794952392578, 66.47555541992188, 0.22510910034179688, 44.307220458984375, 3.0091075897216797, -4.950004577636719, 110.36117553710938, 56.086021423339844, 126.91476440429688, -41.3253173828125, -7.07830810546875, 25.79354476928711, -10.797096252441406, 26.464187622070312, -7.756359100341797, -18.04021453857422, 58.27369689941406, -10.795913696289062, 73.06149291992188, 29.529945373535156, 5.486713409423828, 72.9433364868164, -2.5657272338867188, 35.49174499511719, 0.5671558380126953, 51.67710876464844, 8.332862854003906, 32.268775939941406, 105.02522277832031, 10.155420303344727, 56.96336364746094, 43.98638153076172, 57.02949523925781, -1.796173095703125, -5.641593933105469, 8.89212417602539, 24.833354949951172, 5.34625244140625, 41.09869384765625, 35.46014404296875, 11.140647888183594, 0.36124229431152344, -13.65707015991211, 51.92451858520508, 16.630403518676758, 18.393558502197266, 14.176689147949219, 36.01464080810547, 51.437774658203125, 43.18402862548828, 24.388221740722656, 11.083555221557617, -12.173416137695312, 55.11982727050781, 34.01569366455078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000092.npy"}
{"epoch": 0.13509544787077826, "step": 93, "batch_size": 64, "mean": 23.836261749267578, "std": 29.824560165405273, "min": -36.252403259277344, "p10": -3.636377334594724, "median": 16.723175048828125, "p90": 58.430398559570314, "max": 152.33377075195312, "pos_frac": 0.84375, "sample": [21.172515869140625, 9.023149490356445, 26.46257781982422, 63.222267150878906, 58.50715637207031, 54.99864196777344, 4.5252838134765625, 22.80847930908203, 45.415321350097656, -14.532737731933594, 74.1842041015625, 19.27039337158203, 51.60126495361328, 51.17181396484375, 152.33377075195312, 12.620868682861328, -0.9599609375, 4.6208953857421875, 2.7286148071289062, 87.91657257080078, -8.616447448730469, 12.860576629638672, 21.96459197998047, 0.30724334716796875, 34.0164794921875, 10.973403930664062, 31.39464569091797, 6.309761047363281, -1.1653060913085938, 12.61334228515625, -0.40075111389160156, 14.099632263183594, 9.497817993164062, 20.42111587524414, 32.909019470214844, 53.38743591308594, 64.89232635498047, -23.45557403564453, -4.695407867431641, 49.98200988769531, 44.87610626220703, 35.16604232788086, 36.358848571777344, 6.59837532043457, 10.068742752075195, 8.847671508789062, 29.288909912109375, 2.874713897705078, 58.25129699707031, 1.2023696899414062, -7.607841491699219, 58.65666198730469, 6.64117431640625, 17.334877014160156, 6.232854843139648, 5.4035797119140625, -19.06543731689453, 31.135658264160156, 46.651695251464844, 35.36067199707031, 45.96968078613281, 16.111473083496094, -36.252403259277344, 1.0280818939208984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000093.npy"}
{"epoch": 0.13656387665198239, "step": 94, "batch_size": 64, "mean": 27.329792022705078, "std": 31.28114891052246, "min": -20.617950439453125, "p10": -7.8596807479858395, "median": 24.237979888916016, "p90": 68.56506423950195, "max": 112.87734985351562, "pos_frac": 0.78125, "sample": [26.191238403320312, -17.355133056640625, 45.099456787109375, 19.12140655517578, 5.3586578369140625, -14.87432861328125, 9.686027526855469, 61.205265045166016, 5.94120979309082, -20.617950439453125, 24.331064224243164, 11.478073120117188, 24.89544677734375, 6.8756103515625, 11.821456909179688, 0.09488296508789062, 58.5511474609375, 11.858261108398438, -7.327976226806641, 111.31170654296875, 5.555515289306641, 30.609050750732422, 32.73786163330078, 53.931907653808594, 79.49368286132812, 28.6171875, 13.592620849609375, 29.679359436035156, 21.44167709350586, -3.810699462890625, -4.941930770874023, 17.219812393188477, 36.038780212402344, 59.064605712890625, 43.04185104370117, 19.254817962646484, 36.27000427246094, 67.8332290649414, 34.17262649536133, 24.144895553588867, 5.441747665405273, 45.741668701171875, -2.8005752563476562, -17.110305786132812, 68.87870788574219, 75.14151000976562, -7.452676773071289, 58.866485595703125, 112.87734985351562, -8.034111022949219, -20.523391723632812, 88.64152526855469, 31.431365966796875, 56.199249267578125, -4.269195556640625, 18.256256103515625, -14.761184692382812, -5.534332275390625, 37.196533203125, 15.209747314453125, 36.8864631652832, 56.51849365234375, 72.60980224609375, 52.10319519042969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000094.npy"}
{"epoch": 0.13803230543318648, "step": 95, "batch_size": 64, "mean": 22.231979370117188, "std": 29.796245574951172, "min": -74.62229919433594, "p10": -6.541800689697265, "median": 16.74095344543457, "p90": 63.3253112792969, "max": 93.18487548828125, "pos_frac": 0.828125, "sample": [16.10723876953125, 42.72064971923828, 6.352561950683594, 23.261383056640625, 20.19066047668457, 69.6961669921875, 7.668544769287109, 6.628885269165039, -7.554786682128906, 16.67266082763672, 19.390798568725586, 4.594390869140625, 35.66038131713867, 81.94642639160156, 50.0604248046875, -6.0467987060546875, 38.30785369873047, 3.6391735076904297, 15.38160514831543, 45.97437286376953, 23.4639892578125, 39.9991455078125, 19.652009963989258, 15.928312301635742, 9.759504318237305, -12.524772644042969, 10.420154571533203, 52.541908264160156, 20.425052642822266, 29.1944580078125, -9.884132385253906, -6.753944396972656, 52.93476867675781, 93.18487548828125, 77.6121826171875, 58.27037048339844, 37.12605285644531, 2.141359329223633, 0.515411376953125, 65.49171447753906, 46.02394485473633, 82.86825561523438, 72.69514465332031, 3.7657928466796875, 14.693462371826172, 8.925573348999023, -1.5957279205322266, 32.74983215332031, 29.6884765625, -74.62229919433594, 2.783344268798828, 2.081310272216797, -30.1903076171875, 46.24513244628906, 43.282676696777344, 13.549945831298828, 42.86134338378906, 11.827877044677734, 16.809246063232422, 17.642959594726562, -5.64710807800293, -2.463785171508789, 15.49339485168457, -36.77288055419922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000095.npy"}
{"epoch": 0.1395007342143906, "step": 96, "batch_size": 64, "mean": 30.2021541595459, "std": 36.12517166137695, "min": -54.57609558105469, "p10": -0.3743946075439445, "median": 21.142333984375, "p90": 79.22219314575197, "max": 156.9261016845703, "pos_frac": 0.890625, "sample": [62.83659362792969, 17.87738800048828, 37.67558288574219, 31.77181625366211, 21.399703979492188, 64.68560791015625, 4.4232177734375, 2.808300018310547, 1.3286304473876953, 25.71642303466797, 111.95793151855469, 16.420909881591797, 10.226486206054688, -4.061779022216797, 2.246417999267578, 22.008670806884766, 29.082286834716797, 43.777732849121094, 87.1428451538086, 91.16098022460938, 9.159709930419922, 44.51000213623047, 37.411582946777344, 156.9261016845703, 74.05652618408203, 15.188610076904297, 20.884963989257812, 9.162269592285156, 14.848342895507812, 16.311386108398438, 9.544990539550781, 136.25259399414062, 32.966041564941406, 28.22545623779297, 5.4750213623046875, 89.90884399414062, 59.40275573730469, 6.442070007324219, 36.983543395996094, 2.049783706665039, 47.096717834472656, 26.42189598083496, 23.129180908203125, -9.838340759277344, 15.445640563964844, 0.41975975036621094, -2.0356216430664062, 40.680442810058594, -0.7147464752197266, 24.84490966796875, 12.282745361328125, 58.78404235839844, -14.466022491455078, 5.22125244140625, 65.22573852539062, 11.384559631347656, 16.938180923461914, 64.59864807128906, 81.43605041503906, 1.0360565185546875, -0.7548274993896484, 31.85516357421875, -54.57609558105469, 2.3262863159179688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000096.npy"}
{"epoch": 0.14096916299559473, "step": 97, "batch_size": 64, "mean": 23.385099411010742, "std": 28.15974235534668, "min": -62.521934509277344, "p10": -8.138808441162107, "median": 22.409391403198242, "p90": 57.624321746826176, "max": 108.17071533203125, "pos_frac": 0.78125, "sample": [17.79071044921875, 52.49365234375, 1.95672607421875, 1.871551513671875, -4.536407470703125, -3.1216087341308594, 16.769027709960938, 28.049375534057617, 2.465320587158203, 47.96430969238281, 23.173114776611328, 41.999168395996094, -1.0295448303222656, 18.03449821472168, 79.03460693359375, 62.04920959472656, -21.384124755859375, 35.294769287109375, 0.7183303833007812, 13.741775512695312, 27.048187255859375, 5.797859191894531, 16.28797721862793, -21.141571044921875, 69.18511962890625, 20.223299026489258, 25.872360229492188, 31.340415954589844, 41.07221984863281, 14.690082550048828, 27.24333953857422, -5.571996688842773, 17.453659057617188, -11.664339065551758, 49.07862091064453, 42.222015380859375, 49.047454833984375, 30.028133392333984, -62.521934509277344, -9.288936614990234, 49.48493957519531, 32.490299224853516, -9.892364501953125, 8.187431335449219, 56.450767517089844, 34.984981536865234, 1.2230453491210938, 21.645668029785156, 23.7597599029541, 17.766029357910156, -4.859052658081055, 58.12727355957031, -9.238870620727539, 51.32272720336914, -0.5608329772949219, 65.21702575683594, -1.1347427368164062, 44.24641418457031, 49.910369873046875, 63.22804260253906, 32.59577560424805, 8.951068878173828, 108.17071533203125, 24.83343505859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000097.npy"}
{"epoch": 0.14243759177679882, "step": 98, "batch_size": 64, "mean": 26.731998443603516, "std": 37.19391632080078, "min": -101.63946533203125, "p10": -13.150492095947262, "median": 29.05353546142578, "p90": 68.63235397338867, "max": 127.72702026367188, "pos_frac": 0.796875, "sample": [41.286865234375, 6.593235015869141, 65.58856201171875, 44.17554473876953, 6.490745544433594, 20.893526077270508, 23.545143127441406, 26.254539489746094, 29.056350708007812, 7.335121154785156, 30.99420738220215, -8.43853759765625, 20.826202392578125, 24.244285583496094, 67.63752746582031, 51.01996612548828, -14.55588150024414, 3.297882080078125, -0.7327194213867188, -23.423202514648438, 41.03181076049805, 35.56047821044922, -30.298988342285156, -38.728973388671875, 56.627281188964844, 53.217105865478516, 31.843276977539062, 69.05870819091797, 70.47355651855469, 26.351673126220703, 40.35957336425781, -2.5967254638671875, 62.994361877441406, 127.72702026367188, -101.63946533203125, 83.91218566894531, 59.69647216796875, 29.05072021484375, 46.5859375, 17.30907440185547, -9.87125015258789, 40.317848205566406, -56.71916198730469, 25.584136962890625, 37.443389892578125, 3.3015518188476562, 40.097251892089844, 83.72698974609375, 60.48199462890625, 36.98732376098633, 15.872329711914062, 9.817062377929688, 74.62225341796875, 106.139404296875, 36.042152404785156, 31.83713150024414, 30.679931640625, 13.438322067260742, 4.595424652099609, -8.841617584228516, 53.70825958251953, -3.8790645599365234, 3.5478897094726562, -18.706092834472656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000098.npy"}
{"epoch": 0.14390602055800295, "step": 99, "batch_size": 64, "mean": 32.502403259277344, "std": 41.242069244384766, "min": -33.50464630126953, "p10": -7.966613006591795, "median": 21.281310081481934, "p90": 84.28812255859376, "max": 177.9186553955078, "pos_frac": 0.828125, "sample": [-1.3930225372314453, -8.782707214355469, 6.017459869384766, 21.001529693603516, 3.138660430908203, 43.39663314819336, 26.704147338867188, 30.350059509277344, 109.011962890625, 21.017690658569336, 23.365882873535156, 122.57958984375, 8.300548553466797, -30.163780212402344, 85.18342590332031, -4.8400115966796875, 20.892295837402344, 37.16822814941406, -1.2221717834472656, 17.67853546142578, 74.25469970703125, 14.314262390136719, 11.135011672973633, 158.0574951171875, 9.25262451171875, 59.32598876953125, 2.4215545654296875, -24.15465545654297, 10.485414505004883, -33.50464630126953, -9.702842712402344, 53.78633117675781, 38.649253845214844, 26.16482925415039, 63.0146484375, 177.9186553955078, 74.84529113769531, 38.24777603149414, 16.746078491210938, 19.567764282226562, 5.008182525634766, 30.20435333251953, 38.22676086425781, 82.19908142089844, 100.01346588134766, 5.541969299316406, 12.904045104980469, 66.69281005859375, -6.0623931884765625, 1.047525405883789, 32.29261779785156, 88.26335144042969, 30.410686492919922, 10.782588958740234, 57.19902038574219, -9.716796875, 41.845279693603516, 14.822471618652344, 65.82452392578125, 52.171424865722656, 21.54492950439453, 47.20856475830078, 4.068424224853516, -22.569442749023438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000099.npy"}
{"epoch": 0.14537444933920704, "step": 100, "batch_size": 64, "mean": 19.239667892456055, "std": 35.788570404052734, "min": -74.70394897460938, "p10": -18.775580024719233, "median": 19.737937927246094, "p90": 64.47654647827149, "max": 144.22515869140625, "pos_frac": 0.765625, "sample": [53.43620300292969, 31.278406143188477, -6.523130416870117, 14.938579559326172, 23.252033233642578, -8.039688110351562, 32.570594787597656, 26.171669006347656, 7.310070037841797, 41.159637451171875, 11.686454772949219, 0.9916915893554688, 37.999420166015625, 24.154094696044922, 4.264982223510742, 79.19381713867188, 19.828277587890625, -45.33662414550781, -30.65472412109375, -51.738861083984375, -13.412267684936523, -10.604804992675781, 1.4605445861816406, -60.97811508178711, 55.15031814575195, 19.647598266601562, 50.070289611816406, 23.893417358398438, 1.1893692016601562, 5.427091598510742, 43.36540985107422, -1.925872802734375, 44.96397399902344, 4.921436309814453, 67.2890625, 30.080352783203125, 8.770978927612305, 11.2730712890625, 62.806190490722656, 25.27301025390625, 39.99953079223633, 15.994461059570312, 7.5022430419921875, 1.4559497833251953, 30.88709259033203, 34.3753662109375, -8.391281127929688, -27.047454833984375, -7.0583953857421875, 67.60406494140625, 37.217979431152344, 69.56365203857422, 24.75274085998535, -74.70394897460938, 144.22515869140625, -11.518415451049805, 15.718719482421875, 80.43075561523438, -21.074142456054688, 65.19241333007812, 46.08824157714844, 7.633445739746094, 37.724517822265625, 20.162059783935547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000100.npy"}
{"epoch": 0.14684287812041116, "step": 101, "batch_size": 64, "mean": 23.208131790161133, "std": 40.0408935546875, "min": -52.93267059326172, "p10": -19.66166496276855, "median": 18.265384674072266, "p90": 84.4170242309571, "max": 132.154541015625, "pos_frac": 0.6875, "sample": [96.02580261230469, 132.154541015625, 29.76384735107422, 29.534210205078125, -14.276687622070312, 102.6163101196289, 106.18569946289062, 10.548141479492188, 91.52084350585938, 107.48863220214844, -34.272125244140625, 28.699798583984375, -38.92015838623047, 47.49993896484375, 13.12728500366211, -7.996391296386719, -20.53919219970703, 14.116281509399414, 61.15363311767578, 27.424453735351562, -13.325691223144531, 25.62786865234375, 51.869476318359375, 62.34331512451172, 18.27264404296875, 18.25812530517578, 9.660991668701172, 65.47469329833984, 115.290283203125, 26.057884216308594, -6.1449737548828125, -6.591215133666992, -13.831344604492188, 51.12582015991211, 17.93317985534668, 38.36100769042969, 67.84144592285156, 45.44194030761719, -6.619903564453125, 7.013370513916016, -2.5608787536621094, -17.61410140991211, 19.85222625732422, -6.818817138671875, 0.2279510498046875, -15.323263168334961, 30.44086456298828, 32.40139389038086, 8.806041717529297, 25.24272346496582, -52.93267059326172, 35.410797119140625, 2.4886016845703125, -30.358013153076172, 25.97427749633789, 16.6337947845459, -14.75106430053711, 38.71842575073242, 7.3540191650390625, -1.8094196319580078, 51.481170654296875, -27.506900787353516, 39.37809753417969, -35.32862854003906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000101.npy"}
{"epoch": 0.14831130690161526, "step": 102, "batch_size": 64, "mean": 17.13527488708496, "std": 37.78377151489258, "min": -77.0101318359375, "p10": -21.81753997802734, "median": 11.732864379882812, "p90": 56.20214233398438, "max": 134.6588592529297, "pos_frac": 0.65625, "sample": [2.9432201385498047, 39.743568420410156, 56.938011169433594, 12.28131103515625, 5.1375579833984375, 5.198066711425781, -15.8377685546875, -44.17454528808594, 21.50678825378418, -72.76409912109375, 28.0819091796875, 87.23751831054688, -11.058128356933594, 37.679412841796875, 28.044017791748047, -8.824115753173828, 11.184417724609375, 34.63898468017578, 48.97785186767578, 1.0297431945800781, -14.994468688964844, -4.0908660888671875, 46.00653076171875, -6.014326095581055, 57.65061950683594, 21.949520111083984, 37.22345733642578, 134.6588592529297, -3.332366943359375, -0.09503936767578125, 108.7533950805664, -0.335357666015625, 37.461483001708984, 42.15385437011719, 4.00160026550293, -16.02672576904297, 30.99065589904785, -77.0101318359375, 0.6606903076171875, -28.020301818847656, 0.9534091949462891, -43.89495849609375, -22.742034912109375, -3.5042037963867188, 54.48511505126953, 28.491546630859375, 41.47997283935547, 81.34231567382812, 43.73284912109375, 8.00311279296875, 68.4637680053711, -26.457595825195312, -6.153350830078125, 6.523113250732422, 53.89568328857422, -19.660385131835938, 38.16142272949219, 33.664894104003906, 24.16668128967285, 36.66693878173828, -11.25152587890625, -15.725311279296875, 41.20671081542969, 45.254581451416016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000102.npy"}
{"epoch": 0.14977973568281938, "step": 103, "batch_size": 64, "mean": 34.391334533691406, "std": 41.59410858154297, "min": -36.36299133300781, "p10": -11.938301086425781, "median": 28.310373306274414, "p90": 79.47033615112305, "max": 178.87353515625, "pos_frac": 0.796875, "sample": [-6.001522064208984, 8.382568359375, 14.189962387084961, 33.130287170410156, -19.337692260742188, -24.819732666015625, 33.66432571411133, 39.39353942871094, 13.687776565551758, 19.781795501708984, 27.656909942626953, 44.404518127441406, 17.587112426757812, 19.94335174560547, -0.8438186645507812, 65.55929565429688, 110.65484619140625, 35.25989532470703, 49.1458740234375, 59.988380432128906, -33.34443664550781, -12.320281982421875, 47.59013366699219, 28.963836669921875, -36.36299133300781, 74.15192413330078, 35.290687561035156, -16.284423828125, 14.860885620117188, 19.48217010498047, 37.23286437988281, 161.83755493164062, 25.092056274414062, 19.885120391845703, 70.06552124023438, 67.08431243896484, -5.686210632324219, -11.047012329101562, 21.528583526611328, 17.56922721862793, -4.898902893066406, 39.623687744140625, 62.67390441894531, -27.873626708984375, 22.618316650390625, 99.81005096435547, 77.60836791992188, 26.480751037597656, 178.87353515625, 98.44621276855469, 102.90271759033203, 80.07139587402344, 12.476470947265625, 39.06715393066406, 66.81681823730469, -1.5716590881347656, 31.3767147064209, 3.416685104370117, 4.632289886474609, 50.125038146972656, 78.06786346435547, 16.14240074157715, 32.01446533203125, 45.12781524658203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000103.npy"}
{"epoch": 0.1512481644640235, "step": 104, "batch_size": 64, "mean": 32.55640411376953, "std": 47.21324157714844, "min": -64.38365173339844, "p10": -12.161727714538573, "median": 19.383869171142578, "p90": 90.03089904785156, "max": 184.71368408203125, "pos_frac": 0.75, "sample": [89.71078491210938, -6.877204895019531, 13.833328247070312, 14.642505645751953, 60.95420837402344, -5.1094970703125, -26.67188262939453, 2.9424514770507812, -2.6075820922851562, 37.26012420654297, -12.940519332885742, 33.855472564697266, 147.25344848632812, 51.65605926513672, 84.32547760009766, -2.7188949584960938, 90.1680908203125, 31.95942497253418, -17.706100463867188, 114.24957275390625, 36.96940612792969, 5.3908843994140625, 19.750591278076172, 7.157341003417969, 13.684412002563477, 2.228086471557617, 184.71368408203125, -43.84833526611328, 56.605499267578125, 132.4660186767578, 134.85975646972656, 29.60406494140625, 13.603141784667969, 64.56114196777344, 12.545614242553711, 14.565750122070312, -10.344547271728516, -22.25981903076172, 111.81396484375, 15.031303405761719, 50.660255432128906, -6.6433258056640625, 68.25872802734375, -3.1092758178710938, 83.72007751464844, 37.98981475830078, 32.04450225830078, 50.6573371887207, 36.524688720703125, 32.18458557128906, 4.440753936767578, 89.71005249023438, 8.402595520019531, 27.49419403076172, 62.501800537109375, 67.44884490966797, -7.005285263061523, 27.527746200561523, -0.3509063720703125, 19.017147064208984, -21.384422302246094, 1.7829265594482422, -64.38365173339844, 8.843561172485352], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000104.npy"}
{"epoch": 0.1527165932452276, "step": 105, "batch_size": 64, "mean": 46.19757843017578, "std": 41.30510330200195, "min": -64.43658447265625, "p10": 2.876650238037111, "median": 43.8984260559082, "p90": 99.62243194580081, "max": 176.6494903564453, "pos_frac": 0.90625, "sample": [46.81153869628906, 31.537399291992188, 45.264930725097656, 29.706344604492188, 41.773067474365234, 44.83563995361328, 72.86801147460938, 109.26811981201172, 9.804349899291992, -64.43658447265625, -6.4071502685546875, 80.98397827148438, 81.11264038085938, 13.363668441772461, 35.82518768310547, 48.082584381103516, -32.30801010131836, 2.2261886596679688, -5.872283935546875, 106.96556854248047, 31.801231384277344, 106.50440979003906, 69.06858825683594, 18.70886993408203, 11.129547119140625, 80.8875503540039, 62.541412353515625, 42.88642883300781, 9.504039764404297, 37.90618133544922, 64.4483642578125, 163.41708374023438, 21.529052734375, 50.829017639160156, 18.060943603515625, 19.361175537109375, 69.18384552001953, 89.28848266601562, 46.117462158203125, 14.798995971679688, 50.084716796875, 91.81236267089844, 35.28209686279297, 37.71673583984375, 42.961212158203125, 51.81398010253906, -1.2479591369628906, 64.86717224121094, 68.24293518066406, 102.9696044921875, 36.19819641113281, 22.106590270996094, -17.612060546875, 9.669078826904297, 64.84931945800781, 60.46116638183594, 54.92976379394531, 109.37652587890625, 59.778717041015625, 176.6494903564453, 24.536575317382812, 4.3943939208984375, 12.63504409790039, 74.79133605957031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000105.npy"}
{"epoch": 0.15418502202643172, "step": 106, "batch_size": 64, "mean": 28.927536010742188, "std": 35.46253204345703, "min": -52.652992248535156, "p10": -12.865041351318352, "median": 29.560436248779297, "p90": 75.69280548095703, "max": 131.78109741210938, "pos_frac": 0.84375, "sample": [28.099197387695312, 5.192502975463867, 8.243436813354492, -4.2825164794921875, 18.041030883789062, 1.3418407440185547, 47.33967590332031, -6.163734436035156, 30.097244262695312, 67.36772155761719, 36.23002243041992, 36.364959716796875, 2.625530242919922, 74.57917785644531, 26.00579071044922, 90.00129699707031, 6.951383590698242, 12.112049102783203, 76.17007446289062, 1.2876396179199219, 62.36016845703125, 40.96511459350586, 2.2731781005859375, 84.73297119140625, 23.784421920776367, 42.505287170410156, 7.508613586425781, -15.737030029296875, 52.962013244628906, -34.31975555419922, 48.669944763183594, 14.721923828125, 41.14445114135742, 40.23427963256836, -2.6506214141845703, 5.2270355224609375, 23.44329833984375, 1.8573150634765625, 88.70097351074219, 63.13587188720703, 63.06916809082031, 24.982349395751953, -52.652992248535156, 4.919242858886719, 131.78109741210938, 37.22462463378906, 29.02362823486328, 34.822166442871094, 32.01971435546875, 8.008026123046875, 0.4001941680908203, 56.853118896484375, 103.17774200439453, 35.73204803466797, 55.002899169921875, 55.56547546386719, -23.004989624023438, 33.65308380126953, -21.72119140625, 30.262374877929688, 79.95205688476562, -40.588478088378906, -22.945663452148438, 46.70288848876953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000106.npy"}
{"epoch": 0.15565345080763582, "step": 107, "batch_size": 64, "mean": 32.75652313232422, "std": 42.06280517578125, "min": -87.74777221679688, "p10": -12.6854543685913, "median": 22.30781650543213, "p90": 84.12707901000977, "max": 137.89906311035156, "pos_frac": 0.859375, "sample": [83.5419921875, 109.70796203613281, 47.47076416015625, 68.41621398925781, 25.572555541992188, 30.72797393798828, 50.2518310546875, -3.5200443267822266, -87.74777221679688, 8.593816757202148, 16.16836929321289, 76.16629028320312, -36.308799743652344, 28.255115509033203, 9.982728958129883, 8.472648620605469, 15.187324523925781, 84.3778305053711, 1.3871116638183594, -26.29242706298828, 104.2011489868164, 66.70418548583984, 78.80364990234375, 6.638078689575195, -17.380203247070312, 4.020151138305664, -1.1909141540527344, 5.410499572753906, 137.89906311035156, 72.3784408569336, 19.030742645263672, 16.214515686035156, 95.30193328857422, 44.483551025390625, 64.83589172363281, 87.06656646728516, 16.427988052368164, 28.238304138183594, 16.306013107299805, -16.613487243652344, 61.92704772949219, -46.33240509033203, 64.60395812988281, 74.61483764648438, 11.115882873535156, 81.99031829833984, 0.8688812255859375, 81.66374206542969, 48.61653137207031, 52.978179931640625, 23.158966064453125, 21.456666946411133, 5.345672607421875, -27.639076232910156, 1.3454360961914062, 5.163787841796875, 12.672439575195312, 73.49240112304688, 10.415863037109375, 23.912912368774414, 65.54583740234375, 97.646728515625, 2.6168975830078125, 10.048477172851562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000107.npy"}
{"epoch": 0.15712187958883994, "step": 108, "batch_size": 64, "mean": 39.17702102661133, "std": 56.82441329956055, "min": -59.98408508300781, "p10": -23.38497714996338, "median": 27.84130859375, "p90": 121.30213775634769, "max": 208.5067138671875, "pos_frac": 0.765625, "sample": [35.52565002441406, 132.8231964111328, 39.03673553466797, -43.592987060546875, -59.98408508300781, 104.119873046875, 30.52378273010254, 8.890342712402344, 208.5067138671875, -0.25481414794921875, -12.348548889160156, 126.7035140991211, 8.596145629882812, 6.7590484619140625, -6.036762237548828, 72.69539642333984, -6.868877410888672, 95.65443420410156, 158.58277893066406, 105.45050811767578, 114.30046081542969, 81.93330383300781, 1.5725898742675781, -22.73377799987793, 47.81282043457031, 10.757513046264648, 26.04393768310547, 15.378780364990234, 75.60104370117188, 4.896213531494141, 25.242206573486328, 0.5258941650390625, 70.28907012939453, -23.6640625, 171.96127319335938, -38.78388977050781, 78.4595947265625, -30.27871322631836, -14.833236694335938, 4.01671028137207, 27.33275604248047, 7.2572479248046875, 49.65736389160156, 150.83349609375, 33.682106018066406, 9.906990051269531, 87.18671417236328, 72.46476745605469, 30.261186599731445, -50.24517822265625, 15.109046936035156, 50.98381805419922, 124.3028564453125, 62.63323974609375, -1.41046142578125, -1.357187271118164, 39.736061096191406, 15.571006774902344, 72.97506713867188, -50.379642486572266, 28.34986114501953, 42.00772476196289, 64.14515686035156, 23.045547485351562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000108.npy"}
{"epoch": 0.15859030837004406, "step": 109, "batch_size": 64, "mean": 41.90185546875, "std": 49.764122009277344, "min": -83.11285400390625, "p10": -10.19664306640625, "median": 35.163631439208984, "p90": 105.73259582519532, "max": 184.87228393554688, "pos_frac": 0.84375, "sample": [48.72200012207031, 8.096179962158203, 132.45069885253906, 84.85974884033203, 29.75292205810547, 53.37421798706055, -56.72483825683594, 11.459213256835938, 17.640151977539062, 49.22105407714844, 128.97872924804688, 42.5666618347168, 71.72396850585938, 53.69969177246094, 53.968536376953125, 5.713197708129883, 21.904659271240234, 36.936073303222656, -12.727592468261719, -5.198368072509766, -6.224235534667969, 184.87228393554688, 0.5435256958007812, -10.283966064453125, 25.211305618286133, 4.607086181640625, 124.7783203125, 30.60740852355957, 31.776601791381836, -37.7994499206543, 63.02772521972656, 67.49427795410156, 9.133359909057617, 68.73312377929688, 56.732452392578125, 38.093223571777344, 96.84596252441406, 70.8803939819336, 106.77678680419922, 102.74202728271484, 0.6150741577148438, 12.477401733398438, -83.11285400390625, 28.86560821533203, 1.9951171875, 71.77265167236328, 31.9034423828125, 33.587406158447266, 19.621187210083008, 81.18055725097656, -47.615386962890625, 33.9407958984375, 36.70383834838867, 108.64497375488281, 126.02165222167969, 35.36614227294922, 86.21874237060547, 34.96112060546875, 89.63337707519531, 103.29615020751953, -9.992889404296875, -22.870330810546875, 2.276670455932617, 101.26334381103516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000109.npy"}
{"epoch": 0.16005873715124816, "step": 110, "batch_size": 64, "mean": 37.83190155029297, "std": 53.00170135498047, "min": -96.42166137695312, "p10": -14.197290802001952, "median": 32.8746976852417, "p90": 109.7043869018555, "max": 179.51242065429688, "pos_frac": 0.828125, "sample": [118.45149230957031, 54.009857177734375, 21.106319427490234, 52.90692138671875, 57.269065856933594, 36.71941375732422, 48.68010330200195, 1.8615760803222656, -96.42166137695312, 44.66719055175781, 66.62448120117188, 16.448455810546875, -62.64414978027344, 18.293519973754883, 11.75533676147461, 0.9408435821533203, 30.2410831451416, 167.05230712890625, 102.38404846191406, 26.7703857421875, 4.427587509155273, -8.335071563720703, 179.51242065429688, 134.18161010742188, 99.45707702636719, 77.14976501464844, 17.720298767089844, -16.799148559570312, -13.538322448730469, 59.262176513671875, -42.64286804199219, -14.479705810546875, 9.873926162719727, 18.799625396728516, 5.884189605712891, 155.96646118164062, 53.26930236816406, 83.54557800292969, 112.8416748046875, 4.371131896972656, 76.039794921875, -8.018546104431152, 38.23687744140625, 36.52418899536133, 14.368099212646484, 11.611358642578125, 79.77227020263672, -58.06060791015625, 75.4232177734375, -24.850692749023438, 38.47187042236328, 1.6371822357177734, 54.86815643310547, 35.5083122253418, 50.31056594848633, 43.666290283203125, 19.476821899414062, 19.941864013671875, 18.335308074951172, 44.900428771972656, 77.89622497558594, 2.260082244873047, -4.214061737060547, 139.55230712890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000110.npy"}
{"epoch": 0.16152716593245228, "step": 111, "batch_size": 64, "mean": 42.87803649902344, "std": 58.91316223144531, "min": -84.30311584472656, "p10": -31.93306274414062, "median": 32.723388671875, "p90": 118.4962600708008, "max": 193.18728637695312, "pos_frac": 0.765625, "sample": [193.18728637695312, -50.7633056640625, -83.49380493164062, 120.31512451171875, -6.72796630859375, 53.682708740234375, 13.587135314941406, 33.00440216064453, 26.822120666503906, 145.09400939941406, 31.994415283203125, 77.33377075195312, 42.79176330566406, 55.174407958984375, 28.405776977539062, 109.59747314453125, 110.4435043334961, -34.28338623046875, 97.90199279785156, 7.708234786987305, 79.26116180419922, -26.448974609375, -20.57418441772461, -9.452392578125, 64.70280456542969, 114.25224304199219, -6.599601745605469, 112.06776428222656, -34.55335998535156, 10.935195922851562, -23.689102172851562, 32.44237518310547, 132.53952026367188, 22.74262237548828, 148.21694946289062, 55.52473449707031, 29.63187026977539, 54.6786003112793, 66.96337127685547, 129.57958984375, 23.532546997070312, -41.06292724609375, 19.21302032470703, 28.692562103271484, 134.55288696289062, 69.19819641113281, 28.269180297851562, 59.51594543457031, 112.42466735839844, 2.2336978912353516, 111.73689270019531, 37.81394958496094, 92.5108413696289, 68.78631591796875, 49.92405700683594, 13.750482559204102, -84.30311584472656, 31.20539093017578, 27.819448471069336, 40.771697998046875, -5.256217956542969, 92.05010986328125, -62.33805847167969, -10.844276428222656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000111.npy"}
{"epoch": 0.16299559471365638, "step": 112, "batch_size": 64, "mean": 27.452232360839844, "std": 40.44499588012695, "min": -58.714088439941406, "p10": -24.229346466064452, "median": 24.879663467407227, "p90": 91.08398590087893, "max": 120.95355224609375, "pos_frac": 0.71875, "sample": [42.495121002197266, 120.95355224609375, 85.29252624511719, -6.631557464599609, -42.830753326416016, 32.58982849121094, 37.18614196777344, 31.892364501953125, 28.116897583007812, -5.802085876464844, 99.810791015625, 78.80447387695312, -0.2194538116455078, -2.9227371215820312, 93.5660400390625, 61.7071418762207, 38.70249557495117, 57.037689208984375, 41.1099967956543, 37.941375732421875, 45.619537353515625, 30.15966796875, 69.4403076171875, -14.61737060546875, 24.712600708007812, 22.34828758239746, 30.501136779785156, 32.74540328979492, -24.287841796875, 17.567394256591797, 3.427804946899414, -33.16605758666992, 31.135459899902344, 107.5897445678711, 109.87078094482422, -32.04756164550781, 40.288330078125, 14.80300521850586, 71.51754760742188, -0.37737464904785156, -58.714088439941406, 95.95586395263672, 25.04672622680664, 13.172225952148438, -11.649917602539062, 19.956279754638672, 61.139892578125, -0.06998634338378906, 15.308082580566406, 97.29733276367188, 24.596939086914062, 8.40811538696289, 44.28227233886719, -40.56121826171875, 11.816377639770508, -24.092857360839844, -25.318984985351562, -9.840263366699219, 72.59619903564453, 1.221221923828125, 24.457368850708008, 5.3794708251953125, -8.615463256835938, 39.14070129394531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000112.npy"}
{"epoch": 0.1644640234948605, "step": 113, "batch_size": 64, "mean": 29.506242752075195, "std": 38.20697784423828, "min": -39.20936584472656, "p10": -15.918980216979978, "median": 31.655122756958008, "p90": 82.0059410095215, "max": 118.00658416748047, "pos_frac": 0.734375, "sample": [-32.46240234375, 75.87986755371094, -17.24001693725586, 88.8624267578125, 9.515426635742188, 38.44245147705078, 14.556259155273438, 36.411842346191406, 3.6156368255615234, -9.277664184570312, 8.733909606933594, 84.8271484375, 54.40345001220703, 110.31784057617188, 38.09900665283203, -18.564939498901367, -6.3890380859375, -9.222648620605469, 9.463714599609375, 65.70472717285156, -20.1632080078125, 47.05712890625, 79.23719787597656, -22.115997314453125, 53.46842956542969, -7.2526397705078125, 2.714048385620117, 118.00658416748047, 48.044517517089844, -39.20936584472656, 18.514564514160156, 74.31199645996094, 35.68124771118164, -10.417724609375, 37.18562316894531, 52.33599090576172, 13.759803771972656, 82.20269012451172, 20.88629150390625, -11.523757934570312, 52.17992401123047, 69.97566986083984, -12.83656120300293, 1.015237808227539, -24.298904418945312, 44.90989303588867, 35.43059539794922, 40.610565185546875, -9.898178100585938, 15.380298614501953, 112.53274536132812, -2.7665977478027344, 33.814727783203125, 32.54383850097656, 97.07801055908203, 81.54685974121094, 33.41106033325195, 62.63441467285156, -7.11143684387207, 9.735549926757812, 12.318084716796875, 14.440372467041016, 30.766407012939453, 46.5865478515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000113.npy"}
{"epoch": 0.16593245227606462, "step": 114, "batch_size": 64, "mean": 47.027557373046875, "std": 64.63849639892578, "min": -41.00023651123047, "p10": -27.842127990722656, "median": 42.99824142456055, "p90": 110.94941329956059, "max": 350.599365234375, "pos_frac": 0.765625, "sample": [178.52322387695312, 89.67395782470703, 53.953453063964844, 50.480857849121094, 27.289772033691406, 116.95405578613281, 20.082273483276367, 45.640472412109375, -35.58912658691406, 67.63218688964844, -31.5628719329834, 73.6278076171875, 44.24541473388672, 20.877952575683594, -37.21166229248047, 83.41853332519531, 14.98459243774414, -31.895645141601562, 33.59767150878906, 124.13975524902344, 12.957674026489258, 54.72154235839844, -5.41845703125, 115.47753143310547, 41.03496551513672, 59.62250518798828, 34.86907196044922, 24.715389251708984, 36.839935302734375, 60.127567291259766, 64.94385528564453, -5.082756042480469, -8.941986083984375, 56.0816650390625, -25.756729125976562, 15.513677597045898, 49.313232421875, 68.90411376953125, 43.08892822265625, 100.38380432128906, 59.52431106567383, 87.68228149414062, 5.675090789794922, 49.65203094482422, 39.2242317199707, -13.529609680175781, 232.3474884033203, 6.561441421508789, 42.907554626464844, 143.18963623046875, -10.274389266967773, 72.36454772949219, -41.00023651123047, -28.735870361328125, 350.599365234375, 57.326053619384766, -4.181308746337891, 86.28054809570312, 34.38200378417969, -32.25343704223633, 26.866737365722656, 88.08810424804688, -18.905254364013672, 73.71434020996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000114.npy"}
{"epoch": 0.16740088105726872, "step": 115, "batch_size": 64, "mean": 32.295928955078125, "std": 65.49223327636719, "min": -151.76416015625, "p10": -27.20351638793945, "median": 19.100200653076172, "p90": 125.5210182189942, "max": 235.44894409179688, "pos_frac": 0.75, "sample": [0.31207275390625, -2.7215919494628906, 132.17616271972656, 58.14338684082031, -52.37744140625, -0.60479736328125, -47.59333038330078, 10.35489273071289, 12.64815902709961, 17.727752685546875, 25.170654296875, -7.263885498046875, 43.34316635131836, 57.212806701660156, 11.764001846313477, 167.08653259277344, 21.280420303344727, -7.7841644287109375, 235.44894409179688, -11.485067367553711, 73.67037200927734, 5.97955322265625, 139.625, 70.23206329345703, 170.72763061523438, -27.44061279296875, 23.680389404296875, 26.189815521240234, 7.894643783569336, 98.11860656738281, 171.9047393798828, 97.78094482421875, 0.958709716796875, -122.90023040771484, 9.38320541381836, 34.36687469482422, 58.98561096191406, 16.304100036621094, 9.706329345703125, 43.03423309326172, 27.251739501953125, 49.19001770019531, 25.809188842773438, 30.300697326660156, 145.52560424804688, -27.626007080078125, 21.338645935058594, 16.708778381347656, -8.066070556640625, 11.077812194824219, 20.47264862060547, 77.30644226074219, 3.2011642456054688, 14.394298553466797, -20.789072036743164, 109.99234771728516, 65.755859375, -16.161863327026367, 63.28593444824219, -151.76416015625, 107.60675048828125, -47.35327911376953, 5.091386795043945, -26.650291442871094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000115.npy"}
{"epoch": 0.16886930983847284, "step": 116, "batch_size": 64, "mean": 35.76426315307617, "std": 45.75696563720703, "min": -35.92939758300781, "p10": -15.212896919250486, "median": 30.528362274169922, "p90": 89.32471313476563, "max": 211.38636779785156, "pos_frac": 0.78125, "sample": [7.482624053955078, 2.5011329650878906, -32.8023567199707, -29.87206268310547, 25.22678565979004, 44.68798828125, 62.755767822265625, 23.057655334472656, 49.01988983154297, 59.79545593261719, 30.27143096923828, -8.761697769165039, 41.60626220703125, 28.17717742919922, 9.211837768554688, -6.539548873901367, 52.834877014160156, -35.92939758300781, 82.48655700683594, 87.79756164550781, 40.10002136230469, -15.749298095703125, 23.132980346679688, 17.90338897705078, 49.112815856933594, 25.064254760742188, 76.01971435546875, 105.92782592773438, 31.73870086669922, -32.75425720214844, 121.49942016601562, 35.86924743652344, 23.097835540771484, 43.10069274902344, -13.961294174194336, -25.702293395996094, 4.391246795654297, 9.609375, -3.303346633911133, 100.41415405273438, 4.710758209228516, 130.7514190673828, 30.785293579101562, 88.14976501464844, 77.44728088378906, 34.450870513916016, 54.422096252441406, -33.18255615234375, -8.590740203857422, 81.72283172607422, 23.860576629638672, -6.13275146484375, 211.38636779785156, 65.33985137939453, 12.4849853515625, 49.912254333496094, 66.09393310546875, 62.469730377197266, -10.48454475402832, 13.807647705078125, 0.7893943786621094, 41.593536376953125, 98.77742004394531, 89.82826232910156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000116.npy"}
{"epoch": 0.17033773861967694, "step": 117, "batch_size": 64, "mean": 29.33287239074707, "std": 50.608604431152344, "min": -100.14729309082031, "p10": -25.592084121704094, "median": 23.903563499450684, "p90": 86.1594192504883, "max": 186.491943359375, "pos_frac": 0.765625, "sample": [13.186748504638672, 53.01598358154297, 50.27308654785156, -16.855358123779297, 20.85799217224121, -2.7553787231445312, 19.270986557006836, 39.226051330566406, 19.801605224609375, 39.49726867675781, 43.80611038208008, 66.99923706054688, 131.38632202148438, 112.72702026367188, 16.810279846191406, 45.59236145019531, -56.31626892089844, 88.43780517578125, 4.8974151611328125, -94.92788696289062, -29.336395263671875, 8.763984680175781, 16.982213973999023, 61.461612701416016, 45.26649856567383, -12.893218994140625, -32.2060546875, 3.6599369049072266, 26.949134826660156, -13.689826965332031, 38.93342590332031, 120.119384765625, -3.7842941284179688, 4.678131103515625, 147.15875244140625, 15.004890441894531, 186.491943359375, 14.343231201171875, 30.182540893554688, 64.27962493896484, 15.497905731201172, 9.213878631591797, 7.799339294433594, -8.754730224609375, -100.14729309082031, 30.35692596435547, 80.7489013671875, 32.20099639892578, 90.27259063720703, 38.32157897949219, -45.32707214355469, -9.850481033325195, 7.401805877685547, 54.619197845458984, 80.84318542480469, 79.51887512207031, 6.299694061279297, 49.30116271972656, 71.91486358642578, 28.42584228515625, 42.309471130371094, -3.1161766052246094, -37.03205871582031, 69.18850708007812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000117.npy"}
{"epoch": 0.17180616740088106, "step": 118, "batch_size": 64, "mean": 35.825435638427734, "std": 46.314544677734375, "min": -53.457733154296875, "p10": -12.20424766540527, "median": 23.826995849609375, "p90": 108.2766525268555, "max": 158.86428833007812, "pos_frac": 0.765625, "sample": [-6.9847564697265625, 20.720844268798828, 0.4651508331298828, -25.765975952148438, 20.242189407348633, 15.67026138305664, 58.785831451416016, 42.98695373535156, 46.96466064453125, 111.60249328613281, 86.35107421875, 22.94561767578125, 124.35577392578125, 30.735713958740234, -7.533805847167969, 54.51783752441406, -18.025768280029297, -13.684127807617188, 32.93339920043945, 158.86428833007812, 119.28482055664062, -8.75119400024414, -53.457733154296875, 2.6863632202148438, -39.10643005371094, 21.484464645385742, 2.4652042388916016, 33.293739318847656, 13.849842071533203, 15.143325805664062, 11.861763000488281, 74.63417053222656, 4.381439208984375, 75.38774108886719, 24.344451904296875, 48.43699645996094, 23.218482971191406, 41.86091613769531, 148.0382080078125, -2.4832000732421875, 30.366119384765625, -18.08422088623047, -1.8868255615234375, 41.474754333496094, 143.53955078125, 85.07219696044922, 114.10931396484375, 42.14283752441406, 79.0127182006836, 13.988588333129883, -0.19472503662109375, 66.52306365966797, 100.516357421875, 70.67880249023438, 50.75982666015625, 24.415802001953125, 35.46691131591797, 17.434932708740234, -0.3840293884277344, 23.309539794921875, -19.999526977539062, 82.7694091796875, 4.072441101074219, -4.9970550537109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000118.npy"}
{"epoch": 0.17327459618208516, "step": 119, "batch_size": 64, "mean": 35.49361801147461, "std": 56.217437744140625, "min": -68.71939086914062, "p10": -30.091124725341796, "median": 36.97261047363281, "p90": 96.8390823364258, "max": 210.70895385742188, "pos_frac": 0.6875, "sample": [12.789323806762695, 56.16526794433594, 38.86872863769531, 146.79620361328125, 38.6512451171875, -30.379440307617188, -7.692878723144531, 50.70892333984375, -5.300197601318359, 50.978965759277344, -9.949737548828125, -11.697776794433594, 210.70895385742188, 50.41065216064453, 18.968605041503906, 121.04464721679688, 21.772480010986328, 11.153194427490234, 182.0742645263672, 48.45191955566406, 91.77810668945312, 74.14283752441406, 75.87226104736328, -5.860164642333984, 28.69324493408203, 37.56016540527344, -34.34980773925781, 18.367385864257812, 54.870391845703125, 55.621978759765625, 0.49568939208984375, -3.4489212036132812, 99.00807189941406, 37.1546745300293, 170.67343139648438, -3.4051589965820312, 65.95738983154297, 80.42399597167969, -1.584981918334961, 27.328453063964844, -54.30781555175781, 42.491294860839844, -57.442962646484375, 129.63211059570312, 50.93414306640625, 66.05777740478516, -23.2061767578125, -31.207962036132812, 83.53601837158203, 36.79054641723633, -29.41838836669922, -68.71939086914062, -1.6962127685546875, 85.71005249023438, 17.26740264892578, 57.20083999633789, 51.544700622558594, 10.17503547668457, 64.97140502929688, -6.865819931030273, -42.57806396484375, 3.2443389892578125, 41.70112609863281, -18.04491424560547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000119.npy"}
{"epoch": 0.17474302496328928, "step": 120, "batch_size": 64, "mean": 48.66257858276367, "std": 59.5260124206543, "min": -80.05691528320312, "p10": -19.49566459655761, "median": 38.879791259765625, "p90": 135.8168411254883, "max": 191.32427978515625, "pos_frac": 0.84375, "sample": [150.0984344482422, 85.88238525390625, 32.75996398925781, 109.61524963378906, 36.67154312133789, 9.329496383666992, -60.16650390625, 101.48382568359375, 20.04180335998535, 73.69547271728516, 54.05913543701172, 160.58926391601562, 80.43921661376953, 6.273612976074219, 17.42595863342285, 19.751983642578125, 138.51702880859375, 58.35914611816406, 69.85840606689453, -41.40259552001953, -39.966552734375, 127.96928405761719, 39.77568054199219, 32.634185791015625, 41.680381774902344, 37.26702880859375, 46.3035888671875, 5.890338897705078, 164.36764526367188, -80.05691528320312, -22.622779846191406, 39.62431335449219, 26.09882354736328, 4.2706451416015625, 104.9891357421875, 10.058576583862305, 191.32427978515625, 54.02812576293945, 69.17196655273438, 3.450716018676758, 11.683391571044922, -7.533452987670898, 55.363258361816406, 184.3031005859375, 33.50306701660156, 42.8055419921875, 129.5164031982422, 172.70974731445312, 38.13526916503906, 45.728851318359375, 0.6747817993164062, 88.5505599975586, -42.16685485839844, 19.82781219482422, 63.28502655029297, 34.42033386230469, -1.7818183898925781, 112.46054077148438, 74.92900085449219, -13.287471771240234, 75.75341033935547, -22.15631866455078, 35.5364990234375, 2.6031341552734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000120.npy"}
{"epoch": 0.1762114537444934, "step": 121, "batch_size": 64, "mean": 54.022308349609375, "std": 70.0307846069336, "min": -149.16888427734375, "p10": -9.537339782714842, "median": 47.504913330078125, "p90": 149.11922912597657, "max": 219.86756896972656, "pos_frac": 0.78125, "sample": [8.927013397216797, -6.646720886230469, -2.607757568359375, 219.86756896972656, -6.1167449951171875, 9.462333679199219, 96.38340759277344, -15.890857696533203, 78.30762481689453, 76.49102020263672, -1.79571533203125, 62.922935485839844, 138.6522979736328, 110.41570281982422, 81.11660766601562, 0.23003387451171875, 89.64590454101562, 65.65792846679688, 54.354522705078125, -40.335845947265625, 35.645118713378906, 112.90219116210938, -57.63299560546875, 145.55616760253906, 168.8936004638672, 66.45506286621094, 33.98400115966797, 16.93918228149414, 111.742431640625, 4.008966445922852, 77.79200744628906, -22.90062713623047, 47.91542053222656, 83.14448547363281, 2.381143569946289, 200.114013671875, 4.51629638671875, 180.19924926757812, 44.22016906738281, 206.30599975585938, 8.236091613769531, 126.7548599243164, -10.776176452636719, 17.524734497070312, -21.752227783203125, 10.450920104980469, 75.37342834472656, 24.283065795898438, -5.6242218017578125, 55.21392822265625, 5.554872512817383, 19.782119750976562, 63.59147644042969, 130.97589111328125, 150.64625549316406, 71.98445129394531, -149.16888427734375, 57.952239990234375, -5.67462158203125, 78.77418518066406, 207.23983764648438, 19.499778747558594, -1.7318572998046875, 47.09440612792969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000121.npy"}
{"epoch": 0.1776798825256975, "step": 122, "batch_size": 64, "mean": 67.32794952392578, "std": 66.2765884399414, "min": -90.13690185546875, "p10": 0.8278617858886727, "median": 52.821231842041016, "p90": 162.1453598022461, "max": 233.86724853515625, "pos_frac": 0.90625, "sample": [149.37168884277344, 21.622207641601562, 75.9307632446289, 83.59307861328125, 19.179704666137695, 26.61675262451172, 49.46311950683594, 96.06763458251953, 22.67456817626953, 115.16288757324219, 15.652351379394531, 130.01693725585938, 124.98816680908203, 233.86724853515625, 103.59888458251953, 35.351539611816406, 16.46143341064453, 54.11585235595703, 161.80421447753906, 49.993682861328125, 20.190099716186523, 140.6284942626953, 36.2274284362793, -9.45635986328125, 83.52561950683594, 11.640213012695312, 63.433250427246094, 141.21517944335938, 74.31771087646484, 65.21117401123047, 76.1354751586914, -26.068984985351562, 171.658447265625, -6.058256149291992, 52.47632598876953, 162.50518798828125, 63.921104431152344, 26.23760223388672, 175.38497924804688, 155.35745239257812, 9.469205856323242, 53.1661376953125, -32.48224639892578, 232.84400939941406, 17.407752990722656, 38.49848556518555, 32.16681671142578, 42.67489242553711, 24.92755889892578, -90.13690185546875, -6.9914703369140625, 27.700668334960938, 30.978906631469727, 66.60995483398438, 201.276123046875, 1.5823783874511719, 116.36424255371094, 54.48108673095703, 0.5044975280761719, 13.809181213378906, 162.29156494140625, 75.40797424316406, 20.21822166442871, 152.2049560546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000122.npy"}
{"epoch": 0.17914831130690162, "step": 123, "batch_size": 64, "mean": 44.054771423339844, "std": 73.77658081054688, "min": -150.53848266601562, "p10": -26.98945560455322, "median": 22.982162475585938, "p90": 143.1687057495118, "max": 235.95712280273438, "pos_frac": 0.65625, "sample": [78.35038757324219, 81.06877136230469, -8.819374084472656, 9.34628677368164, 88.48060607910156, 77.46011352539062, 121.39524841308594, 111.73101806640625, 80.44852447509766, 49.10289764404297, -0.5450611114501953, -21.284927368164062, 8.11069107055664, -41.415748596191406, 156.2360076904297, 87.53543090820312, 16.61172866821289, -12.643997192382812, -4.8573760986328125, 67.87748718261719, -6.1020050048828125, 123.53910827636719, 67.84591674804688, -76.12620544433594, 22.860877990722656, 96.6842041015625, 214.82196044921875, 23.10344696044922, 151.58139038085938, 164.98019409179688, -25.490942001342773, 64.0431900024414, 20.79178237915039, 7.763786315917969, 85.4143295288086, 120.22764587402344, 20.436195373535156, 47.681976318359375, 178.0261993408203, -20.244277954101562, 92.62721252441406, 83.11380004882812, -41.66560745239258, -67.86634826660156, 78.24888610839844, -8.835174560546875, -44.137332916259766, -5.046934127807617, -17.925071716308594, 235.95712280273438, -27.631675720214844, 65.814697265625, 51.781959533691406, 206.57965087890625, -9.671852111816406, 21.919031143188477, 121.83509826660156, 34.83704376220703, -150.53848266601562, -16.65375518798828, -1.5865936279296875, -10.744915008544922, 0.6416816711425781, 2.425291061401367], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000123.npy"}
{"epoch": 0.18061674008810572, "step": 124, "batch_size": 64, "mean": 64.96229553222656, "std": 95.20276641845703, "min": -118.80995178222656, "p10": -26.79657821655273, "median": 43.427635192871094, "p90": 177.07994079589847, "max": 377.9284362792969, "pos_frac": 0.78125, "sample": [148.08836364746094, -25.200424194335938, 41.71110534667969, 34.97039794921875, 25.814228057861328, 150.50941467285156, -97.65211486816406, -9.272476196289062, 160.0582275390625, 189.98007202148438, -3.1584396362304688, 30.75029754638672, 4.641260147094727, 69.79490661621094, 77.9240493774414, 229.95687866210938, 157.56768798828125, 52.33060836791992, -18.222259521484375, 51.17388153076172, -118.80995178222656, -40.85603332519531, 140.74856567382812, -4.138042449951172, 203.02816772460938, 131.1697540283203, 77.20277404785156, -4.36468505859375, 46.370811462402344, 120.42361450195312, -27.48064422607422, 32.80760955810547, 90.89198303222656, 14.79814338684082, 10.5411376953125, 344.72222900390625, 102.94146728515625, 179.16152954101562, 30.282455444335938, 172.222900390625, 104.86380767822266, -52.418418884277344, 17.732196807861328, -19.286331176757812, 40.05621337890625, 124.04754638671875, -103.48031616210938, 98.96784973144531, 18.16834831237793, 45.1441650390625, 39.742549896240234, 79.11890411376953, -67.62875366210938, 254.39944458007812, 22.89349365234375, 377.9284362792969, 46.707794189453125, 168.07455444335938, 1.0683746337890625, 32.54546356201172, 52.3577880859375, 78.28301239013672, 14.367759704589844, 10.503551483154297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000124.npy"}
{"epoch": 0.18208516886930984, "step": 125, "batch_size": 64, "mean": 57.670806884765625, "std": 77.84563446044922, "min": -91.95611572265625, "p10": -29.077206420898435, "median": 33.228705406188965, "p90": 164.0551177978516, "max": 251.9818115234375, "pos_frac": 0.78125, "sample": [-3.2580394744873047, 150.86631774902344, 91.48394775390625, 23.260223388671875, -14.698097229003906, 9.716075897216797, 114.07386779785156, -27.396484375, 140.758056640625, 16.36631202697754, 10.85556411743164, -29.797515869140625, 24.75690269470215, 1.0759124755859375, 101.5174789428711, 110.04786682128906, 87.32417297363281, 53.98957061767578, 18.857194900512695, 3.431537628173828, 105.60113525390625, 18.188339233398438, -55.11973571777344, -10.856842041015625, 90.6788558959961, 251.9818115234375, -35.391822814941406, 30.862340927124023, 108.19024658203125, 36.07282257080078, -4.434898376464844, -79.87454986572266, 35.067100524902344, 17.231374740600586, 30.981178283691406, 211.66294860839844, 6.967082977294922, 207.99899291992188, 120.76873779296875, -23.475936889648438, 12.003990173339844, 102.83053588867188, 189.14434814453125, 92.21150970458984, 229.2197265625, 91.46397399902344, 55.060218811035156, 47.47251510620117, 168.91897583007812, 51.814735412597656, 12.194168090820312, 126.86177062988281, 41.75889587402344, -16.865446090698242, 110.99185180664062, 27.33401107788086, -43.39814758300781, 151.7960968017578, 31.390310287475586, -91.95611572265625, 207.05665588378906, 152.70611572265625, -33.480796813964844, 28.0714054107666], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000125.npy"}
{"epoch": 0.18355359765051396, "step": 126, "batch_size": 64, "mean": 53.836448669433594, "std": 72.14073181152344, "min": -77.97016143798828, "p10": -22.35788116455078, "median": 38.011186599731445, "p90": 158.9620376586914, "max": 251.86541748046875, "pos_frac": 0.828125, "sample": [119.90411376953125, 64.87643432617188, 10.907796859741211, 88.77872467041016, 13.420791625976562, 52.34899139404297, -38.21954345703125, 2.192136764526367, 60.32823181152344, 156.92938232421875, -52.2987060546875, 1.6846275329589844, 251.86541748046875, 19.87763214111328, 86.5013198852539, 1.5753097534179688, 131.25595092773438, 41.982276916503906, 131.14694213867188, 67.094970703125, 44.45964050292969, 52.99155044555664, -7.982269287109375, 37.91312026977539, 64.88787841796875, 186.54736328125, -22.55168914794922, 159.8331756591797, 32.56342697143555, 47.74836730957031, 19.366756439208984, 25.590370178222656, 74.21783447265625, 14.851974487304688, 179.33966064453125, 38.1092529296875, 56.72837829589844, -75.68637084960938, 146.4072265625, -15.81088638305664, -61.050384521484375, -36.31837463378906, 237.60452270507812, -3.463743209838867, 27.651159286499023, 10.045360565185547, 1.4134254455566406, 20.893985748291016, 125.00161743164062, 116.55886840820312, -77.97016143798828, 190.99549865722656, 84.28475952148438, 23.126068115234375, 35.47807312011719, 95.58912658691406, 30.153564453125, 63.25835418701172, 17.147430419921875, -21.905662536621094, 14.005073547363281, 91.99147033691406, 11.041961669921875, 178.32293701171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000126.npy"}
{"epoch": 0.18502202643171806, "step": 127, "batch_size": 64, "mean": 39.496803283691406, "std": 62.4508056640625, "min": -124.76553344726562, "p10": -34.757039642333986, "median": 40.90530014038086, "p90": 116.57612228393555, "max": 182.59117126464844, "pos_frac": 0.78125, "sample": [55.345069885253906, 81.0044174194336, -9.174047470092773, 36.829811096191406, 2.74884033203125, -94.82041931152344, 151.74879455566406, 8.545562744140625, 98.36266326904297, -124.76553344726562, 8.201959609985352, 60.978843688964844, 75.05534362792969, -33.19261932373047, 18.254175186157227, 36.4964599609375, 92.82891082763672, 10.695486068725586, -122.93278503417969, 73.6542739868164, 25.92926788330078, 72.31669616699219, 125.88238525390625, 20.692489624023438, 68.92781066894531, 168.50418090820312, 21.785526275634766, 82.177978515625, -21.00708770751953, 15.339195251464844, 119.08171844482422, 77.0457992553711, 107.17951965332031, 25.27960968017578, 52.35993957519531, 146.6350860595703, -2.0668487548828125, -19.1138916015625, 20.45020294189453, -1.5613975524902344, 17.452770233154297, 115.88005828857422, 9.172233581542969, 75.80023193359375, 47.00958251953125, 64.41486358642578, 44.98078918457031, 116.87443542480469, 110.3149185180664, 52.664154052734375, -45.36241912841797, 48.494415283203125, 19.873918533325195, 182.59117126464844, -59.63105773925781, 60.61408996582031, 46.500572204589844, 92.06034851074219, 47.12040710449219, 13.5810546875, 31.402191162109375, -24.640701293945312, -35.648406982421875, -35.42750549316406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000127.npy"}
{"epoch": 0.18649045521292218, "step": 128, "batch_size": 64, "mean": 41.37461853027344, "std": 73.61337280273438, "min": -171.81829833984375, "p10": -47.87363204956054, "median": 36.40241622924805, "p90": 145.2020721435547, "max": 250.60215759277344, "pos_frac": 0.6875, "sample": [46.427494049072266, 43.1114387512207, 134.2632293701172, -14.7244873046875, 90.66543579101562, -61.4578742980957, 101.09422302246094, 143.94671630859375, 36.05010223388672, 42.32557678222656, 164.9189453125, 51.566802978515625, 48.54713439941406, 36.02568054199219, -84.98448944091797, 97.53184509277344, 151.24334716796875, 4.685493469238281, -0.1740131378173828, 75.01766967773438, -71.43292236328125, -3.8356285095214844, 250.60215759277344, -171.81829833984375, 148.95169067382812, 48.71685028076172, 159.43768310546875, 145.74008178710938, 69.2773666381836, 48.64259338378906, -10.956134796142578, -57.11676025390625, 102.58824920654297, -4.0853729248046875, -18.302459716796875, 18.706619262695312, -78.72836303710938, -4.810955047607422, 34.18519592285156, 47.585208892822266, 87.70594024658203, -40.14661407470703, 54.40867614746094, -24.238269805908203, -8.582639694213867, 147.3457794189453, 32.26248550415039, 36.754730224609375, 23.175487518310547, 50.94187545776367, 21.91046905517578, -8.83125114440918, 125.961669921875, -1.3370838165283203, 13.05168342590332, 2.546051025390625, -11.336418151855469, 54.35753631591797, 101.78213500976562, 11.897918701171875, -51.185211181640625, 127.58087158203125, 141.343505859375, 1.1791458129882812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000128.npy"}
{"epoch": 0.18795888399412627, "step": 129, "batch_size": 64, "mean": 43.762454986572266, "std": 83.70062255859375, "min": -141.66180419921875, "p10": -42.321146392822264, "median": 30.899341583251953, "p90": 145.31244201660155, "max": 363.89959716796875, "pos_frac": 0.65625, "sample": [3.343973159790039, 32.218231201171875, -44.56209182739258, 110.13655090332031, -47.285133361816406, 122.00967407226562, 10.499610900878906, -42.386077880859375, 167.6837158203125, 26.080896377563477, -10.554214477539062, 105.82518005371094, 26.864604949951172, 43.23602294921875, 55.980342864990234, 72.61412811279297, 147.55531311035156, -37.748382568359375, -31.798843383789062, 54.972625732421875, 215.49603271484375, 88.80406188964844, 218.48968505859375, -94.32982635498047, 86.11973571777344, -87.47344970703125, -42.169639587402344, 38.989845275878906, 363.89959716796875, 139.99508666992188, 27.816730499267578, 16.018218994140625, -0.9134521484375, 76.40705108642578, -10.473831176757812, -16.413959503173828, 28.379287719726562, 122.29380798339844, 29.58045196533203, -14.826980590820312, -72.11573028564453, -7.422018051147461, 48.95860290527344, 79.11836242675781, 145.57870483398438, -141.66180419921875, 16.179189682006836, 19.084766387939453, -8.313562393188477, -7.046573638916016, 135.30325317382812, 169.42527770996094, 38.247528076171875, 48.047760009765625, 68.53596496582031, -16.90191650390625, 42.843505859375, 53.92144775390625, -37.41661834716797, 85.87963104248047, 144.691162109375, -30.648849487304688, 76.67069244384766, -0.5362491607666016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000129.npy"}
{"epoch": 0.1894273127753304, "step": 130, "batch_size": 64, "mean": 66.1448974609375, "std": 83.9789810180664, "min": -109.03595733642578, "p10": -20.165449523925776, "median": 46.191192626953125, "p90": 193.49052734375002, "max": 321.244384765625, "pos_frac": 0.78125, "sample": [73.54243469238281, 39.78194046020508, 31.11028289794922, 186.25152587890625, -15.876838684082031, 11.783889770507812, 200.29302978515625, 80.34416198730469, -51.37409210205078, -7.400510787963867, 15.564420700073242, 57.93839645385742, 35.49531936645508, 188.00857543945312, 202.4931640625, 21.96936798095703, 93.307861328125, -7.595512390136719, 37.232521057128906, 72.66840362548828, -10.543697357177734, 31.977676391601562, 7.183250427246094, 87.13096618652344, 92.35610961914062, -4.266853332519531, 33.8143310546875, 89.44489288330078, 58.966209411621094, 178.48101806640625, 138.44032287597656, 210.998291015625, -34.49628448486328, 27.740859985351562, 148.61575317382812, -103.96891784667969, 34.36732482910156, 29.220165252685547, 34.40388488769531, -22.00342559814453, 60.544593811035156, 52.60044479370117, 74.01825714111328, 240.1696319580078, 155.61053466796875, -7.780239105224609, 321.244384765625, -40.811607360839844, -3.886920928955078, 89.1952896118164, 33.705589294433594, 39.28950881958008, 195.83993530273438, -31.74787139892578, 146.12403869628906, -109.03595733642578, 130.28602600097656, 1.918661117553711, 120.57754516601562, 115.31028747558594, 208.194580078125, 56.12718200683594, 33.30031204223633, 59.078704833984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000130.npy"}
{"epoch": 0.19089574155653452, "step": 131, "batch_size": 64, "mean": 66.29624938964844, "std": 72.53195190429688, "min": -148.121826171875, "p10": -0.901673126220703, "median": 50.27856636047363, "p90": 158.08023986816406, "max": 261.44140625, "pos_frac": 0.875, "sample": [50.1761360168457, 45.797027587890625, 51.140281677246094, 29.80417251586914, 261.44140625, -148.121826171875, 57.804508209228516, 39.14654541015625, 33.33501434326172, 104.10987091064453, 111.32611083984375, 20.600421905517578, 58.39189147949219, 13.417055130004883, 98.16749572753906, 34.98468017578125, 40.5786247253418, -1.5269355773925781, 138.34115600585938, 72.55284118652344, 41.54020690917969, 157.8388671875, -12.798851013183594, -12.855083465576172, 50.38099670410156, 42.97602844238281, 82.47377014160156, 7.9594879150390625, 198.44601440429688, 39.70403289794922, 41.795108795166016, -80.99015808105469, 158.18368530273438, 9.995307922363281, 220.28182983398438, 4.354511260986328, 100.54511260986328, 29.969200134277344, 138.1305389404297, 47.19389343261719, 128.07196044921875, 31.384923934936523, 202.84735107421875, 26.981082916259766, -90.03488159179688, 124.36282348632812, 7.253814697265625, 105.7599105834961, -0.6912384033203125, 25.948673248291016, -0.9918594360351562, 135.5665283203125, 9.026420593261719, 177.04698181152344, 151.03602600097656, 164.84255981445312, 89.92439270019531, 64.16082763671875, 29.835220336914062, 79.04931640625, 127.82054901123047, 93.9249038696289, 100.13874816894531, 83.10435485839844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000131.npy"}
{"epoch": 0.19236417033773862, "step": 132, "batch_size": 64, "mean": 54.16513442993164, "std": 86.51465606689453, "min": -153.27203369140625, "p10": -17.02062759399414, "median": 42.25692939758301, "p90": 143.92914276123048, "max": 369.04376220703125, "pos_frac": 0.8125, "sample": [14.696651458740234, 8.759418487548828, 146.06201171875, -61.68965148925781, 67.07083129882812, 36.961585998535156, 64.80401611328125, 287.3134460449219, 309.7875061035156, 8.5335693359375, 71.98910522460938, 108.5212173461914, 20.84514045715332, 75.1898193359375, -153.27203369140625, 5.001960754394531, 218.12384033203125, -36.29931640625, 66.39800262451172, -13.788579940795898, 2.5926761627197266, -12.368064880371094, 125.82015228271484, 68.64643859863281, 21.09728240966797, -18.25684356689453, 0.4541168212890625, 112.81241607666016, 7.2836761474609375, 3.55352783203125, -14.136123657226562, -12.32415771484375, 19.505022048950195, 92.07891082763672, 77.69749450683594, 144.83706665039062, 50.41717529296875, 66.37615966796875, 74.90739440917969, 49.000789642333984, 53.240386962890625, 33.22250747680664, 31.395946502685547, 141.81065368652344, -103.81703186035156, 47.55227279663086, 117.86685943603516, -66.77953338623047, 12.851016998291016, 93.49269104003906, -19.895456314086914, 57.572486877441406, 105.56626892089844, 17.15808868408203, 55.957672119140625, -12.565948486328125, 30.25121307373047, 101.91435241699219, 26.387451171875, 12.420061111450195, 177.73504638671875, 26.392730712890625, 52.79131317138672, 369.04376220703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000132.npy"}
{"epoch": 0.19383259911894274, "step": 133, "batch_size": 64, "mean": 48.25655746459961, "std": 75.76319885253906, "min": -105.94534301757812, "p10": -19.086682128906247, "median": 36.03285598754883, "p90": 145.51405944824225, "max": 250.90113830566406, "pos_frac": 0.765625, "sample": [11.494873046875, 69.71218872070312, -7.0427398681640625, 109.83273315429688, 8.54312515258789, -40.34016418457031, -7.47540283203125, 35.85259246826172, -74.84310913085938, 250.90113830566406, 52.378990173339844, 71.30196380615234, -6.873626708984375, 102.19855499267578, -78.59814453125, 17.618423461914062, 88.71565246582031, -5.13970947265625, 212.38426208496094, 4.753000259399414, 67.36102294921875, 80.17501831054688, -13.395683288574219, 130.09536743164062, 2.3604812622070312, 106.20637512207031, 46.897857666015625, 77.97621154785156, 84.62932586669922, 65.50503540039062, 11.2071533203125, 20.254852294921875, 25.741979598999023, 244.9425048828125, -1.8969268798828125, 51.694915771484375, 71.26802825927734, -67.4830322265625, 207.57421875, 36.206695556640625, 38.62028884887695, 35.85901641845703, 53.25859832763672, 241.17877197265625, -105.94534301757812, -14.509445190429688, 67.73577880859375, 178.47198486328125, 18.378707885742188, 49.23698043823242, 8.182868957519531, 117.95448303222656, -54.59150695800781, 152.1220703125, 101.60106658935547, -21.048355102539062, 66.47337341308594, 46.434844970703125, -10.043190002441406, 20.546201705932617, 7.680534362792969, 0.055332183837890625, 9.544990539550781, 18.52564239501953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000133.npy"}
{"epoch": 0.19530102790014683, "step": 134, "batch_size": 64, "mean": 51.59928894042969, "std": 67.50420379638672, "min": -43.891868591308594, "p10": -31.62993984222412, "median": 42.31174087524414, "p90": 130.68828735351565, "max": 330.7307434082031, "pos_frac": 0.828125, "sample": [15.787384033203125, -43.891868591308594, -36.709259033203125, 125.98603820800781, 198.2984619140625, 33.59003448486328, 19.989734649658203, 41.48810958862305, -33.389610290527344, 47.77906036376953, 119.45246887207031, 23.80788803100586, 12.037057876586914, 5.987571716308594, 52.163307189941406, 41.89768981933594, -27.5240421295166, 47.281158447265625, 76.69087219238281, 24.964073181152344, 6.216264724731445, 11.3809814453125, 98.10074615478516, 17.32555389404297, 167.36012268066406, 2.4098968505859375, 107.6024169921875, 95.63525390625, 53.6746940612793, 18.242393493652344, 48.710784912109375, 131.69503784179688, 2.026296615600586, 48.065216064453125, 165.81280517578125, 113.1448745727539, 128.33920288085938, -39.73412322998047, 85.79296875, -18.191471099853516, 57.27923583984375, -34.583648681640625, 65.24533081054688, 6.614690780639648, 330.7307434082031, 11.998493194580078, 82.04017639160156, 41.10979461669922, 71.81813049316406, -4.405523300170898, 66.05581665039062, -25.035789489746094, -34.35431671142578, 176.575439453125, 59.07740020751953, 3.5196075439453125, 59.983787536621094, 39.14915084838867, 42.725791931152344, 71.13385772705078, -39.9813232421875, 80.57398986816406, 31.87143325805664, 153.91629028320312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000134.npy"}
{"epoch": 0.19676945668135096, "step": 135, "batch_size": 64, "mean": 73.99042510986328, "std": 94.29330444335938, "min": -87.03752136230469, "p10": -33.80209045410156, "median": 53.408809661865234, "p90": 218.49710540771488, "max": 360.47564697265625, "pos_frac": 0.796875, "sample": [28.93181610107422, 133.14767456054688, 31.514469146728516, 241.54049682617188, 123.7872314453125, 73.12828826904297, 245.7684783935547, 41.207454681396484, 242.95883178710938, 222.23854064941406, 131.6090850830078, 89.82505798339844, 190.130615234375, 128.22116088867188, 273.077880859375, 118.14096069335938, 31.133384704589844, 198.9365234375, 11.532051086425781, -38.907798767089844, 0.9807643890380859, 113.00762939453125, 157.09713745117188, 77.57559204101562, 38.26976013183594, 98.77117919921875, 3.4374237060546875, 360.47564697265625, -22.632495880126953, 112.92906188964844, -62.210235595703125, 202.36972045898438, 209.76708984375, -75.27316284179688, 39.54129409790039, -16.462902069091797, 27.10957145690918, 56.15752410888672, 127.49664306640625, -1.8249664306640625, -58.687477111816406, 54.23432922363281, 85.51119995117188, 56.70018005371094, -0.5917015075683594, 52.583290100097656, 2.867328643798828, -14.307907104492188, 47.625450134277344, 16.276107788085938, 33.95172119140625, 111.96432495117188, 93.32244873046875, 87.0701675415039, 6.81329345703125, 99.02732849121094, 42.743324279785156, 226.94857788085938, 13.524131774902344, -87.03752136230469, -29.4718017578125, -35.657928466796875, -41.29267883300781, 6.7667999267578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000135.npy"}
{"epoch": 0.19823788546255505, "step": 136, "batch_size": 64, "mean": 49.19337463378906, "std": 92.4898452758789, "min": -77.71170043945312, "p10": -46.02523689270019, "median": 28.505130767822266, "p90": 121.8586311340332, "max": 408.912353515625, "pos_frac": 0.6875, "sample": [61.5748405456543, 74.39032745361328, 102.74459838867188, 98.62091064453125, -7.115028381347656, -49.70475387573242, 295.1488037109375, 130.9999237060547, -18.59136962890625, 66.3145751953125, 102.25746154785156, 65.06619262695312, -22.926727294921875, 13.025760650634766, 75.88844299316406, 54.099822998046875, 79.68527221679688, 34.969215393066406, 114.18586730957031, 321.5898132324219, -2.29296875, 72.26190185546875, 408.912353515625, 3.0607833862304688, 115.45835876464844, -0.01485443115234375, 223.92010498046875, -7.139240264892578, 259.64813232421875, 24.66079330444336, 9.879554748535156, 2.9683685302734375, 58.49513244628906, -73.7508544921875, 54.89991760253906, -66.12907409667969, 20.21563720703125, -25.18824005126953, 89.00834655761719, 89.1658935546875, 10.420503616333008, -15.900646209716797, -14.192184448242188, 63.635719299316406, 9.240203857421875, 2.1029186248779297, -25.416934967041016, 32.34946823120117, 93.13494873046875, 121.25202178955078, -46.555442810058594, -59.797142028808594, 35.55644226074219, 79.45941925048828, -77.71170043945312, 1.7166290283203125, 70.36282348632812, -22.234752655029297, -44.788089752197266, 24.26941680908203, -3.671703338623047, 122.11860656738281, 3.8480148315429688, -61.08638000488281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000136.npy"}
{"epoch": 0.19970631424375918, "step": 137, "batch_size": 64, "mean": 77.27559661865234, "std": 113.53436279296875, "min": -157.488525390625, "p10": -27.47138290405273, "median": 50.84297180175781, "p90": 251.29396667480495, "max": 387.982421875, "pos_frac": 0.84375, "sample": [-42.33808898925781, 9.479061126708984, 73.22935485839844, 118.9801254272461, 151.87991333007812, 32.973548889160156, 47.67291259765625, 90.32206726074219, -22.925281524658203, 103.24600219726562, 3.832571029663086, 95.26972961425781, 386.78326416015625, 18.923734664916992, 38.104408264160156, -113.40586853027344, 112.71278381347656, 10.999164581298828, 36.5692138671875, 151.6620635986328, 156.17733764648438, 80.63904571533203, 2.835296630859375, 387.982421875, 110.6546630859375, 330.55987548828125, 189.4927978515625, 125.69798278808594, 18.629384994506836, 168.71246337890625, -93.71888732910156, 9.948333740234375, 68.2763900756836, 11.103569030761719, 131.24429321289062, 9.484901428222656, 126.47988891601562, 52.173667907714844, 3.5076370239257812, 50.808128356933594, 64.59728240966797, 43.46119689941406, -4.146278381347656, -157.488525390625, -45.57697296142578, -73.42081451416016, 363.8770751953125, -29.41971206665039, 38.761802673339844, 297.9104919433594, 55.43914794921875, 129.76504516601562, 3.5207347869873047, 93.63496398925781, -17.422090530395508, 27.655466079711914, 50.87781524658203, 9.392192840576172, 1.1478862762451172, 104.31865692138672, 307.6292724609375, 34.07908630371094, 124.60396575927734, 277.7801818847656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000137.npy"}
{"epoch": 0.2011747430249633, "step": 138, "batch_size": 64, "mean": 58.77688217163086, "std": 72.48036193847656, "min": -113.36895751953125, "p10": -17.420133209228513, "median": 42.27101707458496, "p90": 152.15726013183598, "max": 276.49908447265625, "pos_frac": 0.78125, "sample": [159.74322509765625, -14.18670654296875, 72.62713623046875, 139.44009399414062, 6.771644592285156, 113.81885528564453, -17.944229125976562, -1.3329753875732422, 8.418487548828125, 174.9386749267578, 32.340824127197266, 20.984298706054688, 40.17964172363281, 11.280981063842773, 27.17133140563965, -113.36895751953125, 16.009363174438477, -20.990570068359375, -2.1047744750976562, -11.906455993652344, 191.237060546875, 177.47732543945312, 15.012245178222656, 9.744659423828125, 116.99505615234375, 129.3750762939453, 132.57803344726562, -3.9827346801757812, 92.3818588256836, 138.28663635253906, 58.150909423828125, 28.857282638549805, 128.4695281982422, 8.358306884765625, 138.79916381835938, 168.7644500732422, 74.19454956054688, 44.36239242553711, -68.28228759765625, 61.745277404785156, 65.43647003173828, 28.441024780273438, 89.32826232910156, 0.5788249969482422, -6.151947021484375, -54.5048828125, 99.44575500488281, 141.670654296875, 14.996334075927734, 39.688720703125, 112.30612182617188, 33.66163635253906, 134.3835906982422, -33.080345153808594, -22.261106491088867, 58.26736068725586, -16.197242736816406, 71.36654663085938, 20.026878356933594, 276.49908447265625, 104.20125579833984, 56.514991760253906, 106.03632354736328, 156.65151977539062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000138.npy"}
{"epoch": 0.2026431718061674, "step": 139, "batch_size": 64, "mean": 70.17027282714844, "std": 87.16915893554688, "min": -109.48736572265625, "p10": -29.78574066162109, "median": 57.03584671020508, "p90": 166.10056457519534, "max": 372.2355651855469, "pos_frac": 0.8125, "sample": [-19.651336669921875, 157.65841674804688, 100.1812515258789, 170.99461364746094, -31.530181884765625, 66.27108001708984, 120.8639144897461, -12.111244201660156, 82.96791076660156, 145.77227783203125, -109.48736572265625, 57.77565002441406, -47.504024505615234, 271.63427734375, 140.07913208007812, 47.22887420654297, 86.53133392333984, 56.296043395996094, 33.488555908203125, 62.906253814697266, 143.27981567382812, 73.48422241210938, -72.5782241821289, 123.01726531982422, 48.06819152832031, 1.7939300537109375, 159.0737762451172, 83.77668762207031, 16.073759078979492, 39.65686798095703, 155.29893493652344, 24.813074111938477, 125.74465942382812, 116.48963165283203, 37.74903869628906, 92.03291320800781, -19.92792510986328, -2.0039234161376953, 9.25788688659668, 17.733192443847656, 79.87163543701172, 0.5948104858398438, 15.980781555175781, 150.48968505859375, 23.089988708496094, 264.9932861328125, 95.57083892822266, -25.715377807617188, 76.13140106201172, -46.86614227294922, 83.3724136352539, -45.76500701904297, 34.23963165283203, 85.44007110595703, 372.2355651855469, 39.65120315551758, 251.3206024169922, 38.172943115234375, 49.524436950683594, -39.76969909667969, 190.87594604492188, 30.889297485351562, 44.258338928222656, 169.11204528808594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000139.npy"}
{"epoch": 0.20411160058737152, "step": 140, "batch_size": 64, "mean": 63.89509582519531, "std": 89.90765380859375, "min": -128.57273864746094, "p10": -49.25709457397461, "median": 65.22920989990234, "p90": 174.25756072998053, "max": 344.6651611328125, "pos_frac": 0.734375, "sample": [-25.842857360839844, 123.05000305175781, 344.6651611328125, -75.71186065673828, -13.819091796875, 123.83750915527344, 84.4466781616211, 147.4739532470703, 58.21617126464844, 162.1934356689453, 116.9503173828125, 72.12751007080078, 113.89610290527344, 41.54342269897461, 22.413467407226562, 85.56907653808594, 272.8030090332031, 12.931774139404297, 6.167736053466797, 76.21904754638672, 21.732162475585938, 66.05870819091797, 87.14035034179688, 137.28799438476562, -1.9120674133300781, -56.78550720214844, 256.03460693359375, -49.50288391113281, 36.53558349609375, 84.25886535644531, 22.857301712036133, 63.561180114746094, 8.915948867797852, 114.54135131835938, -33.255950927734375, 62.93048095703125, 92.87144470214844, 62.915916442871094, -8.28692626953125, 163.275146484375, 132.97686767578125, -33.47967529296875, 178.9643096923828, -128.57273864746094, -1.7961807250976562, 64.83851623535156, 65.61990356445312, 92.66996002197266, 190.15634155273438, 25.26715087890625, -8.450654983520508, 156.3026885986328, -13.173168182373047, 19.843521118164062, -90.13905334472656, -55.981449127197266, -74.35088348388672, 67.23655700683594, 185.48416137695312, 179.58059692382812, -48.68358612060547, 86.2785415649414, 83.61138916015625, 134.77859497070312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000140.npy"}
{"epoch": 0.2055800293685756, "step": 141, "batch_size": 64, "mean": 60.35598373413086, "std": 95.6855697631836, "min": -246.20004272460938, "p10": -46.79419326782226, "median": 59.79778861999512, "p90": 183.61264038085943, "max": 298.39166259765625, "pos_frac": 0.78125, "sample": [61.21563720703125, 38.539207458496094, -15.14303207397461, 100.55327606201172, 80.05473327636719, -32.09275817871094, 105.9190673828125, 60.37633514404297, 0.7710800170898438, -8.784660339355469, 59.219242095947266, 257.30975341796875, -246.20004272460938, 49.678123474121094, -5.0782928466796875, 35.590240478515625, 0.14740562438964844, -54.13397216796875, 261.87774658203125, -63.29803466796875, -36.79778289794922, 204.0365753173828, 48.52797317504883, 84.57138061523438, 145.6392822265625, 82.87350463867188, 106.43080139160156, 3.8524551391601562, -56.660247802734375, 73.36923217773438, 111.89576721191406, 78.846923828125, 33.470420837402344, 171.272705078125, 253.72219848632812, 27.15971565246582, 117.53089141845703, 24.672273635864258, -19.77369499206543, 17.11172103881836, -24.02686309814453, 50.849143981933594, -51.85059356689453, 188.90118408203125, 4.978340148925781, 140.09759521484375, 194.8494873046875, 121.86763000488281, 11.56364631652832, -161.385986328125, 18.687217712402344, 97.24562072753906, 62.817626953125, 88.27218627929688, 111.86395263671875, 134.24859619140625, 298.39166259765625, 127.3574447631836, 1.3051471710205078, 162.20111083984375, 68.34632873535156, 102.33563232421875, -51.078369140625, 6.6718597412109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000141.npy"}
{"epoch": 0.20704845814977973, "step": 142, "batch_size": 64, "mean": 68.87380981445312, "std": 108.02769470214844, "min": -211.85919189453125, "p10": -47.24861564636229, "median": 35.30228042602539, "p90": 238.8000457763672, "max": 297.62432861328125, "pos_frac": 0.765625, "sample": [-20.76752471923828, 265.2082214355469, 33.59907531738281, 41.92504119873047, -30.69274139404297, 245.1492462158203, -26.215557098388672, -22.047842025756836, -91.99703979492188, 33.36629867553711, 137.72329711914062, 95.46499633789062, 156.80401611328125, 30.85857582092285, 34.266014099121094, 226.5522918701172, 15.95479965209961, -211.85919189453125, -155.1126708984375, 75.52753448486328, 19.505233764648438, 31.756927490234375, 17.922061920166016, 297.62432861328125, 168.80885314941406, 36.33854675292969, 146.64065551757812, 26.01181983947754, 145.7069091796875, -22.227008819580078, 106.08895874023438, 120.19844055175781, 236.95301818847656, 84.59840393066406, -1.5792388916015625, 209.27467346191406, 215.91477966308594, 17.145248413085938, 18.597875595092773, 41.139930725097656, -65.37321472167969, -54.343990325927734, 3.1690292358398438, 239.5916290283203, 68.8595962524414, 249.3000030517578, 14.502208709716797, 22.613075256347656, 70.29396057128906, -60.2296142578125, 56.70759963989258, -20.00177001953125, 6.828157424926758, -81.19171142578125, 192.0675506591797, 242.07745361328125, 26.833908081054688, 193.70587158203125, 26.16857147216797, -9.966573715209961, 125.45336151123047, 244.32052612304688, 42.21653747558594, 124.19438934326172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000142.npy"}
{"epoch": 0.20851688693098386, "step": 143, "batch_size": 64, "mean": 44.20860290527344, "std": 94.7577896118164, "min": -154.72769165039062, "p10": -56.80705184936523, "median": 28.569091796875, "p90": 146.6190368652344, "max": 337.7685546875, "pos_frac": 0.65625, "sample": [84.80064392089844, -7.83099365234375, 52.95500946044922, 25.77631378173828, 283.9988098144531, 93.06377410888672, -57.85539245605469, 100.40531158447266, 106.67410278320312, 24.396699905395508, 144.17672729492188, -83.62644958496094, -54.360923767089844, 135.22787475585938, 84.7527847290039, -126.85535430908203, -48.977638244628906, -2.5711708068847656, 105.7412109375, -5.56976318359375, 34.08076477050781, 259.3481140136719, -70.33784484863281, 99.02389526367188, 19.77233123779297, -17.701213836669922, 44.284889221191406, 147.66574096679688, 30.64856719970703, 15.417285919189453, -12.903274536132812, -48.489288330078125, -60.36366271972656, 3.5500850677490234, 133.51512145996094, 5.551513671875, 45.255210876464844, 68.7283935546875, 25.64208984375, 337.7685546875, -36.12207794189453, -11.985170364379883, 33.292823791503906, 84.10305786132812, -2.408672332763672, 57.30657958984375, 156.44290161132812, 140.24127197265625, -4.881839752197266, -52.63909912109375, -89.036376953125, 286.36944580078125, -35.50098419189453, 79.4896011352539, 64.06587219238281, 31.376245498657227, 111.56904602050781, 26.48961639404297, 179.75390625, -154.72769165039062, -10.780784606933594, 41.46534729003906, 3.1809616088867188, 17.507843017578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000143.npy"}
{"epoch": 0.20998531571218795, "step": 144, "batch_size": 64, "mean": 54.41508483886719, "std": 79.04264831542969, "min": -107.42367553710938, "p10": -34.26722450256348, "median": 44.7833366394043, "p90": 171.44002685546874, "max": 244.90689086914062, "pos_frac": 0.734375, "sample": [-34.47966003417969, 109.9692611694336, -4.475797653198242, 171.6605224609375, -102.3866195678711, -18.572784423828125, 69.7853775024414, 117.0390396118164, 44.498863220214844, 11.156330108642578, 65.02989959716797, -41.34194564819336, 76.9268798828125, 104.95558166503906, 58.37725067138672, 170.925537109375, 51.57170867919922, 204.6151123046875, 106.30242156982422, 141.32241821289062, 177.59439086914062, 192.686279296875, -23.289941787719727, -107.42367553710938, 105.7931137084961, -22.71295166015625, 48.40415954589844, -2.9422225952148438, 69.05830383300781, 22.292139053344727, -53.37322235107422, 75.2075424194336, 66.86463928222656, 15.57958984375, -47.823890686035156, 138.16278076171875, 47.546966552734375, 26.903423309326172, 24.313581466674805, 91.21378326416016, 33.42633056640625, 92.96183013916016, 244.90689086914062, 28.4915828704834, 28.58416748046875, 75.03654479980469, -33.771541595458984, 24.931732177734375, 19.022354125976562, 102.49250793457031, 122.67671966552734, 231.19969177246094, -2.5275421142578125, -30.52362060546875, -19.881141662597656, -22.928565979003906, 221.00718688964844, 15.63021469116211, 23.733793258666992, 1.7893600463867188, 45.06781005859375, 29.808387756347656, 150.24427795410156, -45.74773025512695], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000144.npy"}
{"epoch": 0.21145374449339208, "step": 145, "batch_size": 64, "mean": 57.850833892822266, "std": 81.21188354492188, "min": -119.04986572265625, "p10": -44.74619102478026, "median": 44.90338134765625, "p90": 148.4140411376953, "max": 265.99853515625, "pos_frac": 0.75, "sample": [24.00463104248047, -49.481475830078125, -97.28335571289062, 265.99853515625, 148.68740844726562, 113.71077728271484, 244.59832763671875, 1.8983879089355469, -119.04986572265625, 57.148990631103516, 5.970703125, 75.31915283203125, 144.9279327392578, -55.25053024291992, 10.7857666015625, 120.4184799194336, 52.341773986816406, 43.00611114501953, 120.78108215332031, -49.994361877441406, -11.22294807434082, 37.734161376953125, 43.60649490356445, 137.11611938476562, 89.83064270019531, -65.76864624023438, -9.594436645507812, 29.69515609741211, -8.5687255859375, 179.72946166992188, 22.26753807067871, 14.545654296875, 116.42584228515625, -33.69719314575195, -54.854217529296875, 120.53343200683594, 16.37763214111328, 22.467756271362305, 78.57070922851562, -10.071823120117188, 68.19613647460938, -6.536479949951172, 116.77220916748047, -17.310443878173828, 113.90055847167969, 196.8171844482422, 102.54383850097656, -22.912662506103516, 50.10610580444336, 124.52843475341797, 29.301860809326172, 42.367488861083984, 140.5105438232422, 156.60113525390625, 57.10116195678711, 18.87148666381836, -6.112297058105469, 67.32653045654297, 135.04429626464844, 121.18793487548828, 234.8734130859375, 46.20026779174805, 11.637374877929688, 147.77618408203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000145.npy"}
{"epoch": 0.21292217327459617, "step": 146, "batch_size": 64, "mean": 44.6298828125, "std": 89.0619125366211, "min": -239.08795166015625, "p10": -72.05926055908202, "median": 58.244956970214844, "p90": 159.42329406738284, "max": 243.17625427246094, "pos_frac": 0.671875, "sample": [34.60021209716797, -36.334007263183594, 109.15556335449219, -64.8556137084961, 18.845735549926758, 2.7386398315429688, 76.62682342529297, 64.07638549804688, 75.43032836914062, 63.48164367675781, 214.22109985351562, 97.17681121826172, 43.08574676513672, -10.148330688476562, 25.406784057617188, -8.296958923339844, 91.63689422607422, -12.0662841796875, 39.334938049316406, -17.825227737426758, -5.659708023071289, 30.051727294921875, 60.89392852783203, 120.37089538574219, -103.38706970214844, 68.79268646240234, 150.70245361328125, -239.08795166015625, -75.14653778076172, 207.521728515625, -80.33086395263672, 74.00881958007812, -4.676753997802734, 74.43954467773438, 190.19407653808594, -79.65657806396484, 47.92631530761719, 62.54151153564453, 117.65096282958984, 243.17625427246094, 6.202230453491211, -7.479118347167969, 59.70004653930664, 78.86764526367188, 58.53143310546875, 104.88741302490234, 101.68980407714844, -99.07026672363281, 57.95848083496094, -77.29512023925781, 80.89787292480469, 140.26602172851562, 163.16079711914062, 74.3213882446289, -9.080101013183594, 93.89701080322266, 35.231597900390625, -37.40953826904297, 84.78053283691406, -53.006202697753906, 228.3108367919922, -34.48457336425781, 189.88308715820312, -51.065521240234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000146.npy"}
{"epoch": 0.2143906020558003, "step": 147, "batch_size": 64, "mean": 61.472869873046875, "std": 91.58397674560547, "min": -147.00863647460938, "p10": -31.1400634765625, "median": 46.413923263549805, "p90": 196.75840454101566, "max": 298.83465576171875, "pos_frac": 0.75, "sample": [106.273681640625, -64.73309326171875, 40.647796630859375, 235.45269775390625, -31.350181579589844, -9.250823974609375, 98.10391235351562, 97.60576629638672, 50.078147888183594, -41.83655548095703, 29.293981552124023, 78.15398406982422, 59.74059295654297, 126.34884643554688, 26.575130462646484, 201.54324340820312, 116.72430419921875, 18.24538230895996, -5.226470947265625, 185.59378051757812, 160.6193389892578, -24.68201446533203, 174.8195343017578, 22.70201301574707, 278.262451171875, 46.93669891357422, 202.3181915283203, 108.13908386230469, -13.340547561645508, 45.18476104736328, 29.820636749267578, -135.1792449951172, 4.784738540649414, 115.65617370605469, 108.1939697265625, -28.04913330078125, 62.45189666748047, 298.83465576171875, 77.06625366210938, 21.425495147705078, 183.15423583984375, 14.2540283203125, -16.13017463684082, 15.8836669921875, 33.38584899902344, 59.762474060058594, -11.2623291015625, 84.44491577148438, 65.23963165283203, -30.64978790283203, 19.0911865234375, 52.6548957824707, 20.82217025756836, 175.5537567138672, -23.44232940673828, 59.37899398803711, 244.72097778320312, -35.27948760986328, 32.97312927246094, -147.00863647460938, 213.60675048828125, -58.75856018066406, 62.0281982421875, 45.89114761352539], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000147.npy"}
{"epoch": 0.21585903083700442, "step": 148, "batch_size": 64, "mean": 52.11481475830078, "std": 83.58572387695312, "min": -105.47306823730469, "p10": -29.133935546874998, "median": 35.203460693359375, "p90": 145.26886901855468, "max": 346.486572265625, "pos_frac": 0.796875, "sample": [-7.633319854736328, 52.36592102050781, 5.81268310546875, 81.67587280273438, -61.180625915527344, 100.69750213623047, 78.15132141113281, 40.400508880615234, -41.127708435058594, 199.43385314941406, 18.088829040527344, 144.72030639648438, 20.186124801635742, -16.58635711669922, 150.40652465820312, 251.7200927734375, 3.6423683166503906, 15.48333740234375, -39.985626220703125, -70.46244812011719, 134.5639190673828, 128.90499877929688, -10.085474014282227, 89.10348510742188, 96.29489135742188, 41.34965515136719, 61.592010498046875, 49.75773239135742, 108.00242614746094, 0.40062713623046875, 145.50396728515625, 33.584564208984375, 35.814476013183594, 68.35136413574219, -30.53900909423828, 48.101078033447266, 31.82970428466797, 17.284133911132812, 111.31517791748047, 57.09524154663086, 134.79122924804688, 34.592445373535156, -83.0631103515625, 46.795326232910156, 38.04930877685547, 22.456321716308594, 183.55413818359375, 50.18971252441406, -4.366752624511719, -25.855430603027344, -105.47306823730469, 38.174591064453125, 9.228139877319336, 7.483488082885742, 33.8612060546875, 6.79716682434082, 33.319190979003906, 346.486572265625, 321.360107421875, 7.67283821105957, -1.2001209259033203, 2.2793045043945312, 87.77691650390625, 6.404384613037109], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000148.npy"}
{"epoch": 0.2173274596182085, "step": 149, "batch_size": 64, "mean": 65.15960693359375, "std": 82.16009521484375, "min": -186.63270568847656, "p10": -8.316867828369139, "median": 58.25334358215332, "p90": 152.20992889404297, "max": 307.31597900390625, "pos_frac": 0.828125, "sample": [114.63289642333984, 65.216796875, 206.88504028320312, 59.085018157958984, 107.68561553955078, 68.91970825195312, 75.50936889648438, -12.136369705200195, -19.22760009765625, -9.297992706298828, 22.17793083190918, -5.616569519042969, 59.39565658569336, 44.30908203125, 59.714996337890625, -64.7286376953125, 90.66218566894531, 93.66558837890625, 45.057220458984375, 133.43734741210938, 110.34022521972656, 174.80490112304688, 113.90082550048828, 29.51166343688965, 14.606847763061523, 98.3896484375, 49.077606201171875, 125.87764739990234, 76.10614013671875, 131.718505859375, 63.13451385498047, -1.9330997467041016, 307.31597900390625, 37.402488708496094, 152.86996459960938, 25.376754760742188, 224.328857421875, 66.5268325805664, 29.06047821044922, 108.53719329833984, 24.07168960571289, 75.73924255371094, 11.692689895629883, 44.904327392578125, -1.3366317749023438, 54.62268829345703, 14.081775665283203, 57.421669006347656, -14.90167236328125, -186.63270568847656, 96.49658203125, 12.82379150390625, 2.3873748779296875, 150.6698455810547, 11.917448043823242, 278.5148620605469, 16.06282615661621, 41.68231964111328, 140.7079315185547, 266.544921875, 6.760978698730469, -6.027576446533203, 86.18138122558594, -86.4720687866211], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000149.npy"}
{"epoch": 0.21879588839941264, "step": 150, "batch_size": 64, "mean": 63.47180938720703, "std": 94.86284637451172, "min": -83.58927917480469, "p10": -21.89505920410156, "median": 38.54088020324707, "p90": 170.87386016845704, "max": 418.6424560546875, "pos_frac": 0.8125, "sample": [293.2186279296875, 322.04901123046875, 18.32830047607422, 30.680130004882812, 60.61286926269531, -40.030067443847656, 185.7330322265625, 38.250892639160156, -79.83841705322266, 72.06806182861328, 418.6424560546875, -6.580015182495117, -18.317481994628906, 18.172149658203125, -8.380661010742188, 130.4802703857422, 19.358001708984375, 50.611610412597656, 48.5509033203125, 72.4264144897461, 38.830867767333984, 62.04033660888672, 148.80520629882812, 26.73956298828125, 108.84466552734375, 87.5064926147461, 191.4807586669922, 130.8261260986328, 15.486099243164062, 69.89532470703125, 14.348136901855469, -48.974647521972656, 84.1410140991211, 173.3190460205078, 67.95111083984375, 7.794502258300781, 165.16842651367188, 21.888593673706055, 143.05474853515625, -23.428306579589844, 92.77201843261719, -40.607215881347656, -40.7701416015625, 5.937225341796875, 118.48992156982422, 3.7037391662597656, 26.867149353027344, 63.97875213623047, 15.214500427246094, 39.224090576171875, 56.14131164550781, 69.24310302734375, 64.43132019042969, 19.097137451171875, 33.001853942871094, 349.72564697265625, 13.207128524780273, 16.510812759399414, 9.943628311157227, 32.0328369140625, 97.25977325439453, -10.909744262695312, -83.58927917480469, -0.46396636962890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000150.npy"}
{"epoch": 0.22026431718061673, "step": 151, "batch_size": 64, "mean": 63.88605499267578, "std": 79.71385955810547, "min": -146.40765380859375, "p10": -27.99817848205566, "median": 55.997718811035156, "p90": 181.56038208007814, "max": 249.3121337890625, "pos_frac": 0.828125, "sample": [-146.40765380859375, 221.0986785888672, 44.24147033691406, -2.3449325561523438, 40.25122833251953, 56.64768981933594, 65.78724670410156, 112.63809204101562, 71.70445251464844, 28.265243530273438, 180.78091430664062, -42.693359375, -29.061180114746094, 105.89137268066406, 55.410919189453125, -43.792118072509766, -6.115879058837891, 41.98082733154297, 57.22975158691406, 50.536537170410156, 231.54156494140625, 56.67561721801758, 143.67149353027344, 55.545753479003906, -52.479225158691406, 177.72247314453125, 24.950424194335938, 5.885040283203125, -25.517841339111328, 23.271194458007812, 37.06462097167969, 12.927129745483398, 19.89072036743164, 9.612321853637695, 109.51519775390625, 181.89443969726562, -62.728485107421875, 63.91344451904297, 105.54256439208984, 98.92013549804688, 8.090383529663086, 59.18054962158203, 96.27629089355469, 81.31095886230469, 12.141525268554688, 193.4599609375, 104.65089416503906, 56.449684143066406, 120.22158813476562, -6.009025573730469, 210.4626922607422, 9.323097229003906, 3.3878040313720703, 139.52749633789062, 14.629440307617188, 14.182975769042969, 201.35842895507812, 111.24303436279297, 249.3121337890625, 175.8504638671875, -50.27668762207031, 75.93717956542969, 22.719528198242188, 105.40901184082031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000151.npy"}
{"epoch": 0.22173274596182085, "step": 152, "batch_size": 64, "mean": 77.40455627441406, "std": 94.28097534179688, "min": -89.18795776367188, "p10": -37.7243927001953, "median": 60.74921989440918, "p90": 214.30006561279302, "max": 314.8253173828125, "pos_frac": 0.796875, "sample": [30.776901245117188, 53.87877655029297, 161.41387939453125, 174.63194274902344, 74.84439086914062, 204.499755859375, 60.41489791870117, 140.22576904296875, 314.8253173828125, 35.72113037109375, 153.04824829101562, 129.4169158935547, 37.45321273803711, -66.89177703857422, -3.9911651611328125, 227.72113037109375, 139.85702514648438, 49.674713134765625, 67.10342407226562, 62.22206115722656, 56.78160095214844, 52.66063690185547, -46.80510711669922, 73.33633422851562, 2.241374969482422, 309.70257568359375, 129.66513061523438, 224.70899963378906, 43.22874450683594, 21.227256774902344, 65.16950988769531, -53.242095947265625, 22.424983978271484, -7.6202392578125, 2.269502639770508, 61.08354187011719, 18.55425262451172, 193.12896728515625, 31.925819396972656, 45.86907958984375, 280.0369567871094, -24.687965393066406, 54.24091339111328, 203.0565185546875, 67.93863677978516, -30.52899169921875, 239.37709045410156, 33.15251159667969, -74.59317779541016, 107.92375183105469, 5.064542770385742, 142.24757385253906, 88.20010375976562, 71.04888916015625, 218.5001983642578, -44.19940948486328, 160.29373168945312, -89.18795776367188, -6.875312805175781, 116.30381774902344, -40.808135986328125, -29.841575622558594, 133.4400634765625, 80.63113403320312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000152.npy"}
{"epoch": 0.22320117474302498, "step": 153, "batch_size": 64, "mean": 65.91687774658203, "std": 100.3318862915039, "min": -175.88458251953125, "p10": -34.90672378540039, "median": 53.84783935546875, "p90": 194.97297363281254, "max": 410.70330810546875, "pos_frac": 0.75, "sample": [84.38493347167969, -1.9478912353515625, 9.605255126953125, -5.471824645996094, 21.307720184326172, -7.979949951171875, 27.889244079589844, 163.66452026367188, -38.5904541015625, 184.2993621826172, 92.33993530273438, 9.030708312988281, -39.874698638916016, 81.26820373535156, 241.2956085205078, -67.51117706298828, 76.54588317871094, 16.729272842407227, 55.04595947265625, 103.18819427490234, 52.64971923828125, 410.70330810546875, 38.18329620361328, 63.540565490722656, 81.6961669921875, 183.0355224609375, -27.61243438720703, -41.634483337402344, 199.54737854003906, 202.46621704101562, 140.1328887939453, 39.231285095214844, 236.197998046875, 6.241159439086914, 2.0634899139404297, 1.1024017333984375, 165.26580810546875, 94.30821990966797, -33.96818161010742, -35.308956146240234, 122.52481079101562, 14.06695556640625, 12.639297485351562, 263.57208251953125, 21.870948791503906, -15.595504760742188, -59.794830322265625, 63.978912353515625, 8.274940490722656, -175.88458251953125, 89.04908752441406, 353.1943359375, 74.91964721679688, -31.09807586669922, 67.59424591064453, -4.1974639892578125, 95.97315216064453, 44.00959777832031, 132.370849609375, 134.8035888671875, 75.61236572265625, -20.315467834472656, 62.37866973876953, 105.67245483398438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000153.npy"}
{"epoch": 0.22466960352422907, "step": 154, "batch_size": 64, "mean": 63.05192947387695, "std": 105.05284881591797, "min": -200.57623291015625, "p10": -53.813185119628905, "median": 54.552215576171875, "p90": 191.7267272949219, "max": 298.9565734863281, "pos_frac": 0.65625, "sample": [-10.916601181030273, 55.067726135253906, -200.57623291015625, 23.273767471313477, 298.9565734863281, 54.036705017089844, -9.270109176635742, 146.48724365234375, 175.90521240234375, -53.83151626586914, 92.21576690673828, 230.9051055908203, -86.74845886230469, 124.38713073730469, 117.05819702148438, 56.77096939086914, 2.3943614959716797, 20.671630859375, 36.09945297241211, -4.192560195922852, 109.94111633300781, 58.22068405151367, -5.649333953857422, 63.39921951293945, -28.18609619140625, 185.85733032226562, -53.77041244506836, 206.6873779296875, 151.16116333007812, -88.31611633300781, -33.362709045410156, 178.95208740234375, -80.09441375732422, 169.99005126953125, -49.87754821777344, 196.31900024414062, 294.3086853027344, 173.10128784179688, -4.125823974609375, 176.06040954589844, 192.89271545410156, 114.67868041992188, 18.049724578857422, 81.26406860351562, 189.00608825683594, -12.525543212890625, -16.982620239257812, -4.418039321899414, -91.08818817138672, 113.88459777832031, 155.16249084472656, -21.471923828125, 3.1725597381591797, 29.3994140625, 6.5364227294921875, -77.23643493652344, 108.30978393554688, 139.28109741210938, 119.54877471923828, -52.179443359375, -8.10821533203125, 1.1225872039794922, 81.15821838378906, 276.5565185546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000154.npy"}
{"epoch": 0.2261380323054332, "step": 155, "batch_size": 64, "mean": 63.900962829589844, "std": 94.6701431274414, "min": -210.8380584716797, "p10": -22.153705787658687, "median": 61.44193649291992, "p90": 167.96864624023445, "max": 358.04290771484375, "pos_frac": 0.75, "sample": [-7.8691864013671875, 91.06450653076172, 55.9879150390625, -6.16845703125, 94.57026672363281, 14.619964599609375, 78.7343521118164, -26.187545776367188, 75.88190460205078, 22.130020141601562, -7.59417724609375, 62.114524841308594, 101.1413345336914, -79.031005859375, 107.26961517333984, 73.24211883544922, -24.20156478881836, 47.10749053955078, 60.76934814453125, 0.515289306640625, 76.72052001953125, 66.00920104980469, 42.99165344238281, 358.04290771484375, 185.91749572753906, 8.756940841674805, 135.45758056640625, 104.4605941772461, 9.271621704101562, -119.56999206542969, 22.845394134521484, -2.2329559326171875, 23.037322998046875, 91.615478515625, 189.06143188476562, -210.8380584716797, 276.49822998046875, 44.505287170410156, 129.55165100097656, -16.85985565185547, 1.1400718688964844, 93.57225799560547, 149.266845703125, 43.214874267578125, 283.4810485839844, 94.83193969726562, 283.436767578125, 147.93707275390625, -76.14779663085938, 97.438232421875, -7.6178131103515625, 175.98370361328125, -10.87143325805664, 110.13443756103516, 88.3212890625, -48.60723114013672, 59.82440185546875, 130.68771362304688, 122.9209213256836, 10.95907974243164, 137.65309143066406, -2.480978012084961, -17.375368118286133, 72.61936950683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000155.npy"}
{"epoch": 0.2276064610866373, "step": 156, "batch_size": 64, "mean": 36.46794128417969, "std": 107.2270736694336, "min": -277.05517578125, "p10": -103.93064880371092, "median": 25.446046829223633, "p90": 135.71790161132813, "max": 284.664794921875, "pos_frac": 0.734375, "sample": [128.1805419921875, 57.89234161376953, 284.664794921875, 249.5410614013672, 171.35276794433594, 111.26934814453125, -196.51467895507812, 2.27923583984375, -89.675048828125, 20.856307983398438, -199.9597930908203, 132.0955810546875, 95.1369857788086, -23.5479736328125, -63.327640533447266, -138.01748657226562, 38.75115966796875, 108.12985229492188, 255.3667755126953, 75.82579040527344, 20.0850830078125, 123.28793334960938, 48.3961181640625, 21.067745208740234, 4.292764663696289, -7.415580749511719, 15.749122619628906, 29.84221649169922, 250.14300537109375, -18.933883666992188, 242.40289306640625, 137.27032470703125, 120.22373962402344, 66.63674926757812, 9.508209228515625, 7.790914535522461, 73.31468963623047, -119.83920288085938, -277.05517578125, -78.78369903564453, 81.54358673095703, -52.118797302246094, 3.2586517333984375, 127.08143615722656, 10.650527954101562, -110.04019165039062, 81.51676940917969, 120.27155303955078, 77.59602355957031, 10.815652847290039, -3.6816883087158203, 77.01018524169922, 58.98326873779297, 53.558815002441406, -111.6755599975586, -46.09684753417969, 10.573989868164062, 25.051162719726562, 7.1846160888671875, -1.1096954345703125, 25.840930938720703, 74.97174835205078, 6.622859954833984, 117.85519409179688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000156.npy"}
{"epoch": 0.2290748898678414, "step": 157, "batch_size": 64, "mean": 81.8030776977539, "std": 105.34732055664062, "min": -299.86138916015625, "p10": -28.51651439666748, "median": 72.6314468383789, "p90": 201.25445861816408, "max": 380.6551513671875, "pos_frac": 0.828125, "sample": [159.73434448242188, 166.65628051757812, 87.87677764892578, 194.62791442871094, 131.31402587890625, 9.337890625, 75.99356079101562, 150.0084228515625, 3.5249099731445312, 157.4693603515625, 57.19856643676758, 216.19046020507812, 50.48583984375, 87.88044738769531, 223.14404296875, 204.0944061279297, 149.60198974609375, 103.77110290527344, 41.44544982910156, 160.58181762695312, 13.300941467285156, 36.715576171875, 69.26933288574219, -29.354312896728516, 112.06503295898438, 252.53443908691406, -4.509178161621094, 31.246925354003906, -2.521392822265625, 118.89097595214844, -299.86138916015625, -37.510169982910156, 27.180740356445312, 50.439109802246094, 125.40913391113281, 80.70724487304688, 250.15206909179688, 131.64102172851562, 104.84870910644531, -31.47247886657715, 26.656940460205078, 283.30303955078125, 35.057044982910156, 59.80146789550781, 9.121173858642578, 177.48463439941406, 107.47868347167969, -26.5616512298584, 178.38568115234375, 124.89376831054688, 183.79623413085938, 52.95591735839844, -54.81620407104492, 21.4095458984375, 183.64443969726562, 152.19570922851562, 380.6551513671875, 55.348487854003906, 50.29319763183594, -140.94403076171875, 17.721466064453125, -64.97724914550781, 13.027223587036133, -20.643341064453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000157.npy"}
{"epoch": 0.2305433186490455, "step": 158, "batch_size": 64, "mean": 87.10411071777344, "std": 131.95655822753906, "min": -169.78067016601562, "p10": -43.38369026184081, "median": 51.796875, "p90": 277.84151916503913, "max": 486.9445495605469, "pos_frac": 0.71875, "sample": [9.666500091552734, 11.120046615600586, 44.471336364746094, 52.04387664794922, 51.30345916748047, -29.031402587890625, 32.16090393066406, 83.51569366455078, 37.34776306152344, -46.59078598022461, 213.67990112304688, -113.91900634765625, 253.5044708251953, 80.47962188720703, -12.810134887695312, -55.46552276611328, -15.618772506713867, 109.20399475097656, -8.740652084350586, 15.416641235351562, 229.62835693359375, -35.90046691894531, 284.047119140625, -169.78067016601562, 302.15069580078125, 73.52234649658203, 141.56475830078125, 20.47418785095215, 35.29100036621094, 11.063236236572266, 82.70840454101562, 87.37667083740234, -18.730506896972656, 150.95916748046875, -54.18984603881836, -1.9693756103515625, 336.77716064453125, 102.29254150390625, -116.47994232177734, 334.5081787109375, 417.9927978515625, -5.3508148193359375, 66.14728546142578, 486.9445495605469, 21.6571044921875, 12.857856750488281, -49.20014572143555, 119.07144165039062, 232.397705078125, 109.32158660888672, 380.00506591796875, 166.5319366455078, 98.17533874511719, 172.6070098876953, -16.275480270385742, 175.95291137695312, 100.36601257324219, -8.852701187133789, 25.530006408691406, 102.9439926147461, 263.3617858886719, 51.54987335205078, 164.07037353515625, -20.1932315826416], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000158.npy"}
{"epoch": 0.23201174743024963, "step": 159, "batch_size": 64, "mean": 59.67416000366211, "std": 105.94503784179688, "min": -208.2112274169922, "p10": -58.72546310424805, "median": 59.50743865966797, "p90": 192.63998107910157, "max": 314.3858947753906, "pos_frac": 0.75, "sample": [9.703369140625, 44.78986358642578, 141.8675994873047, -46.92625427246094, 251.23971557617188, 140.33413696289062, -208.2112274169922, -39.079402923583984, 125.16041564941406, 166.84039306640625, 91.33285522460938, -58.12882995605469, 14.266168594360352, -26.786712646484375, 88.95091247558594, -69.61035919189453, -122.65121459960938, 54.3068733215332, 188.63717651367188, 142.44961547851562, 314.3858947753906, 7.740978240966797, -44.87959289550781, 19.294021606445312, 28.678199768066406, -74.45045471191406, 20.18359375, -53.468963623046875, 87.0811767578125, 54.58961486816406, -20.79163360595703, 194.35546875, 88.74645233154297, 10.083026885986328, 120.31710052490234, -21.547903060913086, 58.42442321777344, -98.61346435546875, -4.138786315917969, 230.29898071289062, 146.23043823242188, 221.7841796875, 61.738441467285156, 71.83148193359375, 1.5124435424804688, 277.026611328125, 12.763605117797852, 1.2852458953857422, 73.62732696533203, 60.5904541015625, 135.24908447265625, 29.048667907714844, -58.981163024902344, -164.83334350585938, 69.4139175415039, 96.31523132324219, 97.35086822509766, 103.07821655273438, 28.279626846313477, 303.7242126464844, 187.67230224609375, 64.06515502929688, 70.02175903320312, 125.57794189453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000159.npy"}
{"epoch": 0.23348017621145375, "step": 160, "batch_size": 64, "mean": 79.51667785644531, "std": 129.3326873779297, "min": -150.59056091308594, "p10": -68.55548477172852, "median": 63.48670196533203, "p90": 241.99942626953126, "max": 464.6781005859375, "pos_frac": 0.703125, "sample": [106.22396087646484, -11.49165153503418, 181.75457763671875, 154.34605407714844, -12.186748504638672, -70.43228149414062, 188.81643676757812, 186.8223419189453, 169.04339599609375, 12.455547332763672, -80.19326782226562, -8.343093872070312, 138.93276977539062, 239.87033081054688, 119.00350189208984, -72.12677001953125, 39.24528121948242, -36.39170837402344, -142.29908752441406, 228.53919982910156, 284.26141357421875, -116.61308288574219, 242.91189575195312, -64.03617095947266, 141.96319580078125, 44.82960510253906, 6.555610656738281, -14.661422729492188, 464.6781005859375, 62.43214416503906, 15.376861572265625, 64.541259765625, -30.52239990234375, 387.0714111328125, 1.85296630859375, 74.0106201171875, 107.2820816040039, 163.42701721191406, 372.132080078125, 304.18402099609375, 32.67637634277344, 217.436767578125, -150.59056091308594, 203.57470703125, 94.3259048461914, 16.202850341796875, -27.403108596801758, 21.048917770385742, 111.57186126708984, 114.24870300292969, -36.996238708496094, 32.473838806152344, 112.38554382324219, -64.1762924194336, 91.74282836914062, 80.62432861328125, -9.81059455871582, 0.00299835205078125, -97.6690902709961, 154.6090087890625, 28.644447326660156, 251.82122802734375, -43.84051513671875, 112.89747619628906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000160.npy"}
{"epoch": 0.23494860499265785, "step": 161, "batch_size": 64, "mean": 69.5224380493164, "std": 103.48494720458984, "min": -135.74172973632812, "p10": -48.67658233642578, "median": 49.45800971984863, "p90": 232.22120208740242, "max": 318.586669921875, "pos_frac": 0.75, "sample": [74.75606536865234, 195.85931396484375, -49.815521240234375, 131.62477111816406, 298.0411376953125, -135.74172973632812, -52.994781494140625, -66.93475341796875, 96.50230407714844, -43.66332244873047, 45.343963623046875, 93.77864837646484, 78.4654541015625, -70.47478485107422, 4.279514312744141, 118.04408264160156, 318.586669921875, 82.63313293457031, 141.1278076171875, 265.69415283203125, 15.880683898925781, 56.64133834838867, -7.304389953613281, -34.423439025878906, 37.77128601074219, -10.403018951416016, 127.77301788330078, 38.43645477294922, 277.53826904296875, 297.8089904785156, 121.91380310058594, -32.92125701904297, -0.7756614685058594, -28.27303695678711, 241.32635498046875, 117.79076385498047, 33.678802490234375, -46.01905822753906, 15.108566284179688, 80.44622802734375, -11.171493530273438, 24.28314971923828, 11.415931701660156, 32.66229248046875, 17.82190704345703, 27.592681884765625, 206.07461547851562, 73.39569854736328, 15.929603576660156, 141.61190795898438, 210.97584533691406, -105.55693054199219, 73.32206726074219, 168.79513549804688, 46.898155212402344, 41.399688720703125, 32.516239166259766, 126.49815368652344, 89.53501892089844, -89.19725036621094, 52.01786422729492, 252.9334716796875, 112.57861328125, 69.99686431884766], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000161.npy"}
{"epoch": 0.23641703377386197, "step": 162, "batch_size": 64, "mean": 69.31608581542969, "std": 110.30030059814453, "min": -180.29393005371094, "p10": -41.98650283813476, "median": 60.63106918334961, "p90": 199.01107482910157, "max": 443.7065734863281, "pos_frac": 0.75, "sample": [-0.5768814086914062, 53.44204330444336, 104.45840454101562, 241.64341735839844, 56.29864501953125, 219.68771362304688, 29.902311325073242, 135.3688201904297, 59.315345764160156, -7.967077255249023, -20.331165313720703, 195.83050537109375, 69.8980941772461, -105.06688690185547, -39.08941650390625, -18.8963623046875, 160.02960205078125, 163.55404663085938, -4.877166748046875, 122.53025817871094, 82.36397552490234, 27.745216369628906, 52.23005676269531, 7.055206298828125, 21.078405380249023, 200.37417602539062, 130.69046020507812, -32.40690612792969, -180.29393005371094, 145.3821563720703, 103.16130828857422, -112.08192443847656, -14.761064529418945, 19.366735458374023, 147.44784545898438, 69.21757507324219, 231.17807006835938, 443.7065734863281, 64.03225708007812, 123.60039520263672, 85.47437286376953, 93.99647521972656, 156.0597381591797, 167.69480895996094, 144.48182678222656, 4.2658233642578125, -43.228111267089844, 0.29537200927734375, 90.49801635742188, 43.82158660888672, -89.26911926269531, 164.06201171875, 0.5667572021484375, -93.72576141357422, 384.9212646484375, 23.488876342773438, -113.5337142944336, 29.600296020507812, 76.85614013671875, 82.91742706298828, 61.94679260253906, -32.79052734375, 38.587738037109375, 215.0006103515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000162.npy"}
{"epoch": 0.23788546255506607, "step": 163, "batch_size": 64, "mean": 66.97450256347656, "std": 109.43936157226562, "min": -161.67919921875, "p10": -49.3012710571289, "median": 61.109683990478516, "p90": 253.3556823730469, "max": 347.054931640625, "pos_frac": 0.671875, "sample": [-12.139579772949219, -7.2708892822265625, 262.69366455078125, 347.054931640625, -26.960575103759766, -120.53945922851562, 98.15253448486328, -8.905471801757812, -8.764907836914062, 78.30412292480469, 130.6109619140625, -35.521034240722656, 103.51956939697266, 316.50048828125, 107.63117218017578, 18.673187255859375, 94.78533172607422, -18.498580932617188, 15.467079162597656, -62.49695587158203, 55.80207824707031, 157.17462158203125, 105.29507446289062, -37.94422149658203, 107.43460083007812, -1.5083770751953125, 121.97189331054688, -161.67919921875, -66.88643646240234, 106.24849700927734, 257.708740234375, 13.592113494873047, 136.40130615234375, 12.551763534545898, 70.1473617553711, -10.544239044189453, -52.80757141113281, 298.38067626953125, 127.6543197631836, 70.34092712402344, -71.354248046875, 33.13164520263672, -41.119903564453125, 243.19854736328125, 43.71012496948242, 104.93582153320312, -32.490028381347656, 302.29833984375, 148.3749237060547, 30.0048885345459, 61.00282287597656, 109.02130126953125, 61.24980163574219, 175.47665405273438, 61.21654510498047, -24.621170043945312, 81.4727554321289, 14.88250732421875, -95.17464447021484, 1.8885326385498047, 258.2104187011719, 91.28469848632812, 158.6983642578125, -10.559957504272461], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000163.npy"}
{"epoch": 0.2393538913362702, "step": 164, "batch_size": 64, "mean": 82.75531005859375, "std": 104.64041137695312, "min": -180.34097290039062, "p10": -21.13095531463623, "median": 68.37448120117188, "p90": 199.94809875488284, "max": 429.48455810546875, "pos_frac": 0.828125, "sample": [-137.98902893066406, 165.11090087890625, 164.32028198242188, 164.821533203125, 225.39797973632812, 53.61469268798828, 79.59831237792969, 129.63511657714844, 68.93611145019531, 76.81366729736328, 243.54449462890625, 49.24464416503906, 21.103715896606445, 50.603515625, -62.62690734863281, 128.9029541015625, 63.68816375732422, 65.11331939697266, -22.612581253051758, 67.73079681396484, 5.000089645385742, 133.0472412109375, 73.38480377197266, 194.14334106445312, 32.48271942138672, 150.52313232421875, -23.485912322998047, -88.88236236572266, 150.27880859375, 15.425775527954102, 45.44017028808594, -17.59851837158203, 149.34861755371094, 181.76348876953125, 139.33547973632812, 113.46266174316406, 55.377769470214844, 103.61851501464844, 31.17333221435547, 164.71424865722656, 39.06262969970703, 88.2462158203125, 67.81285095214844, 33.134490966796875, 45.21246337890625, 0.909820556640625, 72.90556335449219, 33.37346649169922, -180.34097290039062, 138.29417419433594, 76.72605895996094, 38.490142822265625, 334.7912292480469, 6.820343017578125, 342.12432861328125, 202.43585205078125, -1.3624420166015625, 216.0386962890625, 80.90385437011719, -55.66508483886719, -17.673828125, 104.83963012695312, 429.48455810546875, -3.7295684814453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000164.npy"}
{"epoch": 0.24082232011747431, "step": 165, "batch_size": 64, "mean": 54.09429931640625, "std": 100.5995864868164, "min": -158.5150604248047, "p10": -76.75446701049805, "median": 39.637046813964844, "p90": 203.2449981689454, "max": 282.136962890625, "pos_frac": 0.703125, "sample": [92.30752563476562, 90.24571990966797, -87.13752746582031, 18.97637939453125, -93.54248046875, -2.1565628051757812, 78.1643295288086, 112.76142883300781, -14.655929565429688, -81.49610137939453, 130.85159301757812, -148.17080688476562, -75.02986145019531, 37.67186737060547, 33.80711364746094, -95.1075210571289, -8.146671295166016, -48.344757080078125, 154.817138671875, 154.48062133789062, 82.24021911621094, 24.623626708984375, 16.118961334228516, 282.136962890625, -158.5150604248047, 39.14385986328125, -18.17518424987793, -46.91563415527344, -1.642364501953125, 138.29473876953125, -77.49358367919922, 17.660734176635742, 210.05517578125, 182.89651489257812, 49.300689697265625, -25.17346954345703, 79.8660659790039, 60.3240966796875, 77.02750396728516, 4.18522834777832, 40.13023376464844, 63.840003967285156, 157.4480743408203, 78.627197265625, 217.38864135742188, -18.512969970703125, 96.89723205566406, 60.6700439453125, 7.405860900878906, -70.38616943359375, 31.534423828125, 227.3531494140625, 26.27660369873047, -42.879737854003906, 60.996219635009766, 267.147216796875, 28.715118408203125, 266.16851806640625, 223.21978759765625, 121.68754577636719, 117.31953430175781, 13.820182800292969, 113.55911254882812, 187.35458374023438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000165.npy"}
{"epoch": 0.2422907488986784, "step": 166, "batch_size": 64, "mean": 99.35612487792969, "std": 140.33229064941406, "min": -92.65542602539062, "p10": -33.83179512023925, "median": 67.42060089111328, "p90": 273.2290679931642, "max": 574.2780151367188, "pos_frac": 0.796875, "sample": [453.5870361328125, 310.77484130859375, 32.235931396484375, 487.10357666015625, -37.62836456298828, 5.559429168701172, 19.91610336303711, 70.22097778320312, 171.70025634765625, 46.07806396484375, 126.59861755371094, -11.018295288085938, 10.873613357543945, 160.7397003173828, -65.97222900390625, 83.20497131347656, 159.80599975585938, 13.508331298828125, -8.513744354248047, 193.06710815429688, 3.0469608306884766, 574.2780151367188, 43.7295036315918, -50.79738998413086, -50.68121337890625, -50.879825592041016, 41.84727478027344, 146.51693725585938, -92.65542602539062, 239.68740844726562, 287.60406494140625, 469.3785705566406, 110.94709014892578, 71.57246398925781, 27.226852416992188, 368.8623046875, 11.628864288330078, 203.79385375976562, 26.5184326171875, 157.2769012451172, -19.333194732666016, 87.64891052246094, 18.458053588867188, 11.616127014160156, 168.68862915039062, 131.90354919433594, 225.5569610595703, -7.404956817626953, 77.05779266357422, 169.2545166015625, 31.67612075805664, 83.94569396972656, -62.218841552734375, 64.62022399902344, -6.669036865234375, -24.973133087158203, 113.51447296142578, 1.732208251953125, 119.61203002929688, 5.626861572265625, 93.47830200195312, 18.840856552124023, 206.81436157226562, 88.60189056396484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000166.npy"}
{"epoch": 0.24375917767988253, "step": 167, "batch_size": 64, "mean": 105.8395767211914, "std": 117.45293426513672, "min": -140.01513671875, "p10": -18.0773681640625, "median": 90.33937454223633, "p90": 260.4908264160157, "max": 431.74078369140625, "pos_frac": 0.828125, "sample": [103.54214477539062, 63.604373931884766, 146.00888061523438, 212.1926727294922, 111.94784545898438, 125.34742736816406, 95.74310302734375, 69.53184509277344, 98.54399108886719, 43.86616134643555, 431.74078369140625, 186.3111572265625, 71.85477447509766, -17.257781982421875, 23.21392822265625, 74.20515441894531, 83.56482696533203, -59.37084197998047, 131.93313598632812, 185.12753295898438, 23.93572425842285, 40.58750915527344, 79.21315002441406, 58.45716857910156, 105.42430114746094, -140.01513671875, 278.5299072265625, 246.53372192382812, -18.428619384765625, 173.0789794921875, -11.458833694458008, 107.09854125976562, 78.95458984375, 70.29521942138672, 376.535888671875, 175.73333740234375, 270.1706848144531, 84.9356460571289, 206.0802459716797, -126.86300659179688, 232.9504852294922, 266.4724426269531, 334.9249572753906, 149.56399536132812, 408.25177001953125, -14.178970336914062, 181.84288024902344, 149.2381591796875, 219.734619140625, 175.47970581054688, 104.37153625488281, 13.001205444335938, -2.3576488494873047, 31.932723999023438, 66.02880096435547, 136.64114379882812, 105.1851577758789, 58.744850158691406, 22.060012817382812, -100.50450897216797, 24.277759552001953, 14.374399185180664, -44.94239807128906, -19.810272216796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000167.npy"}
{"epoch": 0.24522760646108663, "step": 168, "batch_size": 64, "mean": 76.53134155273438, "std": 105.47482299804688, "min": -85.82365417480469, "p10": -51.78319282531738, "median": 54.31381034851074, "p90": 198.0081970214844, "max": 427.7503662109375, "pos_frac": 0.796875, "sample": [16.631210327148438, 112.81270599365234, 27.252967834472656, 20.061603546142578, 5.661623001098633, 33.37119674682617, 138.20468139648438, -47.32015609741211, 19.04141616821289, 11.744300842285156, -85.82365417480469, 189.3270721435547, 427.7503662109375, 94.99845886230469, -53.6959228515625, 195.7357177734375, 180.37637329101562, 238.41734313964844, 198.98211669921875, 48.83885955810547, 219.10684204101562, 273.06622314453125, 178.77212524414062, 175.00125122070312, -46.588722229003906, -57.79411315917969, -17.563560485839844, 128.42799377441406, 148.7082977294922, 137.30960083007812, 84.78739929199219, 27.23992919921875, -63.32215118408203, 99.03366088867188, 38.82093811035156, 116.73513793945312, -70.76715087890625, -10.645687103271484, -31.976154327392578, 30.416061401367188, 29.028261184692383, 89.56267547607422, 65.21279907226562, 94.77159118652344, 114.32249450683594, 69.50200653076172, 27.741300582885742, -61.20899200439453, 268.5356750488281, 56.62177276611328, 159.9774932861328, 22.17560386657715, 27.161142349243164, 26.523208618164062, 127.04895782470703, 59.38484191894531, 28.25459098815918, -83.38253021240234, 52.0058479309082, -32.178436279296875, 378.4818420410156, 16.60137176513672, 94.09054565429688, 136.63589477539062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000168.npy"}
{"epoch": 0.24669603524229075, "step": 169, "batch_size": 64, "mean": 66.98680114746094, "std": 130.0533447265625, "min": -276.7462158203125, "p10": -67.96896743774413, "median": 43.72690773010254, "p90": 221.20569610595706, "max": 563.3004150390625, "pos_frac": 0.75, "sample": [76.28128814697266, 86.99462890625, 4.952724456787109, 17.578529357910156, 215.97166442871094, 112.80123138427734, 18.830490112304688, 159.43190002441406, 90.59236907958984, 38.00875473022461, -0.8367195129394531, 44.527915954589844, -276.7462158203125, 1.314706802368164, 58.24154281616211, -24.818710327148438, 28.230804443359375, -84.65406799316406, 86.48318481445312, 165.82989501953125, 149.34901428222656, -55.557708740234375, 65.5216064453125, -88.46073913574219, 208.51512145996094, 223.4488525390625, 68.10806274414062, 5.7250518798828125, -16.893814086914062, 30.068214416503906, 215.04144287109375, -7.967567443847656, 19.59752082824707, 70.1307601928711, 91.57955932617188, 128.32516479492188, 253.27706909179688, 25.91548728942871, 66.81643676757812, 230.97227478027344, 5.442205429077148, 388.2625732421875, -124.41178131103516, 92.11700439453125, 3.7693252563476562, 54.941322326660156, 29.811447143554688, -73.28807830810547, 337.54559326171875, -16.16912078857422, 563.3004150390625, -3.9211273193359375, 93.28447723388672, 15.143287658691406, 287.48779296875, -9.5174560546875, 82.91331481933594, 18.730422973632812, 42.925899505615234, -25.39525604248047, -122.93299865722656, 92.42362976074219, 196.0880889892578, -143.92306518554688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000169.npy"}
{"epoch": 0.24816446402349487, "step": 170, "batch_size": 64, "mean": 79.06198120117188, "std": 106.79735565185547, "min": -225.87057495117188, "p10": -32.792518615722656, "median": 64.7021255493164, "p90": 219.3711151123047, "max": 327.4330139160156, "pos_frac": 0.828125, "sample": [187.76934814453125, -136.03488159179688, 140.53826904296875, 82.0031509399414, 217.99920654296875, 80.44866943359375, 121.60330200195312, 12.619163513183594, 142.69635009765625, 5.2996368408203125, 91.69849395751953, -2.5084190368652344, 74.23194885253906, -37.69898986816406, 9.59501838684082, -49.05662536621094, 115.15007781982422, 234.84414672851562, 17.499435424804688, 304.93218994140625, 4.935020446777344, -21.161001205444336, 176.82261657714844, 48.3819694519043, 2.1078414916992188, 18.24140167236328, -225.87057495117188, -32.39026641845703, 69.94903564453125, 197.94692993164062, 70.17637634277344, 126.78636169433594, 55.260074615478516, 76.29341125488281, 178.92825317382812, -50.280738830566406, 313.1411437988281, 6.115394592285156, 10.625608444213867, 327.4330139160156, 74.8183364868164, -95.9468994140625, 58.19780349731445, 121.36819458007812, 202.38232421875, -32.96491241455078, 26.52397346496582, 1.389841079711914, 38.55199432373047, 59.45521545410156, 89.42154693603516, 45.647544860839844, 234.72488403320312, 42.940731048583984, 133.59674072265625, 54.864776611328125, 186.5785369873047, 219.95907592773438, -18.083667755126953, 191.15762329101562, 232.24740600585938, 12.712594985961914, 193.30288696289062, 20.049114227294922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000170.npy"}
{"epoch": 0.24963289280469897, "step": 171, "batch_size": 64, "mean": 63.538963317871094, "std": 133.1363983154297, "min": -267.56365966796875, "p10": -89.32990493774413, "median": 59.53072738647461, "p90": 213.01500244140627, "max": 581.0468139648438, "pos_frac": 0.703125, "sample": [-82.10015869140625, 222.3173065185547, 16.65361785888672, 92.63883972167969, -102.77147674560547, -49.475341796875, 6.640838623046875, 106.72259521484375, -78.56488037109375, 135.82302856445312, 12.327411651611328, 155.60372924804688, 85.97807312011719, 110.23811340332031, 52.59520721435547, -77.10575103759766, 98.70606994628906, -99.51478576660156, 110.04275512695312, -6.794654846191406, 205.4569091796875, 99.30841064453125, 51.955780029296875, 87.5998306274414, 326.1554870605469, 68.42472839355469, -92.7939224243164, 581.0468139648438, -92.4283676147461, 48.84678649902344, 67.95701599121094, -267.56365966796875, 420.45977783203125, 142.83706665039062, 85.69297790527344, -119.84259033203125, 208.57809448242188, 104.66943359375, 11.300491333007812, 300.20159912109375, 3.55645751953125, 67.26126861572266, 214.91653442382812, -25.25611686706543, -94.33118438720703, 13.81536865234375, 66.46624755859375, 86.53480529785156, -59.20852279663086, 104.05705261230469, -25.43260955810547, -57.92237854003906, 135.2867431640625, 42.76349639892578, 79.37380981445312, -11.010560989379883, 34.09979248046875, 155.21939086914062, -20.866783142089844, -9.452056884765625, 88.22936248779297, 16.86541748046875, 265.27935791015625, 48.42551803588867], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000171.npy"}
{"epoch": 0.2511013215859031, "step": 172, "batch_size": 64, "mean": 67.64393615722656, "std": 111.52900695800781, "min": -197.2257080078125, "p10": -69.25734634399413, "median": 73.9396858215332, "p90": 193.76623382568363, "max": 459.6619873046875, "pos_frac": 0.703125, "sample": [-41.272926330566406, 101.99439239501953, -106.01052856445312, 90.34697723388672, 36.34820556640625, -46.600669860839844, 148.54995727539062, 100.84759521484375, 74.70928192138672, -74.6277084350586, -123.74559020996094, 51.75046920776367, 50.11870574951172, -78.87652587890625, 149.73623657226562, 88.28390502929688, 20.207012176513672, 69.11410522460938, -46.986080169677734, 67.61255645751953, -32.664283752441406, 95.67498779296875, 184.7940673828125, 145.237548828125, 20.055091857910156, 73.17008972167969, 230.66848754882812, 196.6287078857422, -75.62979125976562, 247.6201171875, -27.933609008789062, 111.30281829833984, 101.31836700439453, 91.29217529296875, 119.34722900390625, 303.8686218261719, 459.6619873046875, 43.22925567626953, 119.39376068115234, 31.543468475341797, -19.259536743164062, -44.79151153564453, 77.51162719726562, 78.16468048095703, 187.08712768554688, -25.869415283203125, 13.345273971557617, 23.350698471069336, -30.338394165039062, 170.55740356445312, 158.03135681152344, -4.66632080078125, -56.72650146484375, 103.17256164550781, 107.8042221069336, -197.2257080078125, 66.88897705078125, 100.41046142578125, 230.69769287109375, 93.52978515625, -17.220252990722656, -78.66239166259766, 242.10928344726562, 181.23251342773438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000172.npy"}
{"epoch": 0.2525697503671072, "step": 173, "batch_size": 64, "mean": 66.7756118774414, "std": 96.24974822998047, "min": -184.03781127929688, "p10": -25.491563796997063, "median": 50.05641174316406, "p90": 205.71159973144538, "max": 348.30926513671875, "pos_frac": 0.765625, "sample": [45.11476135253906, 75.19837188720703, 36.5914306640625, -7.931755065917969, -2.297191619873047, 43.58564758300781, 0.024593353271484375, 136.25914001464844, 117.38591003417969, 9.360122680664062, -18.613525390625, -41.697601318359375, 15.95869255065918, -3.8788375854492188, 82.7706298828125, 263.76397705078125, 69.49715423583984, 56.91330337524414, 67.85598754882812, 189.95034790039062, 114.61184692382812, 83.29229736328125, 81.94723510742188, -53.48011016845703, 51.410606384277344, 26.286582946777344, 348.30926513671875, 28.62944793701172, 157.58349609375, 212.21881103515625, 64.96754455566406, 20.389211654663086, -63.3280029296875, 111.89086151123047, 103.74281311035156, -19.152633666992188, 119.53844451904297, 9.30267333984375, -2.1822376251220703, 232.09628295898438, 65.43788146972656, 190.52810668945312, -28.208248138427734, 39.897727966308594, -8.813194274902344, 16.16824722290039, 235.13363647460938, 116.24940490722656, -46.75178527832031, -2.829833984375, 93.33280181884766, 48.70221710205078, 129.20155334472656, -68.50390625, -184.03781127929688, 330.30218505859375, 35.771202087402344, 5.300439834594727, 93.69256591796875, 19.88113021850586, 270.0771179199219, 59.84516906738281, 56.28541946411133, 43.0914306640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000173.npy"}
{"epoch": 0.2540381791483113, "step": 174, "batch_size": 64, "mean": 79.48760986328125, "std": 119.0953598022461, "min": -164.3258514404297, "p10": -48.7787841796875, "median": 59.65779113769531, "p90": 257.36902923583995, "max": 461.0216979980469, "pos_frac": 0.765625, "sample": [21.364578247070312, -15.7557373046875, -49.28166961669922, -92.85350036621094, -2.8642120361328125, 7.253776550292969, -12.978748321533203, 9.900609970092773, 113.74384307861328, 200.60606384277344, 232.2491912841797, -0.5658035278320312, 66.54679107666016, 18.28502655029297, 59.013145446777344, -39.874183654785156, 125.11979675292969, 167.71958923339844, 143.6991729736328, 268.1346740722656, 54.76708984375, 33.1930046081543, 60.30243682861328, -16.329206466674805, 327.1883850097656, -84.05783081054688, 133.9375, 296.93603515625, 28.1910400390625, 93.58056640625, 11.439725875854492, -14.212108612060547, 62.486839294433594, -80.50129699707031, 137.7008056640625, -47.605384826660156, 162.70864868164062, 107.0009994506836, -125.78549194335938, 274.43487548828125, 313.49017333984375, 332.7065124511719, 32.94176483154297, 125.58355712890625, 15.940774917602539, -164.3258514404297, 3.609567642211914, 129.9034423828125, 122.13652038574219, 140.1182861328125, 83.15013122558594, 48.05348205566406, 22.822471618652344, 140.23919677734375, -92.64582061767578, 187.7132110595703, 148.91770935058594, 96.4306640625, 46.74121856689453, 13.198558807373047, 461.0216979980469, 119.36456298828125, 92.19581604003906, 33.060447692871094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000174.npy"}
{"epoch": 0.2555066079295154, "step": 175, "batch_size": 64, "mean": 109.42562866210938, "std": 109.26695251464844, "min": -90.48745727539062, "p10": -9.744813346862792, "median": 86.40789413452148, "p90": 243.76709289550783, "max": 436.1126708984375, "pos_frac": 0.84375, "sample": [22.33150291442871, 37.329010009765625, 58.883323669433594, -90.48745727539062, 86.45988464355469, 165.57101440429688, 229.40179443359375, 86.35590362548828, 23.35663604736328, 147.22447204589844, 309.42681884765625, -69.95317077636719, -10.454559326171875, 311.8235778808594, 114.42219543457031, 31.185272216796875, 214.8741455078125, 232.2630157470703, 235.6063232421875, 156.35025024414062, 185.78512573242188, -65.7910385131836, 195.5602264404297, 436.1126708984375, 42.50151062011719, 135.88461303710938, 317.76519775390625, 47.378684997558594, 139.40182495117188, 81.32405853271484, -11.152885437011719, 55.90423583984375, 308.2157897949219, 19.02200698852539, 8.542655944824219, 35.45262908935547, -2.7260475158691406, 263.0758361816406, 8.004400253295898, 82.6883773803711, 129.23072814941406, 117.89404296875, 123.66313934326172, 208.31967163085938, -0.45645713806152344, 220.47817993164062, 238.00091552734375, 188.7317657470703, -26.617969512939453, 107.9677963256836, 177.41131591796875, 91.18515014648438, 86.26856994628906, 154.85574340820312, 41.649444580078125, 70.31889343261719, 144.51039123535156, -8.088739395141602, 43.531700134277344, 246.23831176757812, -53.054054260253906, 68.63007354736328, 0.2984466552734375, 57.353424072265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000175.npy"}
{"epoch": 0.25697503671071953, "step": 176, "batch_size": 64, "mean": 91.84109497070312, "std": 120.14725494384766, "min": -64.30500793457031, "p10": -32.21166458129883, "median": 64.30959129333496, "p90": 227.912223815918, "max": 499.69915771484375, "pos_frac": 0.78125, "sample": [14.668914794921875, 28.336196899414062, 100.04417419433594, 222.7571258544922, 62.1881217956543, 29.352558135986328, 12.127899169921875, 330.4628601074219, 17.795364379882812, -27.7113037109375, -31.699134826660156, 476.4566955566406, 17.450632095336914, -64.30500793457031, 230.50494384765625, 69.9774169921875, 31.651275634765625, 217.7361297607422, 177.89068603515625, -3.5072479248046875, -9.094314575195312, 71.96814727783203, 143.18101501464844, -47.54920959472656, 230.12155151367188, 2.274026870727539, 29.869043350219727, 197.68663024902344, 182.7978515625, 7.188941955566406, 119.68049621582031, -62.210906982421875, 152.07009887695312, -47.52818298339844, 37.532470703125, 169.8167724609375, -46.09184646606445, 79.9388198852539, 181.36361694335938, 134.9381866455078, 191.83731079101562, 0.4934520721435547, -44.17301940917969, -2.714130401611328, 148.66961669921875, 139.6537322998047, 132.00491333007812, 69.0013198852539, 136.18795776367188, 288.0315246582031, 34.64685821533203, -32.43132019042969, 20.997482299804688, 17.37147331237793, 138.1483612060547, 66.43106079101562, -18.411392211914062, 15.400203704833984, 134.27101135253906, -0.23562240600585938, 164.8306121826172, 499.69915771484375, 16.822601318359375, 321.16583251953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000176.npy"}
{"epoch": 0.25844346549192365, "step": 177, "batch_size": 64, "mean": 90.87483215332031, "std": 120.2770767211914, "min": -109.20533752441406, "p10": -25.93174209594726, "median": 63.101768493652344, "p90": 252.2091842651368, "max": 581.9981689453125, "pos_frac": 0.75, "sample": [167.57162475585938, 47.427154541015625, 235.1686553955078, 125.0119400024414, 66.58100891113281, 127.68229675292969, -109.20533752441406, 92.07853698730469, 191.71270751953125, 170.091064453125, 259.51226806640625, 43.303504943847656, 89.01506042480469, 264.5694580078125, 129.8927001953125, 78.82460021972656, 21.16693878173828, 364.2300720214844, 29.676780700683594, 178.4788818359375, 59.622528076171875, 175.81126403808594, -76.05218505859375, 150.1978759765625, -3.3647899627685547, 43.678466796875, 203.26470947265625, -22.31195831298828, -2.91461181640625, 1.630950927734375, 55.427574157714844, 289.83843994140625, 103.19046020507812, -19.469207763671875, 11.788787841796875, 83.16752624511719, -19.970489501953125, 260.604736328125, 0.2106037139892578, -30.199003219604492, -83.82044982910156, 159.12213134765625, -4.711330413818359, 51.30829620361328, -83.03480529785156, 156.35325622558594, 130.8352508544922, 15.99947738647461, -8.274116516113281, -5.14996337890625, 203.3424530029297, 9.141969680786133, 84.39218139648438, -13.901744842529297, 185.23468017578125, 38.062313079833984, 37.91639709472656, -28.72919273376465, 163.50625610351562, 46.97553253173828, 581.9981689453125, 289.506103515625, 80.45809173583984, -27.483078002929688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000177.npy"}
{"epoch": 0.2599118942731278, "step": 178, "batch_size": 64, "mean": 114.88732147216797, "std": 143.4801788330078, "min": -147.6109619140625, "p10": -35.21310348510741, "median": 93.71886825561523, "p90": 321.5360229492189, "max": 559.1182861328125, "pos_frac": 0.796875, "sample": [49.2655143737793, 23.514617919921875, 158.10269165039062, -9.164093017578125, 139.56536865234375, 93.37212371826172, 112.6077651977539, 38.640769958496094, 18.302745819091797, 297.38739013671875, 0.13994598388671875, 110.61434936523438, 279.79241943359375, 110.7291259765625, 50.694183349609375, 162.57086181640625, 405.7373046875, 116.96560668945312, 559.1182861328125, -56.78218078613281, 210.34410095214844, 60.858299255371094, 13.483245849609375, 68.3922348022461, -10.113100051879883, 182.05555725097656, 381.4115295410156, -51.26287078857422, 53.8096923828125, 0.7902908325195312, -75.72508239746094, 94.06561279296875, 223.72540283203125, 23.606124877929688, 127.12963104248047, 192.1561737060547, 135.9356689453125, -96.49232482910156, 68.6381607055664, 268.6238708496094, 32.3109245300293, 117.778076171875, -147.6109619140625, 445.1012878417969, -10.193199157714844, 159.53317260742188, 117.12025451660156, 161.7412109375, -39.44965362548828, 455.7757263183594, 34.24114990234375, 142.4251708984375, 351.1546325683594, -4.089227676391602, 21.3603515625, 221.90220642089844, 93.02197265625, -25.32781982421875, 64.93873596191406, 221.46566772460938, 331.88543701171875, -90.82954406738281, 178.9384765625, -13.01287841796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000178.npy"}
{"epoch": 0.26138032305433184, "step": 179, "batch_size": 64, "mean": 81.46770477294922, "std": 141.28077697753906, "min": -272.66778564453125, "p10": -82.86427307128905, "median": 77.42520523071289, "p90": 302.96589355468757, "max": 385.9840393066406, "pos_frac": 0.6875, "sample": [120.25371551513672, 162.09130859375, 181.32681274414062, 385.9840393066406, 63.69664764404297, -94.83082580566406, 185.1058807373047, 24.727489471435547, -11.242843627929688, 14.915075302124023, 100.76564025878906, 59.90873718261719, 49.09803771972656, 92.67662811279297, -22.12848663330078, 102.39552307128906, -75.33480834960938, 312.0858459472656, -55.11982345581055, -51.77105712890625, 77.15879821777344, -86.0911865234375, 361.1108703613281, 22.95293617248535, 230.35939025878906, 126.3782958984375, 163.1620635986328, 313.79107666015625, 169.64796447753906, 30.9879150390625, 82.33650207519531, 283.5009460449219, 344.36077880859375, 97.16519165039062, 77.15926361083984, 130.5341796875, -38.220947265625, -27.533069610595703, -0.7141494750976562, 86.1415023803711, 85.5258560180664, -35.545372009277344, 77.69114685058594, -93.67530822753906, 130.13189697265625, 345.1254577636719, -96.46349334716797, 251.8174591064453, 114.34737396240234, -133.26116943359375, -52.81262969970703, 239.70741271972656, 33.46925354003906, 249.863037109375, 311.3080139160156, 216.7528533935547, -160.08245849609375, 164.76138305664062, -60.372901916503906, -73.48336791992188, -272.66778564453125, 16.153594970703125, 23.54474449157715, -56.693443298339844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000179.npy"}
{"epoch": 0.26284875183553597, "step": 180, "batch_size": 64, "mean": 104.57914733886719, "std": 126.15655517578125, "min": -201.7624053955078, "p10": -28.3197919845581, "median": 87.63663101196289, "p90": 266.47852783203126, "max": 503.383056640625, "pos_frac": 0.828125, "sample": [98.84490203857422, 164.32369995117188, -74.60459899902344, 1.7825279235839844, -65.58895111083984, 503.383056640625, -24.55135726928711, 68.75520324707031, 104.33428955078125, 59.77769470214844, 7.4621429443359375, 234.527587890625, -201.7624053955078, 137.96957397460938, 185.49070739746094, 121.05511474609375, 85.81575012207031, 85.89289855957031, 105.9726791381836, 64.20988464355469, 169.28012084960938, -146.3801727294922, 69.86088562011719, 296.11163330078125, 75.48905181884766, 231.85797119140625, 158.29867553710938, 4.6359100341796875, 40.20457458496094, 54.64998245239258, 280.22052001953125, 3.8637847900390625, 256.8854675292969, 170.77691650390625, 79.00582122802734, -4.921285629272461, 75.6901626586914, 207.51638793945312, -23.011337280273438, 178.84268188476562, 87.3144760131836, 20.352943420410156, 252.28854370117188, 69.92730712890625, -130.11959838867188, 87.95878601074219, 1.3458251953125, 267.9324951171875, 236.9935760498047, 92.4801254272461, 139.07664489746094, 263.0859375, -29.93483543395996, 338.28546142578125, 94.02945709228516, -23.612762451171875, 194.21792602539062, 158.3773193359375, 28.709632873535156, -73.10138702392578, 268.8966369628906, 286.51678466796875, 29.16637420654297, 190.90969848632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000180.npy"}
{"epoch": 0.2643171806167401, "step": 181, "batch_size": 64, "mean": 64.92919921875, "std": 116.48666381835938, "min": -205.09262084960938, "p10": -57.13630599975586, "median": 45.409767150878906, "p90": 216.84354705810554, "max": 389.651611328125, "pos_frac": 0.765625, "sample": [130.88206481933594, -60.29564666748047, 15.298131942749023, 67.02491760253906, 112.5565414428711, -3.3814620971679688, 31.109607696533203, 39.594932556152344, 86.13037872314453, 121.67543029785156, -3.2736549377441406, 17.308061599731445, 5.378969192504883, 16.66657257080078, -155.13568115234375, 224.70486450195312, 116.23652648925781, -27.164093017578125, 76.29621887207031, -16.20230484008789, 29.431095123291016, -57.174888610839844, -17.690750122070312, 226.05897521972656, -80.69680786132812, 85.98391723632812, 131.38125610351562, 275.68524169921875, 64.93537902832031, 53.085296630859375, 131.09405517578125, 84.02144622802734, -204.61390686035156, 45.984107971191406, 23.814987182617188, 84.38644409179688, 13.163938522338867, 191.12376403808594, 137.86058044433594, -57.04627990722656, 16.719207763671875, -205.09262084960938, 224.66920471191406, -23.204486846923828, 353.74688720703125, 168.72679138183594, -158.41661071777344, 52.33698272705078, -32.281211853027344, 135.46112060546875, 198.58367919921875, 389.651611328125, 150.58486938476562, 36.4952507019043, 133.1604461669922, 6.736347198486328, 4.588489532470703, 1.1865158081054688, 293.12603759765625, 40.266117095947266, 30.009986877441406, 185.0833740234375, 152.29693603515625, 44.835426330566406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000181.npy"}
{"epoch": 0.2657856093979442, "step": 182, "batch_size": 64, "mean": 111.99694061279297, "std": 134.331787109375, "min": -242.88052368164062, "p10": -53.79853019714354, "median": 108.58872604370117, "p90": 273.9595672607422, "max": 380.5345153808594, "pos_frac": 0.84375, "sample": [293.32989501953125, 80.25300598144531, 324.05133056640625, 194.68698120117188, 88.05012512207031, 223.0124969482422, 241.26266479492188, 271.6553649902344, 350.064208984375, 72.626953125, 132.8455352783203, 48.77267837524414, 29.097986221313477, -17.94253921508789, 266.41082763671875, -242.88052368164062, 204.48663330078125, 66.21145629882812, 4.957927703857422, 48.58514404296875, 371.94720458984375, 1.1632862091064453, 257.6125793457031, 120.56913757324219, 153.07833862304688, -59.765281677246094, 250.22027587890625, 97.13929748535156, 18.310672760009766, 71.06484985351562, 147.64639282226562, 254.31588745117188, -107.34807586669922, -119.8609848022461, 164.88375854492188, 237.08856201171875, 44.787200927734375, 150.85488891601562, 215.73883056640625, 227.69468688964844, -64.92350006103516, 77.3632583618164, 33.48332214355469, 127.84091186523438, 149.45359802246094, 178.42984008789062, 181.13348388671875, 120.03815460205078, -128.24957275390625, 6.638126373291016, 380.5345153808594, 144.3177490234375, 51.05604553222656, 5.605623245239258, -31.25762939453125, 6.500299453735352, -129.87173461914062, 74.9781265258789, 274.94708251953125, 15.985084533691406, 345.1695556640625, -39.8761100769043, 49.98692321777344, 161.8409423828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000182.npy"}
{"epoch": 0.26725403817914833, "step": 183, "batch_size": 64, "mean": 60.72479248046875, "std": 109.3719253540039, "min": -162.70738220214844, "p10": -77.47502822875977, "median": 58.51387596130371, "p90": 189.0671264648438, "max": 342.7542724609375, "pos_frac": 0.75, "sample": [14.163162231445312, 71.00765991210938, -5.507925033569336, 60.220542907714844, 13.557964324951172, 173.34178161621094, 152.5071563720703, 142.6646728515625, 46.9490966796875, 52.867095947265625, -78.19435119628906, 138.49334716796875, 61.33308792114258, 18.19151496887207, 28.628448486328125, 83.3087387084961, 339.4774169921875, 248.35150146484375, 142.13096618652344, 28.844762802124023, 68.8838882446289, 120.00038146972656, 300.69122314453125, 99.44305419921875, -9.562095642089844, 4.858959197998047, 78.88905334472656, 56.80720901489258, 0.946075439453125, -162.70738220214844, 171.06967163085938, 78.08122253417969, -54.618408203125, 13.354835510253906, 177.42877197265625, 62.235198974609375, 46.73927307128906, 95.7229995727539, 133.98040771484375, 227.780517578125, -129.5953369140625, -44.429222106933594, 342.7542724609375, -75.7966079711914, 29.741981506347656, -31.188232421875, 79.62989044189453, -98.7939453125, -108.87140655517578, -108.33941650390625, 160.23663330078125, 108.25221252441406, 201.19337463378906, 194.05499267578125, 87.97293090820312, 51.849632263183594, 27.066322326660156, -64.54717254638672, 137.28118896484375, 84.4029541015625, -153.22320556640625, -62.872066497802734, 19.540206909179688, -2.29473876953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000183.npy"}
{"epoch": 0.2687224669603524, "step": 184, "batch_size": 64, "mean": 64.11538696289062, "std": 111.73335266113281, "min": -155.009521484375, "p10": -48.047241592407225, "median": 33.33728790283203, "p90": 210.97258605957035, "max": 339.360107421875, "pos_frac": 0.734375, "sample": [29.62158203125, -108.7529525756836, 66.96404266357422, 78.05182647705078, 198.04762268066406, 186.321533203125, 10.445571899414062, 14.5850830078125, 156.57644653320312, -18.220359802246094, 313.2129821777344, 339.360107421875, 61.21385192871094, 214.9520263671875, -43.85851287841797, 191.73300170898438, -0.2216644287109375, 221.5567626953125, 98.6557846069336, 272.07781982421875, -25.712322235107422, 305.1702575683594, -89.45449829101562, 16.853118896484375, 42.532840728759766, 33.53015899658203, 42.54774475097656, 9.897560119628906, 89.76220703125, 3.295766830444336, 42.304931640625, 96.97454833984375, 145.10275268554688, 198.37477111816406, -89.74955749511719, 201.68722534179688, 15.21075439453125, -25.49810028076172, -38.44758224487305, 23.530628204345703, 70.76488494873047, -18.853607177734375, 30.35726547241211, 6.656335830688477, 15.388816833496094, 197.8193817138672, 69.84645080566406, 55.58481216430664, -53.24169921875, -131.7093505859375, -49.842411041259766, 27.620527267456055, -1.1857795715332031, 6.5552215576171875, -155.009521484375, 34.68019104003906, -35.99617004394531, 175.8530731201172, 33.14441680908203, 103.77957916259766, 10.766487121582031, -1.5858993530273438, 99.83961486816406, 331.9163513183594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000184.npy"}
{"epoch": 0.2701908957415565, "step": 185, "batch_size": 64, "mean": 50.61490249633789, "std": 120.40911102294922, "min": -192.69537353515625, "p10": -64.02278327941893, "median": 26.461185455322266, "p90": 199.8696685791016, "max": 424.2530212402344, "pos_frac": 0.640625, "sample": [-161.91232299804688, -67.57341003417969, -17.317827224731445, 86.20513916015625, 75.99507141113281, 17.833284378051758, -6.573137283325195, 77.97447204589844, 399.1315612792969, 2.5436477661132812, 16.328243255615234, 110.5897445678711, 106.12306213378906, -8.777732849121094, 252.83102416992188, 37.823577880859375, -11.006391525268555, 38.300132751464844, 136.8306427001953, -101.19668579101562, -10.101421356201172, 34.7949104309082, 21.817859649658203, 71.38829040527344, 78.85708618164062, -6.9322662353515625, -117.54539489746094, 21.873382568359375, 87.07062530517578, -133.91571044921875, 34.4224853515625, 20.793697357177734, 94.63394165039062, 89.20399475097656, 25.496826171875, 366.779541015625, -54.09711456298828, 33.736412048339844, 189.59335327148438, -43.5293083190918, 33.91062545776367, 5.26905632019043, -18.56000518798828, -8.485000610351562, -23.404245376586914, -3.357858657836914, 320.62530517578125, 138.29530334472656, 149.9291229248047, 0.3011035919189453, 424.2530212402344, -29.653602600097656, 27.42554473876953, 67.08351135253906, -16.33348846435547, 204.2738037109375, 35.7999267578125, 125.76190185546875, -192.69537353515625, 259.40679931640625, -75.87100982666016, 91.43122100830078, -8.807205200195312, -55.73798751831055], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000185.npy"}
{"epoch": 0.27165932452276065, "step": 186, "batch_size": 64, "mean": 95.4560317993164, "std": 116.65131378173828, "min": -190.415771484375, "p10": -22.139906311035155, "median": 74.6743278503418, "p90": 231.5572906494141, "max": 450.62042236328125, "pos_frac": 0.8125, "sample": [-190.415771484375, 128.7786102294922, 312.27349853515625, 150.2666015625, 145.48269653320312, -1.59747314453125, 75.82711791992188, 48.764522552490234, 207.76882934570312, 153.92723083496094, 65.95904541015625, 219.38955688476562, 1.6890945434570312, 81.21255493164062, 36.788761138916016, -19.81247329711914, 88.8565673828125, -115.78681945800781, 40.7735710144043, 41.986053466796875, 295.61126708984375, 43.66659927368164, -5.535575866699219, 172.71826171875, 41.148948669433594, 72.88615417480469, 98.03573608398438, 143.89401245117188, 315.91131591796875, 107.09835815429688, 178.32772827148438, -25.016626358032227, 154.303955078125, 66.34648132324219, 30.062177658081055, 124.95954895019531, 388.40789794921875, 236.77203369140625, 48.67390823364258, 2.8561363220214844, 156.03680419921875, 28.097394943237305, 73.52153778076172, 62.71746826171875, -63.44647216796875, 393.5738525390625, -21.200424194335938, 69.36734771728516, -22.54254150390625, -45.80027770996094, 450.62042236328125, 154.8907470703125, 68.31507873535156, 142.212158203125, 2.23095703125, 143.877685546875, 93.13914489746094, -26.249794006347656, 2.6298370361328125, 119.08729553222656, 145.59426879882812, 133.39889526367188, -7.286285400390625, 93.14035034179688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000186.npy"}
{"epoch": 0.27312775330396477, "step": 187, "batch_size": 64, "mean": 79.94140625, "std": 107.06732177734375, "min": -149.99734497070312, "p10": -37.361049270629884, "median": 62.117462158203125, "p90": 214.85820922851565, "max": 388.050048828125, "pos_frac": 0.78125, "sample": [-149.99734497070312, 205.84686279296875, -20.929906845092773, 59.45492935180664, -79.84890747070312, 227.45147705078125, 83.83729553222656, 162.24310302734375, 54.569854736328125, 40.365726470947266, 74.54664611816406, 67.65164947509766, 334.85821533203125, 79.61650848388672, 124.06935119628906, 211.04815673828125, 161.99746704101562, 66.78236389160156, 117.60392761230469, -46.64931869506836, 55.75440216064453, 35.54100799560547, 388.050048828125, -37.987701416015625, 151.71116638183594, 57.75205993652344, 239.91473388671875, -34.27097702026367, 188.81175231933594, 41.47349548339844, -49.183441162109375, 86.4629135131836, 18.31884765625, -60.44500732421875, -35.898860931396484, 147.6285400390625, -35.40904998779297, 3.013212203979492, -8.029319763183594, 209.13580322265625, 148.4507598876953, 347.21258544921875, 97.23143768310547, 45.97815704345703, 63.94842529296875, 37.317649841308594, 40.22310256958008, 45.52659606933594, -125.74922943115234, 40.41803741455078, 104.3748550415039, -10.296073913574219, 156.90773010253906, 92.84622192382812, 76.37952423095703, 195.1916961669922, 89.80725860595703, 0.9036178588867188, 60.2864990234375, 227.73670959472656, 216.4910888671875, 36.757102966308594, -24.73000144958496, 16.174705505371094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000187.npy"}
{"epoch": 0.2745961820851689, "step": 188, "batch_size": 64, "mean": 98.32379150390625, "std": 120.28564453125, "min": -119.6957778930664, "p10": -16.91444931030273, "median": 71.38194274902344, "p90": 285.94708251953125, "max": 499.34722900390625, "pos_frac": 0.796875, "sample": [308.90313720703125, 123.31417083740234, 242.12684631347656, 70.88313293457031, 71.06078338623047, -3.0700836181640625, -31.217178344726562, -111.80365753173828, 293.92620849609375, 79.77482604980469, 52.0694580078125, -29.349082946777344, 118.5275650024414, 77.1514892578125, 286.96221923828125, 253.09877014160156, 90.7980728149414, -23.237403869628906, 53.56404113769531, 7.432947158813477, -1.8438568115234375, 28.742347717285156, 41.670989990234375, 141.70346069335938, 70.4390640258789, 25.596580505371094, 15.606353759765625, -5.338748931884766, 319.4116516113281, 49.482025146484375, 368.79998779296875, -119.6957778930664, 283.57843017578125, 107.49839782714844, 156.66079711914062, 118.51773071289062, 65.51632690429688, 188.2677001953125, -10.12158203125, 188.25726318359375, 167.44483947753906, 25.067092895507812, -86.95040893554688, -18.999181747436523, 190.7163848876953, 39.982643127441406, 217.03091430664062, 31.03976058959961, 5.9429779052734375, 332.994873046875, 90.74385833740234, 167.63720703125, 56.38624954223633, -11.364383697509766, 165.86813354492188, 81.71516418457031, 102.80866241455078, 499.34722900390625, 94.90118408203125, 71.7031021118164, 76.401123046875, -12.050073623657227, 23.14276885986328, 17.54669761657715], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000188.npy"}
{"epoch": 0.27606461086637296, "step": 189, "batch_size": 64, "mean": 79.6611328125, "std": 133.67152404785156, "min": -169.3841552734375, "p10": -65.78843688964844, "median": 60.62401580810547, "p90": 196.64479980468752, "max": 566.9457397460938, "pos_frac": 0.78125, "sample": [40.35785675048828, 175.57749938964844, -125.38148498535156, 344.5438232421875, -67.56684875488281, -61.63880920410156, 45.201438903808594, 6.528373718261719, 2.356222152709961, 34.4205322265625, 151.90162658691406, 148.9072265625, 148.46163940429688, -11.026336669921875, 162.92715454101562, 45.866058349609375, 189.5015869140625, 45.43012237548828, -169.3841552734375, 6.097267150878906, -82.2009048461914, 25.929170608520508, 81.34687805175781, 177.29925537109375, 142.22901916503906, 83.88600158691406, -43.94932556152344, 57.43368148803711, 36.95906448364258, 182.80517578125, 4.876789093017578, 20.518789291381836, 94.54798889160156, 482.15234375, 60.972320556640625, -25.90544891357422, 22.69525146484375, 213.8051300048828, 85.83155822753906, 10.997611999511719, -166.7821502685547, 123.25841522216797, -57.68292236328125, 122.96092987060547, 62.5509147644043, 331.55621337890625, 176.1103973388672, 566.9457397460938, -21.50792694091797, 123.3465576171875, 172.04867553710938, 30.566024780273438, 80.2962646484375, 151.35389709472656, 110.76641845703125, 199.7061767578125, 255.13351440429688, 94.95535278320312, -3.0740432739257812, -169.04595947265625, 60.27571105957031, -81.15860748291016, 27.46612548828125, 162.9556884765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000189.npy"}
{"epoch": 0.2775330396475771, "step": 190, "batch_size": 64, "mean": 95.53968811035156, "std": 140.0072479248047, "min": -214.4184112548828, "p10": -64.4300651550293, "median": 65.91733169555664, "p90": 301.8294555664063, "max": 422.9517517089844, "pos_frac": 0.796875, "sample": [-3.785552978515625, 112.06848907470703, 189.53005981445312, 163.58596801757812, 17.14914321899414, -88.25518035888672, 249.46408081054688, 44.541419982910156, 12.372480392456055, 363.40960693359375, 82.95599365234375, 64.59896087646484, 137.83042907714844, 21.24591827392578, 367.38458251953125, 145.24447631835938, 110.19837951660156, 78.95747375488281, 195.71536254882812, 23.388198852539062, 3.5635547637939453, 265.39984130859375, -214.4184112548828, 64.11595916748047, 272.115478515625, -20.579376220703125, 61.65740966796875, 328.4510498046875, 154.77781677246094, 54.19038391113281, -16.370407104492188, 303.62890625, 20.78522491455078, -3.2158069610595703, 138.59158325195312, 74.88728332519531, -95.58951568603516, -65.63909912109375, -151.00909423828125, 14.691970825195312, 109.26773834228516, -61.608985900878906, 110.23400115966797, 53.09571838378906, -27.890344619750977, -92.39080810546875, 103.95149230957031, 297.6307373046875, 50.346160888671875, 54.38562774658203, -190.5675811767578, 386.1904296875, 124.92135620117188, 23.3666934967041, 208.17845153808594, 105.6469497680664, 67.23570251464844, 385.90521240234375, 4.88878059387207, 422.9517517089844, 203.5194091796875, 223.12098693847656, 33.69071578979492, 40.834808349609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000190.npy"}
{"epoch": 0.2790014684287812, "step": 191, "batch_size": 64, "mean": 137.9830322265625, "std": 152.693603515625, "min": -146.19528198242188, "p10": -10.3233959197998, "median": 114.20819091796875, "p90": 326.99763183593757, "max": 572.3182373046875, "pos_frac": 0.859375, "sample": [488.5576477050781, 109.07638549804688, 572.3182373046875, 30.541702270507812, 298.937744140625, 71.99418640136719, 143.17601013183594, 268.32403564453125, 15.922346115112305, 361.43865966796875, 101.7724609375, 171.07168579101562, -11.949417114257812, 271.7674865722656, 293.70257568359375, 31.660316467285156, 36.23535919189453, 81.01471710205078, 27.899063110351562, 69.5641860961914, 261.2974853515625, 53.278900146484375, 43.1488037109375, 294.414306640625, 125.47022247314453, 8.951217651367188, 8.603309631347656, 543.2888793945312, 188.1042938232422, -21.317298889160156, 228.67803955078125, -75.64543151855469, 17.39134979248047, -48.512454986572266, 142.19464111328125, 176.380126953125, 182.15272521972656, 147.8889923095703, 333.16656494140625, 31.402128219604492, 8.30459976196289, 251.61056518554688, 225.1480712890625, -146.19528198242188, 67.46253967285156, 508.2746887207031, 36.374412536621094, -65.12306213378906, 119.33999633789062, 95.49516296386719, 184.14598083496094, 312.60345458984375, 36.084007263183594, -6.529346466064453, 124.3748779296875, 173.9681854248047, -73.33721160888672, 121.12127685546875, -3.5466766357421875, 126.53583526611328, 363.2738037109375, 34.59343719482422, 88.31802368164062, 175.25494384765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000191.npy"}
{"epoch": 0.28046989720998533, "step": 192, "batch_size": 64, "mean": 91.70048522949219, "std": 148.4692840576172, "min": -260.8277893066406, "p10": -50.542318725585936, "median": 67.80596542358398, "p90": 316.1694702148438, "max": 548.2846069335938, "pos_frac": 0.734375, "sample": [-49.702491760253906, -144.0408935546875, 41.803977966308594, 173.09434509277344, 129.52383422851562, -37.592979431152344, -50.902244567871094, -2.0516223907470703, 32.80694580078125, -260.8277893066406, 25.826202392578125, 97.07647705078125, -36.3262939453125, 230.14820861816406, 18.216659545898438, 39.17643737792969, -71.57546997070312, 101.11634063720703, -43.137542724609375, 174.55357360839844, -119.1482925415039, 418.8036193847656, -48.587894439697266, 76.92755889892578, -16.140945434570312, 321.49560546875, 358.28631591796875, 377.3251647949219, 153.62814331054688, 356.833251953125, 95.77035522460938, 77.31504821777344, 548.2846069335938, 119.6451416015625, 181.3440704345703, 28.651643753051758, 117.67234802246094, 216.0067596435547, 273.0760803222656, 60.84670639038086, 47.71647644042969, 303.7418212890625, 7.380075454711914, 40.43788146972656, 133.75413513183594, 59.946754455566406, 59.494712829589844, -172.0224151611328, 196.36209106445312, 70.82486724853516, 82.68022918701172, -9.237579345703125, -44.506736755371094, -67.62775421142578, 16.764495849609375, 229.29937744140625, 71.47702026367188, -33.94688415527344, 38.208099365234375, 207.79238891601562, 140.80661010742188, 121.96989440917969, 64.78706359863281, 337.5074768066406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000192.npy"}
{"epoch": 0.28193832599118945, "step": 193, "batch_size": 64, "mean": 52.65251159667969, "std": 141.3990020751953, "min": -413.68243408203125, "p10": -87.26598739624022, "median": 52.867305755615234, "p90": 228.6952011108399, "max": 369.922119140625, "pos_frac": 0.65625, "sample": [115.22686767578125, -26.95525360107422, 29.480918884277344, 186.73541259765625, -411.18603515625, 96.04673767089844, 63.096763610839844, 52.21155548095703, -413.68243408203125, 301.2044372558594, 64.94950103759766, -11.399175643920898, 199.019287109375, -34.3800163269043, -24.520889282226562, 94.03555297851562, 105.09532928466797, 126.3165283203125, -2.2920303344726562, 232.1187744140625, 2.966062545776367, -20.38568878173828, 13.76430892944336, 151.345947265625, 27.28511619567871, 134.45925903320312, -58.281490325927734, -44.85358428955078, 3.6917877197265625, -59.13874816894531, -26.088396072387695, 220.7068634033203, -98.22467041015625, -94.6559829711914, 242.36508178710938, -125.40914154052734, 179.478759765625, -92.86284637451172, 19.062501907348633, 53.52305603027344, 10.153289794921875, 139.73162841796875, 272.77880859375, 163.28082275390625, 116.6553955078125, -135.31370544433594, 315.74530029296875, 81.4584732055664, 125.55973052978516, 124.17110443115234, -64.07080078125, 96.64480590820312, -1.4830131530761719, -74.20664978027344, -47.70213317871094, 143.02044677734375, 369.922119140625, 127.77665710449219, -66.82020568847656, 21.338699340820312, 282.173828125, 76.99785614013672, 14.96466064453125, 107.11365509033203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000193.npy"}
{"epoch": 0.2834067547723935, "step": 194, "batch_size": 64, "mean": 86.26107788085938, "std": 99.67212677001953, "min": -103.73922729492188, "p10": -49.88416595458984, "median": 68.35005187988281, "p90": 242.6201629638672, "max": 305.6581726074219, "pos_frac": 0.8125, "sample": [138.03121948242188, 21.336936950683594, 32.37614440917969, 11.35888671875, 120.37139892578125, 246.72567749023438, 103.61843872070312, 165.07476806640625, -18.271011352539062, 19.930030822753906, 29.90403938293457, 246.84182739257812, -67.65086364746094, -58.06719970703125, 43.100074768066406, 62.55790710449219, 212.36605834960938, 55.47406005859375, 70.20050048828125, 129.92520141601562, 35.55289077758789, 81.76053619384766, 12.755069732666016, 7.054973602294922, 118.3114242553711, -79.83699035644531, -4.191095352172852, 226.29013061523438, 63.565773010253906, 79.55570983886719, 246.1024932861328, 131.8894805908203, 239.70228576660156, 173.09896850585938, 25.12602996826172, 282.72613525390625, 62.93989562988281, 26.305707931518555, 143.6127471923828, 46.3291015625, 189.85789489746094, -75.250244140625, 45.141632080078125, -15.238533020019531, 104.21980285644531, 164.38876342773438, -2.3935508728027344, -103.73922729492188, 37.32182312011719, -53.03656005859375, 109.89227294921875, -64.2438735961914, 63.80525207519531, 243.8706817626953, 74.99534606933594, 305.6581726074219, 133.0102081298828, 66.49960327148438, 286.6011962890625, 93.45247650146484, 175.094970703125, 183.6859130859375, 115.78829956054688, -42.52857971191406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000194.npy"}
{"epoch": 0.28487518355359764, "step": 195, "batch_size": 64, "mean": 93.17719268798828, "std": 118.43743896484375, "min": -127.9245834350586, "p10": -49.713039779663085, "median": 76.71570205688477, "p90": 246.85775756835943, "max": 399.26861572265625, "pos_frac": 0.78125, "sample": [82.37439727783203, -23.65960693359375, 222.9453582763672, 42.67995834350586, 71.35479736328125, 399.26861572265625, 68.59284210205078, 328.31060791015625, 214.300537109375, -7.9368133544921875, -4.261871337890625, 47.47523498535156, -6.260414123535156, -99.59117126464844, 258.38726806640625, 113.74983215332031, 50.54437255859375, 226.2075958251953, -42.95024871826172, 329.30023193359375, -49.741207122802734, 106.39717864990234, -101.66058349609375, 37.348297119140625, 96.53948974609375, 106.19959259033203, 233.25125122070312, 299.3463134765625, 152.53819274902344, 128.8384246826172, 148.00942993164062, -80.74251556396484, 76.29264831542969, 186.09249877929688, 1.207061767578125, 152.18199157714844, -30.626028060913086, 11.724327087402344, 166.89279174804688, 206.5001678466797, -127.9245834350586, 252.68911743164062, 43.53482437133789, 73.27291107177734, 389.3971862792969, 141.2830810546875, 77.13875579833984, 58.15618896484375, -52.753501892089844, 10.235937118530273, 159.79794311523438, -69.70980834960938, 221.68524169921875, 107.52577209472656, 24.977828979492188, 80.827880859375, 29.62622833251953, 54.39667510986328, 17.664915084838867, 84.11314392089844, -49.647315979003906, 55.18954849243164, 151.46595764160156, 112.97561645507812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000195.npy"}
{"epoch": 0.28634361233480177, "step": 196, "batch_size": 64, "mean": 105.90283203125, "std": 143.84640502929688, "min": -223.51126098632812, "p10": -56.199887084960935, "median": 79.71743392944336, "p90": 309.5011413574219, "max": 451.0350341796875, "pos_frac": 0.78125, "sample": [189.772705078125, 202.3842010498047, 311.91064453125, 215.86778259277344, -57.917091369628906, 285.6785888671875, 303.87896728515625, 434.3759765625, 232.32186889648438, 69.14894104003906, 8.71109390258789, 51.87787628173828, 283.83868408203125, 194.5666961669922, 49.044708251953125, 10.719423294067383, -13.88290786743164, 88.39756774902344, -8.404190063476562, 45.944244384765625, 436.44097900390625, -16.45022201538086, 61.67677307128906, 36.18263626098633, 128.7584991455078, 128.552001953125, 124.79638671875, 139.16928100585938, 46.397552490234375, -11.483112335205078, 114.61679077148438, 77.53973388671875, 434.24945068359375, 451.0350341796875, -18.67572784423828, 181.57020568847656, 84.46211242675781, 59.97224426269531, 109.36207580566406, 83.31765747070312, 63.084434509277344, 56.367889404296875, 219.007568359375, -2.3175601959228516, 132.74917602539062, 30.57944107055664, 81.89513397216797, -157.10968017578125, 66.3693618774414, -223.51126098632812, -67.5281982421875, 156.61764526367188, 90.173095703125, -62.83774185180664, 318.5107421875, -52.193077087402344, -133.27850341796875, 66.19371032714844, 0.736236572265625, 128.61398315429688, 263.1473083496094, 322.230712890625, -75.3564453125, 5.911455154418945], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000196.npy"}
{"epoch": 0.2878120411160059, "step": 197, "batch_size": 64, "mean": 81.5740737915039, "std": 113.25190734863281, "min": -117.93977355957031, "p10": -38.90371398925781, "median": 52.21286392211914, "p90": 235.64595489501954, "max": 401.7397155761719, "pos_frac": 0.796875, "sample": [59.071983337402344, 332.5087585449219, 137.44398498535156, -20.7607421875, 160.83139038085938, 7.0533599853515625, 19.073226928710938, -64.6995849609375, 64.02617645263672, 401.7397155761719, 354.4153137207031, 237.5862274169922, 66.76575469970703, 123.76785278320312, -97.78943634033203, -0.7020034790039062, 128.22323608398438, 279.40814208984375, 27.82598114013672, 37.665470123291016, 103.54721069335938, 213.0468292236328, 42.38712692260742, -5.51641845703125, 58.02722930908203, 8.64605712890625, -103.49820709228516, 130.07333374023438, 75.70764923095703, 91.91349792480469, 259.183349609375, 144.8836212158203, 35.11604309082031, 231.11865234375, 2.829254150390625, 191.9445037841797, -41.291015625, 0.0666046142578125, -75.61239624023438, -117.93977355957031, 33.29755401611328, 119.61483001708984, -0.42635536193847656, 14.130912780761719, 26.72231101989746, 78.87226867675781, 309.69378662109375, 202.37918090820312, 26.181472778320312, 188.92495727539062, -12.254610061645508, 45.373226165771484, 89.21149444580078, 22.68677520751953, 19.05156707763672, 120.45052337646484, 169.23159790039062, 77.15380859375, 202.19699096679688, -33.333343505859375, -64.50146484375, 21.722312927246094, 19.874420166015625, 46.39849853515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000197.npy"}
{"epoch": 0.28928046989721, "step": 198, "batch_size": 64, "mean": 103.49455261230469, "std": 142.75363159179688, "min": -234.0670166015625, "p10": -32.193214607238765, "median": 86.87192153930664, "p90": 229.39146575927737, "max": 590.58251953125, "pos_frac": 0.828125, "sample": [-120.88996124267578, 95.15122985839844, -34.03771209716797, 526.42138671875, 118.95263671875, 203.260498046875, 326.5648498535156, 55.051509857177734, 142.2070770263672, 48.064048767089844, 119.30078125, 163.70428466796875, -142.39804077148438, 6.894508361816406, 2.61395263671875, 231.6037139892578, 143.74444580078125, 119.77157592773438, 79.71826171875, 87.54479217529297, -27.889387130737305, -38.15623474121094, 34.82025909423828, 157.61256408691406, 11.738348007202148, 69.64600372314453, -73.20614624023438, 195.16134643554688, 86.19905090332031, 82.99617004394531, 90.40877532958984, 116.8322982788086, 138.464111328125, 72.96327209472656, 129.0074005126953, 14.16732406616211, 75.39399719238281, 149.47006225585938, 153.81814575195312, 224.22955322265625, -234.0670166015625, 532.65771484375, 95.51323699951172, 63.25476837158203, -3.1615676879882812, 12.958999633789062, 238.40292358398438, -39.545066833496094, 3.3646621704101562, 74.86094665527344, 36.368953704833984, 91.03408813476562, 85.13897705078125, 136.35911560058594, -6.681901931762695, 144.27745056152344, 152.64266967773438, 206.16656494140625, 11.94144058227539, 414.8182373046875, -4.9298095703125, 590.58251953125, 48.35340881347656, 136.41880798339844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000198.npy"}
{"epoch": 0.2907488986784141, "step": 199, "batch_size": 64, "mean": 104.01869201660156, "std": 139.33583068847656, "min": -165.69708251953125, "p10": -55.58073844909667, "median": 86.64188385009766, "p90": 282.2914459228516, "max": 454.783447265625, "pos_frac": 0.78125, "sample": [205.46685791015625, 170.12962341308594, -134.09664916992188, -8.631866455078125, 272.6278381347656, 75.30664825439453, -60.62152099609375, 203.0434112548828, -164.46240234375, 114.83397674560547, -0.9009323120117188, -77.85214233398438, -165.69708251953125, 213.31593322753906, 205.82589721679688, 65.89447784423828, 14.109764099121094, 137.2200927734375, 384.0369873046875, 45.561119079589844, 87.07476806640625, 167.5951690673828, 375.20220947265625, 299.84674072265625, -93.01840209960938, 46.09864807128906, -36.509864807128906, 122.64983367919922, 208.68154907226562, 25.2308349609375, 35.09919738769531, 249.65664672851562, 454.783447265625, 79.93601989746094, 76.8988265991211, 197.50860595703125, -22.340087890625, 113.80888366699219, 33.919219970703125, 95.92762756347656, 63.07008361816406, 275.6991882324219, 135.23556518554688, 34.05379867553711, 285.11669921875, 123.4804458618164, 71.1168212890625, -41.23439025878906, -43.818912506103516, -164.1507110595703, 264.3172302246094, 80.97822570800781, 103.3712158203125, 86.20899963378906, 347.96820068359375, 177.2297821044922, 5.09703254699707, 92.73546600341797, 55.31016540527344, 88.48362731933594, 48.60826110839844, 152.79074096679688, 422.9185791015625, -20.549713134765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000199.npy"}
{"epoch": 0.2922173274596182, "step": 200, "batch_size": 64, "mean": 102.84646606445312, "std": 155.4116668701172, "min": -290.94134521484375, "p10": -45.29775428771971, "median": 84.64266586303711, "p90": 326.2521484375, "max": 531.9830322265625, "pos_frac": 0.78125, "sample": [231.9246826171875, 105.23777770996094, 59.67341232299805, 387.5226135253906, 65.29183959960938, 150.30398559570312, 15.5325927734375, -1.7749481201171875, 407.79681396484375, -92.30795288085938, 322.9635925292969, 239.98248291015625, 100.61031341552734, 3.1141319274902344, 85.23886108398438, 322.42498779296875, 216.3351287841797, 390.5396728515625, 5.4694976806640625, 327.6615295410156, 66.55996704101562, 0.46549034118652344, 161.09486389160156, 216.4155731201172, -25.61697769165039, 71.94158172607422, 182.75369262695312, -192.53509521484375, 531.9830322265625, 87.73102569580078, 103.51786804199219, -28.506595611572266, -5.1153411865234375, 73.9285888671875, 52.236663818359375, -28.21998405456543, -290.94134521484375, 5.001373291015625, 98.05519104003906, 350.0564270019531, 4.856351852416992, 89.9665756225586, 56.61650085449219, -12.062171936035156, 4.312564849853516, -158.95367431640625, 84.04647064208984, -52.49396514892578, 295.8688659667969, -27.356985092163086, 7.32305908203125, 414.57232666015625, 43.813568115234375, 27.67424774169922, 98.47581481933594, 253.73402404785156, 169.24758911132812, 159.14801025390625, 105.60479736328125, 167.42306518554688, -119.13604736328125, 140.27537536621094, -68.94148254394531, 123.81201934814453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000200.npy"}
{"epoch": 0.2936857562408223, "step": 201, "batch_size": 64, "mean": 101.25636291503906, "std": 143.0900115966797, "min": -281.8214416503906, "p10": -46.71069183349608, "median": 81.652587890625, "p90": 278.0167175292969, "max": 479.517822265625, "pos_frac": 0.8125, "sample": [252.13214111328125, 17.920286178588867, 212.99974060058594, -222.94656372070312, 48.598106384277344, -90.6611328125, 11.363895416259766, 14.739633560180664, -26.618881225585938, 85.60223388671875, 3.754425048828125, 27.126739501953125, 35.849212646484375, -22.068832397460938, 466.9206237792969, 106.66580963134766, 62.80314636230469, 99.34493255615234, 26.447242736816406, 232.94662475585938, 341.33331298828125, 129.29052734375, 10.442657470703125, 93.81657409667969, 219.16073608398438, 280.6267395019531, -78.57797241210938, 213.1425018310547, 191.05258178710938, -13.211698532104492, 73.45756530761719, 120.60137939453125, 126.69152069091797, 120.95077514648438, -32.821380615234375, -69.27418518066406, 176.160888671875, 71.6688003540039, 51.075645446777344, 479.517822265625, 104.4541015625, -22.37126922607422, 166.96484375, 77.70294189453125, 47.05467224121094, 166.18260192871094, 152.53146362304688, 245.18275451660156, 386.8250427246094, 76.56082153320312, 87.1095962524414, -68.43130493164062, 144.8993682861328, 71.2364501953125, -281.8214416503906, 295.05572509765625, 410.0599670410156, 139.64442443847656, 12.985834121704102, -52.66325378417969, 271.9266662597656, 50.20360565185547, 11.474853515625, 139.6148681640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000201.npy"}
{"epoch": 0.29515418502202645, "step": 202, "batch_size": 64, "mean": 71.93101501464844, "std": 142.24163818359375, "min": -211.66818237304688, "p10": -99.36132812499997, "median": 60.20570373535156, "p90": 256.66062622070314, "max": 526.6281127929688, "pos_frac": 0.703125, "sample": [526.6281127929688, -49.309329986572266, 106.27149200439453, 311.93505859375, 2.9747791290283203, -53.753868103027344, 212.54522705078125, -52.05113220214844, 123.39051818847656, 92.39949035644531, 49.89034652709961, -195.7517852783203, 173.23056030273438, 244.27850341796875, 208.02078247070312, 50.79832458496094, -199.77114868164062, 47.186614990234375, 103.8427963256836, 330.6583251953125, 113.5085678100586, 287.55609130859375, -133.1424560546875, 191.40582275390625, 24.269546508789062, 30.107131958007812, 401.897705078125, -17.169164657592773, 104.79502868652344, 139.42166137695312, 102.43626403808594, 124.25283813476562, 253.74014282226562, -26.722091674804688, -37.55438232421875, -115.255126953125, 80.14552307128906, 59.7470703125, 9.741950988769531, 225.84765625, 105.18539428710938, -8.33953857421875, 22.540103912353516, 262.6190490722656, 156.31246948242188, 111.77021789550781, 78.20578002929688, -211.66818237304688, 60.664337158203125, 94.00946044921875, 14.910770416259766, 43.78880310058594, -44.70585632324219, -69.3332748413086, 38.07171630859375, -110.83000183105469, 82.35084533691406, -72.60108947753906, -47.16397476196289, 53.570899963378906, 257.9122619628906, -134.71575927734375, 90.05972290039062, -21.473270416259766], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000202.npy"}
{"epoch": 0.2966226138032305, "step": 203, "batch_size": 64, "mean": 88.74185180664062, "std": 122.0002670288086, "min": -183.9979248046875, "p10": -64.21826019287109, "median": 71.45519256591797, "p90": 253.94359436035163, "max": 447.37249755859375, "pos_frac": 0.796875, "sample": [18.271747589111328, 133.57167053222656, 160.84982299804688, 66.21399688720703, -28.046428680419922, 286.0398254394531, -73.96928405761719, 135.84573364257812, 28.27666473388672, 117.8863525390625, 1.9560775756835938, 104.36136627197266, 105.22808074951172, 164.81655883789062, -108.01438903808594, 185.3732452392578, 129.8240966796875, -24.460189819335938, 191.14895629882812, 144.4223175048828, 264.6802673339844, 412.63018798828125, -74.71954345703125, 115.25592041015625, 129.6944580078125, 38.996055603027344, 447.37249755859375, 13.917709350585938, -84.72935485839844, 143.5240936279297, 0.7860641479492188, 168.90298461914062, 192.94802856445312, 261.86566162109375, 31.212646484375, 177.00289916992188, 82.04227447509766, 105.3673324584961, 75.01214599609375, 123.81525421142578, 31.323867797851562, 62.98786163330078, 45.38671875, 235.45877075195312, -54.96100616455078, -183.9979248046875, -59.61054992675781, 67.89823913574219, -26.133264541625977, 9.803058624267578, 165.3621826171875, 13.708183288574219, 25.90578842163086, 47.00249481201172, -66.1929931640625, -76.72109985351562, 141.40621948242188, 59.12322998046875, 267.0324401855469, 327.2779846191406, -6.952180862426758, 230.55174255371094, 35.22985076904297, 23.415206909179688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000203.npy"}
{"epoch": 0.29809104258443464, "step": 204, "batch_size": 64, "mean": 81.89997863769531, "std": 143.6380615234375, "min": -212.22216796875, "p10": -69.88979339599607, "median": 64.21564483642578, "p90": 304.6741271972658, "max": 475.9053955078125, "pos_frac": 0.703125, "sample": [219.25267028808594, 74.47735595703125, 43.474849700927734, 41.36686706542969, 121.7256851196289, -13.138229370117188, 221.09075927734375, -102.57477569580078, 91.04943084716797, 20.460474014282227, -41.56081771850586, 22.068082809448242, 14.201484680175781, 85.10277557373047, 55.700439453125, -38.029823303222656, -212.22216796875, 170.2845458984375, 51.553192138671875, 7.632097244262695, 262.82464599609375, -127.47061920166016, -27.838333129882812, -6.216327667236328, 70.86343383789062, -15.24893569946289, 71.90664672851562, 150.9446563720703, 152.5467071533203, -79.40974426269531, 397.67657470703125, -21.22077178955078, 62.4879150390625, 28.507299423217773, -46.15620422363281, 146.47915649414062, 49.25193786621094, 153.36868286132812, 428.247802734375, -199.50514221191406, 322.609619140625, 165.7071533203125, 163.25033569335938, 73.73530578613281, 94.84844970703125, -13.257570266723633, 42.10755157470703, 222.2102508544922, 332.1716613769531, -3.65057373046875, 84.2003173828125, 163.47317504882812, -121.43740844726562, 65.94337463378906, 475.9053955078125, 370.5435791015625, -29.071027755737305, 147.16139221191406, 23.597810745239258, 351.9154968261719, -119.53973388671875, 82.09140014648438, 110.80523681640625, -47.67657470703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000204.npy"}
{"epoch": 0.29955947136563876, "step": 205, "batch_size": 64, "mean": 108.63500213623047, "std": 140.55197143554688, "min": -252.134765625, "p10": -67.90677413940429, "median": 111.54692077636719, "p90": 275.973129272461, "max": 397.0291442871094, "pos_frac": 0.78125, "sample": [40.172950744628906, 133.660888671875, 221.05963134765625, 376.73529052734375, 114.91358184814453, 5.956827163696289, -8.789819717407227, 28.159423828125, 10.443649291992188, 228.08804321289062, 260.1221923828125, -89.07861328125, -99.65460205078125, 93.30618286132812, -15.099884033203125, 78.11692810058594, 16.869775772094727, 368.3751525878906, 232.07057189941406, 27.232601165771484, 102.90897369384766, 100.73199462890625, -140.55059814453125, 171.0452423095703, 304.6904296875, 242.01791381835938, 193.58694458007812, -2.8393592834472656, -67.0300064086914, 209.9935302734375, -44.89011764526367, 397.0291442871094, 127.61929321289062, 83.75403594970703, 37.786399841308594, 124.28410339355469, 391.0887145996094, 263.8450012207031, 120.62025451660156, 134.68711853027344, -207.78363037109375, 140.88002014160156, 120.28424835205078, 237.6821746826172, 150.62033081054688, -68.28253173828125, 146.4514923095703, 86.12165832519531, 281.1708984375, 80.75817108154297, 250.4630584716797, 196.13319396972656, 171.38327026367188, 71.12355041503906, 152.4199981689453, 350.2114562988281, 108.18025970458984, -12.413368225097656, 22.512222290039062, -252.134765625, -75.42219543457031, 219.170166015625, -31.273223876953125, 41.344207763671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000205.npy"}
{"epoch": 0.3010279001468429, "step": 206, "batch_size": 64, "mean": 117.12614440917969, "std": 127.1107177734375, "min": -239.77175903320312, "p10": -35.52505664825438, "median": 106.53827285766602, "p90": 290.371258544922, "max": 406.49346923828125, "pos_frac": 0.84375, "sample": [52.57080078125, 184.94659423828125, 86.17326354980469, 122.87895202636719, 406.49346923828125, 303.2294921875, 82.25331115722656, 50.995567321777344, 217.11412048339844, 258.67681884765625, 55.190887451171875, 132.93150329589844, 19.97505760192871, 106.21595001220703, -239.77175903320312, 67.55877685546875, 77.27410888671875, 162.4696502685547, 60.175148010253906, -19.674283981323242, 215.2197265625, -42.31824493408203, 234.487548828125, 351.1875305175781, 124.57766723632812, 192.54635620117188, 28.34101104736328, 202.57896423339844, -92.91252136230469, -107.14009857177734, 192.0812530517578, 310.482177734375, 211.76321411132812, 221.4788055419922, 4.919221878051758, 250.07424926757812, 74.43360137939453, -66.8997802734375, 158.80441284179688, -135.69947814941406, 48.03826141357422, 214.48513793945312, 11.717884063720703, 167.56842041015625, 86.09580993652344, -12.421127319335938, 118.0368881225586, 84.04656219482422, 191.6936798095703, 93.69924926757812, -60.953834533691406, 6.410453796386719, 101.58556365966797, 184.49893188476562, 355.0924072265625, 260.36871337890625, -4.034433364868164, 118.421630859375, 106.860595703125, 316.1591491699219, 66.36444854736328, 21.765037536621094, 308.9014892578125, 195.98928833007812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000206.npy"}
{"epoch": 0.302496328928047, "step": 207, "batch_size": 64, "mean": 134.46798706054688, "std": 155.47544860839844, "min": -134.90017700195312, "p10": -28.983270263671873, "median": 89.51468658447266, "p90": 314.5174499511719, "max": 586.1400756835938, "pos_frac": 0.84375, "sample": [20.64529037475586, -134.90017700195312, 62.11328887939453, 98.21537780761719, 181.89547729492188, 46.83312225341797, 317.874267578125, -30.126731872558594, -15.913528442382812, 12.337387084960938, 89.88482666015625, 77.12053680419922, 299.4517517089844, -9.103912353515625, 133.91940307617188, 156.9066619873047, 53.95643615722656, 90.97453308105469, 34.784854888916016, 36.48834991455078, 50.84144973754883, 167.91680908203125, 29.44007110595703, 254.68186950683594, 28.106918334960938, -62.628326416015625, 306.68487548828125, 61.1783447265625, 31.27730369567871, -55.99091720581055, 282.0596618652344, 268.49957275390625, -57.77497100830078, 0.7751312255859375, 30.76410675048828, 260.58148193359375, 35.91157531738281, 230.7598114013672, 153.6181640625, 245.7086639404297, 261.1004333496094, 124.58509826660156, 109.73771667480469, 132.5054931640625, 262.81121826171875, 249.09567260742188, 84.91121673583984, 385.1063232421875, 505.5529479980469, 280.30999755859375, 46.686004638671875, 438.06207275390625, -48.100975036621094, 45.69001007080078, 586.1400756835938, 42.316162109375, 89.14454650878906, 99.17733764648438, 86.1826171875, 522.6964111328125, -84.45548248291016, 195.13400268554688, 432.1093444824219, -26.31519317626953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000207.npy"}
{"epoch": 0.3039647577092511, "step": 208, "batch_size": 64, "mean": 106.39715576171875, "std": 156.0309600830078, "min": -335.8766174316406, "p10": -41.075463104248044, "median": 77.94448852539062, "p90": 324.5382110595704, "max": 441.44024658203125, "pos_frac": 0.765625, "sample": [-208.6074676513672, 144.7502899169922, -153.2298583984375, 297.357177734375, 23.94806671142578, -20.347736358642578, 378.5699462890625, 30.95991325378418, 111.97469329833984, 249.5965576171875, 244.75318908691406, 55.489173889160156, 79.91171264648438, 166.4991912841797, 207.18612670898438, 59.227928161621094, 441.44024658203125, 19.629486083984375, 429.48272705078125, 137.31600952148438, -6.756378173828125, 32.01696014404297, 245.44314575195312, 174.54837036132812, 75.97726440429688, 27.85858917236328, 52.579345703125, -3.407684326171875, -119.54948425292969, 280.64764404296875, -142.5906982421875, -7.870700836181641, 49.28053665161133, 106.97516632080078, 138.91461181640625, 300.76788330078125, 279.4596252441406, 39.74522018432617, -12.338279724121094, 240.44522094726562, -41.980743408203125, 69.70906066894531, 26.07275390625, 340.4796142578125, 141.4678955078125, 337.63189697265625, 98.9030990600586, -335.8766174316406, -38.96314239501953, 142.81509399414062, 15.61566162109375, 126.21183776855469, 334.7254943847656, 99.87339782714844, 48.90306091308594, -132.27676391601562, 20.204132080078125, 380.1390075683594, 183.37374877929688, 289.11773681640625, 202.56622314453125, 70.494384765625, -1.0854339599609375, -16.757415771484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000208.npy"}
{"epoch": 0.3054331864904552, "step": 209, "batch_size": 64, "mean": 93.75180053710938, "std": 150.7190704345703, "min": -327.6315612792969, "p10": -83.88482818603515, "median": 90.0728988647461, "p90": 311.834701538086, "max": 427.9144287109375, "pos_frac": 0.71875, "sample": [334.42462158203125, 80.84768676757812, 175.1008758544922, -44.67296600341797, 366.00152587890625, 394.760009765625, 318.9383239746094, 112.45735168457031, 125.60291290283203, 102.67095947265625, -66.71406555175781, 94.56529998779297, 165.16860961914062, 98.53389739990234, 164.01632690429688, 142.09909057617188, 162.56227111816406, -163.25546264648438, -87.25576782226562, 57.930076599121094, 250.35540771484375, 224.89779663085938, -3.8870792388916016, -94.943359375, 50.226165771484375, -46.2999267578125, -13.931665420532227, 6.800010681152344, 102.95179748535156, 151.1984405517578, 116.85260009765625, 85.58049774169922, -123.08866119384766, -28.956607818603516, 60.969886779785156, 66.86407470703125, -76.01930236816406, -34.225215911865234, 374.8949279785156, 295.25958251953125, 49.723236083984375, 427.9144287109375, 9.816452026367188, -37.01775360107422, -26.24700927734375, 41.822425842285156, 51.62776184082031, 75.20115661621094, 256.40936279296875, 137.9072723388672, 141.86517333984375, -37.010826110839844, -127.02273559570312, -327.6315612792969, 180.82489013671875, 104.29367065429688, 31.61099624633789, 52.402374267578125, 133.44479370117188, -149.64041137695312, 343.00286865234375, 292.5667419433594, 208.0614013671875, 266.9099426269531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000209.npy"}
{"epoch": 0.3069016152716593, "step": 210, "batch_size": 64, "mean": 108.5168228149414, "std": 148.2569580078125, "min": -227.63223266601562, "p10": -30.519593811035154, "median": 82.30493927001953, "p90": 321.5768676757814, "max": 486.8726806640625, "pos_frac": 0.78125, "sample": [8.6988525390625, 15.524835586547852, 132.27294921875, 68.8065185546875, -32.01722717285156, -89.28440856933594, 186.55789184570312, -25.010149002075195, 141.71969604492188, -36.078102111816406, 193.9903564453125, 20.19879913330078, 31.941509246826172, 115.5357894897461, 271.64447021484375, 145.53829956054688, 0.9132537841796875, 73.94319152832031, 211.10202026367188, 15.152484893798828, 290.08074951171875, 90.66668701171875, 31.864686965942383, 232.6087646484375, 262.770263671875, 6.096029281616211, 15.951789855957031, 333.0264892578125, 189.24517822265625, 370.5184326171875, 126.29805755615234, 254.0996551513672, 113.06046295166016, 292.359130859375, -134.74769592285156, -25.59100341796875, -82.04290771484375, -26.748214721679688, 29.27855110168457, 11.296001434326172, 138.52186584472656, 165.82525634765625, 353.7724609375, 113.63375854492188, -6.263235092163086, -27.025115966796875, 437.6940002441406, -12.327743530273438, 128.84254455566406, 156.01333618164062, 276.26483154296875, 486.8726806640625, 294.861083984375, 48.066123962402344, -67.42282104492188, 93.87229919433594, -22.47655487060547, 0.6482582092285156, 11.182661056518555, 353.78582763671875, 5.72442626953125, 17.164512634277344, -227.63223266601562, 394.2364501953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000210.npy"}
{"epoch": 0.30837004405286345, "step": 211, "batch_size": 64, "mean": 92.89525604248047, "std": 165.66407775878906, "min": -225.66122436523438, "p10": -67.400138092041, "median": 62.42278861999512, "p90": 265.9051513671875, "max": 568.8287353515625, "pos_frac": 0.703125, "sample": [127.50043487548828, -59.36231994628906, 246.14154052734375, 98.4365463256836, 62.49691390991211, 164.43861389160156, -45.88714599609375, 112.6797866821289, 538.2449340820312, -225.66122436523438, -36.48863983154297, 58.69966125488281, 26.975658416748047, 11.9259033203125, 67.46075439453125, 233.78977966308594, 145.3170928955078, 209.37811279296875, 174.890380859375, -15.685256958007812, -0.9732894897460938, -47.783447265625, 207.63583374023438, 110.12832641601562, -70.84491729736328, -0.1981964111328125, -44.4459228515625, 513.904296875, 39.53920364379883, 568.8287353515625, 469.02716064453125, -46.26002502441406, -135.42800903320312, 143.16893005371094, 244.7734375, 318.67132568359375, -153.89254760742188, 142.16046142578125, 51.5548095703125, -14.673171997070312, 122.39949035644531, 409.395263671875, -34.14356231689453, 69.66670227050781, 10.468854904174805, 51.36393737792969, 72.74978637695312, -88.40556335449219, 14.839282989501953, 206.8876953125, -160.68104553222656, 62.348663330078125, 202.0836944580078, 39.46513366699219, 158.5477294921875, 268.880615234375, 93.7702407836914, 258.96240234375, 18.637496948242188, 131.6300811767578, 33.54970169067383, 16.17340087890625, -12.058090209960938, -191.4202880859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000211.npy"}
{"epoch": 0.30983847283406757, "step": 212, "batch_size": 64, "mean": 132.0928955078125, "std": 171.62367248535156, "min": -259.5975341796875, "p10": -64.03234558105467, "median": 134.81912231445312, "p90": 321.32439575195315, "max": 573.6549072265625, "pos_frac": 0.75, "sample": [-259.5975341796875, -89.83760070800781, 321.8402404785156, 261.05670166015625, 53.03966522216797, 244.13436889648438, 112.86944580078125, -70.21427917480469, -38.23109436035156, -1.6778030395507812, 238.5898895263672, 182.41598510742188, -207.8190155029297, -38.73675537109375, -23.579021453857422, 6.357017517089844, 322.3768310546875, 213.92483520507812, 433.170166015625, 244.7359161376953, 115.04745483398438, -110.67900848388672, 31.821643829345703, 93.71479797363281, 148.42666625976562, 173.4327392578125, 224.75241088867188, -80.38389587402344, 320.1207580566406, -10.077554702758789, 129.78433227539062, 179.71749877929688, 573.6549072265625, 173.39857482910156, 53.286705017089844, -231.50172424316406, 316.70526123046875, -49.60783386230469, 132.09243774414062, 566.803955078125, 295.8910217285156, 122.48773193359375, 42.638641357421875, 231.0367431640625, 14.891021728515625, 144.24520874023438, -33.66055679321289, 58.3040657043457, 315.5078125, 48.47957229614258, 144.19015502929688, 365.37286376953125, 398.7359619140625, 262.68310546875, 298.2597351074219, -7.609165191650391, 231.80857849121094, 67.11045837402344, 151.5928955078125, 137.54580688476562, 68.3076171875, 304.54400634765625, -41.12371826171875, 177.3782958984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000212.npy"}
{"epoch": 0.31130690161527164, "step": 213, "batch_size": 64, "mean": 126.10995483398438, "std": 153.3463592529297, "min": -205.50222778320312, "p10": -59.96718139648437, "median": 112.95209884643555, "p90": 344.5236450195313, "max": 440.9504699707031, "pos_frac": 0.796875, "sample": [288.17413330078125, -91.4705810546875, 32.702781677246094, -108.65206146240234, 226.9689178466797, 18.0057373046875, 65.63103485107422, 100.48954772949219, -205.50222778320312, -84.52114868164062, 10.473073959350586, 304.82049560546875, 351.6200866699219, -33.302589416503906, 87.83919525146484, 178.80645751953125, 9.224679946899414, 67.65571594238281, 162.66729736328125, 321.48028564453125, 429.974609375, 312.3218994140625, 48.60558319091797, 412.11810302734375, 166.71888732910156, 278.5880126953125, 378.9471740722656, -16.410228729248047, -4.302894592285156, -15.695110321044922, 182.96078491210938, 125.4146499633789, 201.3338623046875, 125.98796844482422, -56.476837158203125, 15.81475830078125, 175.016357421875, 347.7771911621094, 277.0361328125, 147.628173828125, -8.238521575927734, 193.41380310058594, 336.9320373535156, -61.463043212890625, 203.0135040283203, 62.091705322265625, 169.34329223632812, 440.9504699707031, -138.56341552734375, 196.42166137695312, 258.7027587890625, 301.3243408203125, 349.8289794921875, 47.98768997192383, 9.811294555664062, 5.817584991455078, -127.22249603271484, 77.57522583007812, 96.96216583251953, 63.71723175048828, 23.32311248779297, 25.514419555664062, 165.0974884033203, 142.22596740722656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000213.npy"}
{"epoch": 0.31277533039647576, "step": 214, "batch_size": 64, "mean": 113.76599884033203, "std": 147.74057006835938, "min": -176.06768798828125, "p10": -50.10351638793945, "median": 90.65762329101562, "p90": 293.58701477050784, "max": 516.3482666015625, "pos_frac": 0.78125, "sample": [57.27643585205078, 410.5124206542969, 14.134746551513672, 199.75022888183594, -10.394132614135742, -12.644857406616211, -75.51884460449219, -176.06768798828125, 275.6064453125, -83.44461059570312, 174.19851684570312, 39.900001525878906, 109.50794219970703, -0.7567424774169922, 31.473953247070312, 102.35261535644531, 230.00088500976562, -27.470544815063477, 52.15150451660156, 189.89454650878906, 67.3169937133789, 63.331687927246094, 109.54619598388672, 274.620849609375, 244.60360717773438, 81.00303649902344, 17.31584930419922, 1.906869888305664, 123.404541015625, -45.18782043457031, 100.31221008300781, 46.354042053222656, 6.383110046386719, 340.1068420410156, -72.8541030883789, 516.3482666015625, 344.9813232421875, 7.9580535888671875, 26.2196044921875, 395.7003479003906, 214.0360870361328, 207.32894897460938, 262.16265869140625, 295.742431640625, -141.55946350097656, 510.94482421875, 25.426488876342773, 200.55450439453125, 219.03738403320312, 104.4903793334961, -50.93614959716797, 178.80592346191406, -42.98876190185547, 116.41339111328125, -48.16070556640625, 234.31063842773438, 77.23274230957031, 288.5577087402344, 65.5896987915039, 151.3194580078125, 131.0456085205078, 126.83280944824219, -55.77208709716797, 60.775020599365234], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000214.npy"}
{"epoch": 0.3142437591776799, "step": 215, "batch_size": 64, "mean": 118.28256225585938, "std": 137.79710388183594, "min": -149.2458038330078, "p10": -62.97738494873047, "median": 104.36443710327148, "p90": 292.84925842285156, "max": 520.306396484375, "pos_frac": 0.828125, "sample": [67.91455078125, 328.4442443847656, 124.2568359375, 266.98822021484375, -63.336669921875, -95.79232788085938, 55.161781311035156, 121.19031524658203, 140.26036071777344, 97.63774871826172, 130.15147399902344, 44.424049377441406, 1.466684341430664, -30.556188583374023, 414.0084228515625, 257.470703125, 243.64215087890625, 288.8614501953125, 520.306396484375, 165.2356414794922, 103.69068908691406, 407.07440185546875, 110.01807403564453, -91.16679382324219, 73.83401489257812, 197.2139892578125, -68.08460998535156, 66.96826934814453, 192.51666259765625, 252.41726684570312, 23.22352409362793, 69.32170104980469, 50.843780517578125, -5.896902084350586, 158.2902374267578, 95.8266372680664, -121.9457778930664, 149.7149200439453, 196.91445922851562, 254.39410400390625, 351.01019287109375, 105.0381851196289, 195.86013793945312, -16.016952514648438, 127.73100280761719, 50.47125244140625, 48.567420959472656, -62.13905334472656, 43.20717239379883, 203.70132446289062, -149.2458038330078, 152.05816650390625, 388.2032470703125, 32.343360900878906, 82.69110107421875, 97.5428466796875, 294.5583190917969, 11.283004760742188, 205.67323303222656, 33.892799377441406, 106.36557006835938, 45.258758544921875, -85.22869873046875, 114.35242462158203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000215.npy"}
{"epoch": 0.315712187958884, "step": 216, "batch_size": 64, "mean": 117.41053771972656, "std": 167.65435791015625, "min": -285.84002685546875, "p10": -71.28892593383789, "median": 101.4305191040039, "p90": 362.97230224609376, "max": 563.2782592773438, "pos_frac": 0.75, "sample": [185.90841674804688, 410.68658447265625, -72.33698272705078, 142.81549072265625, 202.28228759765625, 42.551177978515625, -183.97972106933594, 157.96499633789062, 215.67202758789062, 18.72602081298828, 563.2782592773438, 98.94740295410156, 111.74465942382812, -71.64564514160156, -16.01529312133789, -13.734466552734375, -90.92755889892578, 206.05178833007812, 34.96384811401367, -175.03549194335938, 2.2858734130859375, 306.798095703125, -21.982940673828125, -16.551727294921875, 111.21463012695312, 150.91285705566406, 49.4320068359375, 191.03797912597656, 121.22178649902344, 7.891935348510742, 103.91363525390625, 197.90863037109375, 25.792129516601562, -54.46379852294922, 384.5423889160156, 365.50604248046875, -65.89588928222656, 310.8004150390625, 40.92616271972656, 80.40518951416016, 55.649803161621094, 264.5892639160156, -4.030050277709961, -285.84002685546875, 58.98048400878906, 184.9562225341797, 239.16177368164062, 396.427490234375, 46.23420715332031, -70.45658111572266, 274.70703125, 40.58729553222656, 115.3460922241211, 170.98947143554688, -46.36627960205078, 332.47052001953125, 237.39852905273438, 334.40972900390625, 396.1694030761719, 377.97247314453125, 20.7998046875, -72.18695831298828, 29.630897521972656, 357.06024169921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000216.npy"}
{"epoch": 0.31718061674008813, "step": 217, "batch_size": 64, "mean": 136.8664093017578, "std": 155.13499450683594, "min": -171.30088806152344, "p10": -16.147467803955074, "median": 92.16363143920898, "p90": 370.6636474609375, "max": 533.6607666015625, "pos_frac": 0.859375, "sample": [140.39236450195312, 137.27224731445312, 91.58841705322266, 132.76907348632812, 63.270240783691406, 50.857139587402344, 68.72396087646484, 233.1149444580078, 75.40677642822266, 92.13397216796875, 54.082427978515625, 372.5784912109375, 145.72946166992188, 366.1956787109375, -128.41119384765625, 3.4463272094726562, 34.65943908691406, 1.275115966796875, 200.5704345703125, 533.6607666015625, -1.5408363342285156, 62.630760192871094, 266.9100036621094, 421.3280944824219, 92.19329071044922, 210.66946411132812, -29.14820098876953, 69.58140563964844, -171.30088806152344, 114.53961181640625, 484.1814880371094, 200.71060180664062, 9.465330123901367, 63.1368408203125, 257.1153564453125, 2.00360107421875, 230.23397827148438, 288.4098205566406, -12.201919555664062, 491.03564453125, 427.91619873046875, -90.2258529663086, 341.033203125, 277.43756103515625, 138.25587463378906, 450.9071350097656, 4.6399993896484375, 97.73739624023438, 70.2313232421875, 63.963531494140625, 70.85609436035156, 164.71817016601562, 260.91595458984375, 1.1201343536376953, 153.341064453125, 75.40911865234375, -41.91832733154297, -17.838417053222656, 25.86913299560547, -35.24894714355469, 128.60801696777344, 206.48728942871094, 245.57347106933594, 20.391469955444336], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000217.npy"}
{"epoch": 0.3186490455212922, "step": 218, "batch_size": 64, "mean": 106.43551635742188, "std": 161.29302978515625, "min": -261.2113037109375, "p10": -75.52274780273437, "median": 80.03852081298828, "p90": 330.3433074951172, "max": 555.0560913085938, "pos_frac": 0.75, "sample": [92.43693542480469, 61.472694396972656, 172.76632690429688, 248.7497100830078, 149.165283203125, 30.583263397216797, 331.8722839355469, 77.21000671386719, 10.175912857055664, 72.30821990966797, 156.6066436767578, 51.9366455078125, -18.920989990234375, 138.67697143554688, 451.33984375, 174.9502410888672, 119.38893127441406, -27.347557067871094, 135.87814331054688, 195.54541015625, 37.38056945800781, 57.36046600341797, 4.096279144287109, 236.83560180664062, -57.87312316894531, -261.2113037109375, 326.77569580078125, 97.22271728515625, 186.508544921875, 203.4215545654297, 34.54695129394531, 415.1212158203125, 414.01971435546875, 247.79934692382812, 201.80511474609375, 66.05039978027344, -21.03871726989746, -26.980409622192383, 80.74154663085938, -15.34317398071289, -37.856414794921875, -99.33257293701172, 144.11810302734375, -91.36256408691406, -74.02803802490234, -50.13477325439453, 555.0560913085938, 332.35693359375, 208.7912139892578, -76.16333770751953, -150.57498168945312, 235.00877380371094, 446.69482421875, 12.102930068969727, 47.01688003540039, 79.33549499511719, 158.92152404785156, 105.5516586303711, 44.400978088378906, -175.16766357421875, 79.09717559814453, -148.7694549560547, 303.532470703125, 111.24362182617188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000218.npy"}
{"epoch": 0.3201174743024963, "step": 219, "batch_size": 64, "mean": 92.58406829833984, "std": 174.80233764648438, "min": -423.2983703613281, "p10": -112.45169448852536, "median": 74.52236557006836, "p90": 351.8211486816407, "max": 479.76385498046875, "pos_frac": 0.75, "sample": [458.57196044921875, 156.01193237304688, 7.969358444213867, 334.3529357910156, 5.245248794555664, 429.39776611328125, -423.2983703613281, 16.055688858032227, 35.94122314453125, 315.0036315917969, -11.896499633789062, 99.06269836425781, 206.64505004882812, 449.01214599609375, 479.76385498046875, 169.10202026367188, 60.74559783935547, 257.71661376953125, 112.77218627929688, 103.4595947265625, 273.7991638183594, 36.92295837402344, -123.4122543334961, 1.361083984375, -53.01000213623047, -68.01789093017578, 359.3075256347656, -4.601287841796875, 364.2367248535156, 391.05877685546875, 41.262367248535156, 272.6660461425781, -8.687515258789062, 48.81639099121094, 7.8901519775390625, 173.25108337402344, 114.56038665771484, 17.898902893066406, 114.20867919921875, 30.0029296875, 300.0002136230469, 131.866943359375, 100.374755859375, -175.05184936523438, 83.49797058105469, 161.28109741210938, -174.74169921875, 6.057626724243164, 9.149267196655273, 224.69027709960938, 26.493408203125, -129.7379150390625, -1.1455001831054688, -51.76438522338867, -233.8028106689453, 94.50641632080078, 167.1044921875, 83.79544830322266, -89.07099914550781, -30.771514892578125, 144.9273681640625, 68.08389282226562, 80.9608383178711, -122.47199249267578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000219.npy"}
{"epoch": 0.32158590308370044, "step": 220, "batch_size": 64, "mean": 77.35502624511719, "std": 127.04686737060547, "min": -246.79562377929688, "p10": -70.32778930664062, "median": 74.8817367553711, "p90": 199.86394958496095, "max": 445.5641174316406, "pos_frac": 0.734375, "sample": [166.76779174804688, 49.894630432128906, 70.14093017578125, -19.83245086669922, 41.41022491455078, 326.8800048828125, 360.1448669433594, 10.023326873779297, 147.64869689941406, 155.01295471191406, 124.6735610961914, 278.3518981933594, 136.1800537109375, 144.74508666992188, -219.53598022460938, 107.5230712890625, 169.54519653320312, 43.12725830078125, 135.21054077148438, 196.42372131347656, 68.31260681152344, 23.722986221313477, 4.6035614013671875, -246.79562377929688, -68.0573501586914, 201.3383331298828, -22.506723403930664, 50.956878662109375, 206.6112060546875, 62.863529205322266, -63.36279296875, 160.83045959472656, 127.14790344238281, -21.169998168945312, 215.6049346923828, 115.65265655517578, 445.5641174316406, 151.9996795654297, -5.756843566894531, 110.3586654663086, 14.091312408447266, 49.76286697387695, -0.9748134613037109, 105.47694396972656, -112.23441314697266, 140.07037353515625, 64.43997192382812, 133.5565643310547, -142.6920166015625, 79.5023193359375, 70.26115417480469, -0.6167221069335938, 147.04510498046875, -49.90592956542969, 46.507694244384766, 153.18540954589844, 176.14703369140625, -71.30083465576172, 176.79806518554688, 179.92886352539062, -20.567180633544922, 156.5789794921875, -139.31826782226562, -147.27389526367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000220.npy"}
{"epoch": 0.32305433186490456, "step": 221, "batch_size": 64, "mean": 170.87864685058594, "std": 162.7834014892578, "min": -184.22882080078125, "p10": -17.32612380981445, "median": 174.84353637695312, "p90": 383.0998687744141, "max": 545.7762451171875, "pos_frac": 0.84375, "sample": [77.51978302001953, 111.41458129882812, 250.08973693847656, -5.7684173583984375, 97.86933898925781, 545.7762451171875, 2.5285110473632812, 394.9490966796875, 222.57476806640625, 276.8643798828125, 280.29168701171875, 15.09262466430664, 294.10125732421875, 268.6914978027344, 153.23793029785156, 272.46661376953125, -14.999603271484375, 451.3849182128906, -3.413942337036133, 94.2043228149414, 161.00128173828125, 305.44635009765625, 386.3966369628906, 467.7264404296875, 483.8883056640625, 246.82196044921875, 209.21817016601562, -184.22882080078125, 54.583038330078125, 65.78722381591797, 170.55630493164062, 197.0375213623047, -29.349166870117188, 62.43761444091797, 65.46212768554688, 179.13076782226562, 114.2826156616211, -132.73062133789062, 258.2276611328125, 375.40740966796875, -18.323204040527344, 296.53265380859375, 221.37631225585938, 318.5829162597656, 114.217529296875, 223.65008544921875, 33.644500732421875, 373.8883056640625, 322.80548095703125, 241.525634765625, -37.33176803588867, -108.2240982055664, 6.525413513183594, 258.6244201660156, 47.98731994628906, -76.19184875488281, 9.917093276977539, 133.75314331054688, 47.536869049072266, 202.92462158203125, 303.2375793457031, 90.90605926513672, 485.47552490234375, 201.2147979736328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000221.npy"}
{"epoch": 0.3245227606461087, "step": 222, "batch_size": 64, "mean": 128.28977966308594, "std": 163.05369567871094, "min": -244.22531127929688, "p10": -46.39433975219727, "median": 118.20893859863281, "p90": 355.80759582519545, "max": 565.4016723632812, "pos_frac": 0.78125, "sample": [-46.618385314941406, 438.7974548339844, -5.5164337158203125, -107.60513305664062, -6.162437438964844, -88.05498504638672, -14.932220458984375, 124.4954833984375, 5.912261962890625, 230.39047241210938, 104.75786590576172, 122.25334930419922, 418.2918701171875, 89.56364440917969, 122.5658187866211, 236.22915649414062, -244.22531127929688, 160.25927734375, 258.30316162109375, 174.35617065429688, 155.43251037597656, 113.93173217773438, -211.3231201171875, 325.0828552246094, -125.6063461303711, 267.98126220703125, 500.0406494140625, 91.68099975585938, 42.72949981689453, 15.597427368164062, 164.80068969726562, 22.51409912109375, 159.964599609375, -45.87156677246094, 205.8633575439453, 197.73277282714844, 44.78623962402344, 150.65823364257812, -22.706714630126953, 229.5067138671875, -29.715675354003906, 99.27281188964844, 83.8372573852539, 81.89479064941406, 324.3866271972656, 240.02093505859375, 114.1645278930664, 240.58953857421875, 154.12095642089844, 46.8251953125, -36.43989562988281, 166.73695373535156, 368.975341796875, 68.63592529296875, 416.2417907714844, 208.28060913085938, 124.89115905761719, -109.21076965332031, 218.30419921875, 19.97808074951172, 565.4016723632812, 55.08856201171875, 422.08251953125, 110.32587432861328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000222.npy"}
{"epoch": 0.32599118942731276, "step": 223, "batch_size": 64, "mean": 107.05718994140625, "std": 179.18881225585938, "min": -344.72625732421875, "p10": -113.98109283447265, "median": 109.78412628173828, "p90": 312.3448425292969, "max": 626.3522338867188, "pos_frac": 0.75, "sample": [31.151718139648438, -70.7936019897461, 217.04800415039062, 137.92828369140625, 138.91259765625, 358.576171875, 197.71902465820312, 102.73540496826172, 211.08924865722656, 162.60723876953125, -154.35472106933594, 55.06317138671875, 96.29374694824219, 235.59976196289062, 300.22113037109375, 292.81439208984375, 135.80392456054688, 87.39588928222656, 553.9404907226562, -12.473812103271484, 134.85267639160156, -54.70806121826172, 93.17910766601562, -60.69992446899414, -58.07439422607422, 305.2951965332031, -149.31736755371094, 375.3072509765625, 104.46540069580078, -116.77799224853516, 3.33636474609375, 9.352127075195312, 265.35333251953125, -344.72625732421875, 122.86865234375, 138.58566284179688, -203.8755645751953, 199.68948364257812, 71.70034790039062, 626.3522338867188, 233.26242065429688, -107.45499420166016, 176.85067749023438, 266.2454833984375, -142.139892578125, -60.85208511352539, 284.2252197265625, 331.154541015625, 155.7109832763672, -5.010108947753906, 46.980743408203125, 39.80671691894531, 33.644378662109375, 406.37109375, 26.48644256591797, 27.99968719482422, -12.132116317749023, 10.037221908569336, 143.30545043945312, 129.31121826171875, 115.10285186767578, 164.79501342773438, -266.8430480957031, 315.3661193847656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000223.npy"}
{"epoch": 0.3274596182085169, "step": 224, "batch_size": 64, "mean": 124.37174987792969, "std": 186.22703552246094, "min": -333.69287109375, "p10": -74.87051162719726, "median": 70.05747604370117, "p90": 363.2789642333985, "max": 670.1328125, "pos_frac": 0.796875, "sample": [259.4755859375, 2.6950531005859375, 137.14349365234375, 26.682655334472656, -85.17411041259766, 564.3087768554688, 245.2152862548828, 45.259376525878906, 72.6169662475586, -333.69287109375, 46.210914611816406, 183.0914306640625, 151.13441467285156, 155.3034210205078, 450.8253173828125, 88.89932250976562, 54.473289489746094, 227.6190948486328, 192.41644287109375, 670.1328125, 299.779296875, 107.90771484375, 357.4803161621094, 263.5874938964844, -137.0369415283203, -82.54733276367188, 276.1477966308594, 67.49798583984375, 238.79461669921875, -22.972381591796875, 6.2279510498046875, 39.83282470703125, 35.749366760253906, 634.0545654296875, 262.3411560058594, 239.4305419921875, 112.56787872314453, -75.62987518310547, 29.27381706237793, 24.110942840576172, 251.10235595703125, 1.02557373046875, 47.98224639892578, -24.505535125732422, 109.32003784179688, -84.21617889404297, -64.10920715332031, 12.330703735351562, -110.01420593261719, 29.30136489868164, 180.54412841796875, 126.2760009765625, 365.76409912109375, -5.057310104370117, 8.093708038330078, 65.37681579589844, 5.22540283203125, 327.2210998535156, 40.282623291015625, -73.09866333007812, 89.1968002319336, 419.31549072265625, 427.6029968261719, -16.40304946899414], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000224.npy"}
{"epoch": 0.328928046989721, "step": 225, "batch_size": 64, "mean": 146.93002319335938, "std": 186.6449737548828, "min": -262.6552429199219, "p10": -93.19714126586912, "median": 146.3915252685547, "p90": 402.68798217773445, "max": 734.08349609375, "pos_frac": 0.71875, "sample": [243.59771728515625, 166.80868530273438, 419.75128173828125, 365.77252197265625, 35.55016326904297, 113.46829986572266, 47.30419921875, -106.23931884765625, -144.38079833984375, 450.121826171875, 366.9530334472656, 277.7754821777344, -149.36268615722656, -72.82903289794922, 284.19085693359375, -44.03705978393555, -111.85689544677734, 21.77931785583496, -9.998088836669922, 68.86585998535156, 110.72354125976562, 143.56007385253906, 235.2613525390625, 168.96163940429688, -9.942506790161133, 145.31573486328125, 28.998138427734375, 172.16241455078125, -19.15450668334961, 256.94415283203125, 323.34356689453125, 177.9611053466797, 227.24842834472656, 102.21161651611328, -45.3378791809082, 471.96844482421875, 449.28564453125, -7.777290344238281, 251.57794189453125, 409.53717041015625, 99.65631103515625, 341.27117919921875, 193.0654754638672, -37.216392517089844, 163.43443298339844, 147.46731567382812, 734.08349609375, 282.5640563964844, 166.2137451171875, 78.60691833496094, 420.893310546875, -5.7135009765625, 235.11422729492188, -101.92633056640625, -103.04130554199219, 386.70654296875, 300.07183837890625, 178.18435668945312, 174.43072509765625, -262.6552429199219, 135.8955078125, 141.214111328125, -48.314491271972656, -32.5687255859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000225.npy"}
{"epoch": 0.3303964757709251, "step": 226, "batch_size": 64, "mean": 118.97482299804688, "std": 174.26934814453125, "min": -208.06114196777344, "p10": -39.074589920043934, "median": 79.1956558227539, "p90": 349.91426086425787, "max": 603.6814575195312, "pos_frac": 0.765625, "sample": [603.6814575195312, -158.21966552734375, -43.29940414428711, 14.127899169921875, -13.33660888671875, 169.58897399902344, -10.19287109375, 66.49118041992188, 174.22616577148438, -14.41103744506836, 133.31851196289062, 10.757369995117188, -7.115959167480469, 37.554176330566406, -185.69833374023438, 91.80735778808594, -110.12730407714844, 354.3565368652344, 337.6134338378906, 30.591026306152344, 339.5489501953125, 364.155029296875, 84.35384368896484, 135.52745056152344, 184.16285705566406, 297.09619140625, 18.233970642089844, -29.216690063476562, 132.05636596679688, 428.7747497558594, 303.26446533203125, -74.23694610595703, 181.85179138183594, 78.21660614013672, 47.27748107910156, 559.087890625, 234.14730834960938, 544.62841796875, 301.4974670410156, -9.266746520996094, -4.215576171875, 143.25784301757812, -208.06114196777344, 157.4666748046875, 45.259891510009766, 124.9937744140625, 4.847877502441406, 5.2266693115234375, 9.177835464477539, 80.1747055053711, -4.5018310546875, 270.766845703125, 108.52310180664062, 199.01535034179688, 49.958656311035156, 122.65577697753906, -89.91360473632812, 6.43739128112793, 504.0299072265625, 60.39800262451172, 43.54663848876953, 135.94003295898438, 35.274261474609375, 211.2565155029297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000226.npy"}
{"epoch": 0.33186490455212925, "step": 227, "batch_size": 64, "mean": 144.17855834960938, "std": 174.31427001953125, "min": -381.69488525390625, "p10": -16.484533309936516, "median": 110.09749603271484, "p90": 368.63389282226564, "max": 609.2084350585938, "pos_frac": 0.84375, "sample": [309.0401611328125, 108.0643310546875, 132.01226806640625, 382.3672180175781, 226.53817749023438, 37.4488525390625, 57.15901184082031, 101.15889739990234, 306.2354736328125, 19.756668090820312, 92.89142608642578, 109.11131286621094, 186.97451782226562, 40.660789489746094, -19.867481231689453, 27.165443420410156, 164.80361938476562, 103.66966247558594, 195.8448028564453, 609.2084350585938, 163.29627990722656, 78.68075561523438, 351.1079406738281, 220.1911163330078, 42.81847381591797, 288.60107421875, 164.32516479492188, 251.3868408203125, 105.00125122070312, 94.50050354003906, 179.66073608398438, 68.55259704589844, 350.42340087890625, 5.876323699951172, -81.54299926757812, -113.91130065917969, 187.62347412109375, 217.15750122070312, 503.20001220703125, 111.08367919921875, 191.05267333984375, 467.1683349609375, -113.45720672607422, 578.5487670898438, -5.453996658325195, 20.55127716064453, 188.69493103027344, 44.982460021972656, 222.10687255859375, 38.263675689697266, 132.50906372070312, 370.9454345703125, 116.009521484375, -8.590988159179688, 7.040239334106445, -51.16487121582031, 363.24029541015625, -381.69488525390625, -8.523712158203125, 89.89949035644531, -59.158531188964844, 23.268762588500977, 131.8353271484375, 491.0788879394531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000227.npy"}
{"epoch": 0.3333333333333333, "step": 228, "batch_size": 64, "mean": 103.1017074584961, "std": 165.70947265625, "min": -267.6720275878906, "p10": -105.69986801147459, "median": 106.22488403320312, "p90": 319.1811767578125, "max": 491.9019775390625, "pos_frac": 0.734375, "sample": [-176.20098876953125, -239.4617919921875, -267.6720275878906, 209.71304321289062, 140.8021240234375, 338.6832275390625, -198.00772094726562, 169.9336395263672, 194.67208862304688, -157.44439697265625, 28.037109375, 290.3927307128906, 491.9019775390625, 324.30670166015625, 463.03179931640625, 328.18231201171875, 354.91790771484375, 175.249755859375, 121.58470153808594, -33.93794250488281, -43.7823486328125, 305.6289367675781, 117.80835723876953, 307.22161865234375, 181.69149780273438, 156.04127502441406, 127.78694152832031, 17.770214080810547, -86.43083953857422, 109.93571472167969, 38.36918640136719, 175.30137634277344, 9.113510131835938, 205.52029418945312, 223.16006469726562, 95.04000091552734, 114.09415435791016, 81.03298950195312, 34.25569152832031, 35.114654541015625, 82.32003021240234, 238.105712890625, -86.50491333007812, -17.45220947265625, 102.51405334472656, 99.60755157470703, 444.8551940917969, 19.92766571044922, 83.60380554199219, 58.377647399902344, 265.9571533203125, -8.416812896728516, -24.0538330078125, 198.93798828125, 234.22142028808594, 163.5211181640625, 206.5637969970703, 111.43157196044922, -113.92627716064453, -24.440277099609375, -19.88990020751953, 24.907791137695312, -154.039794921875, -50.976829528808594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000228.npy"}
{"epoch": 0.33480176211453744, "step": 229, "batch_size": 64, "mean": 110.69569396972656, "std": 158.15969848632812, "min": -375.85211181640625, "p10": -40.04754104614257, "median": 97.5522346496582, "p90": 336.72923278808594, "max": 541.8796997070312, "pos_frac": 0.765625, "sample": [-13.071670532226562, 14.316545486450195, -104.029052734375, 406.7159423828125, 53.386898040771484, -172.21017456054688, 109.3499984741211, 179.93682861328125, 71.82183837890625, 214.75735473632812, 264.0787658691406, 342.6813049316406, 98.60060119628906, 348.9560852050781, 90.35951232910156, 96.50386810302734, 228.16275024414062, 107.44689178466797, 36.50258255004883, 334.8208312988281, -97.2099609375, 60.908721923828125, 0.6357231140136719, 218.3274383544922, 140.98585510253906, 44.210296630859375, 541.8796997070312, -156.64215087890625, 50.17811584472656, 239.9111328125, 155.88217163085938, 233.4991455078125, 173.93104553222656, 365.70867919921875, 337.547119140625, 46.280555725097656, 307.348388671875, 28.95136260986328, 115.386962890625, -43.401824951171875, 57.469581604003906, 127.39753723144531, 45.304344177246094, 200.15182495117188, -30.6213436126709, 166.2002716064453, 197.58746337890625, 222.69334411621094, -29.832366943359375, -135.96612548828125, 119.66565704345703, 428.1336669921875, 30.458349227905273, 74.5635986328125, 74.61905670166016, 155.2834930419922, -9.832695007324219, 180.3023681640625, -14.562393188476562, 172.28167724609375, -375.85211181640625, -0.4893035888671875, -11.616928100585938, -32.22087860107422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000229.npy"}
{"epoch": 0.33627019089574156, "step": 230, "batch_size": 64, "mean": 160.3502655029297, "std": 157.3489227294922, "min": -262.7828674316406, "p10": -24.905437278747556, "median": 148.70856475830078, "p90": 396.46502685546875, "max": 502.27984619140625, "pos_frac": 0.84375, "sample": [76.22685241699219, 356.3814392089844, -4.439451217651367, 292.730224609375, 215.55267333984375, 75.65074920654297, -26.348100662231445, 239.8948516845703, 102.91413879394531, 134.65736389160156, 201.93736267089844, 165.54580688476562, 13.300682067871094, 213.637939453125, 28.200241088867188, 113.69534301757812, -51.571571350097656, 75.2376708984375, 93.45907592773438, 135.07403564453125, 53.69012451171875, -89.54042053222656, -21.539222717285156, 199.35397338867188, 139.381591796875, 176.31805419921875, 203.7239990234375, 339.07196044921875, 306.2477111816406, 156.8143310546875, 80.64468383789062, 8.806968688964844, 312.39599609375, 450.1211853027344, 160.5964813232422, -91.7301254272461, 119.62689971923828, 170.32614135742188, 232.57705688476562, 462.56500244140625, -34.087093353271484, -6.2042694091796875, -262.7828674316406, -54.453590393066406, 30.562057495117188, 49.31597137451172, 391.0821533203125, 122.51019287109375, 371.022216796875, 426.2254638671875, 152.4287567138672, 105.96136474609375, 106.45350646972656, 18.750978469848633, 398.77197265625, 258.3953857421875, 478.4866638183594, 187.5776824951172, 411.6588134765625, 198.8474884033203, 217.97471618652344, 144.98837280273438, 225.4612579345703, 502.27984619140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000230.npy"}
{"epoch": 0.3377386196769457, "step": 231, "batch_size": 64, "mean": 114.06205749511719, "std": 151.45257568359375, "min": -136.669921875, "p10": -48.24774894714355, "median": 86.78449249267578, "p90": 331.6016998291017, "max": 645.1551513671875, "pos_frac": 0.8125, "sample": [458.481201171875, 83.19657897949219, -56.96697235107422, 285.437744140625, 19.675498962402344, 78.79273986816406, 340.751708984375, 7.253143310546875, 68.49578094482422, 110.2251968383789, 103.99732971191406, 347.31280517578125, -66.9654541015625, 79.46908569335938, 480.8500061035156, 136.2273712158203, 202.49794006347656, 91.621826171875, 145.54766845703125, 209.20822143554688, 394.6103820800781, 62.70474624633789, 111.32776641845703, 8.008071899414062, 42.87377166748047, -56.87989044189453, 310.2516784667969, -50.54072952270508, 208.5305938720703, 645.1551513671875, 448.3498840332031, 229.495849609375, 122.15681457519531, 26.44258689880371, -136.669921875, 68.61373901367188, 104.685546875, 157.94369506835938, 90.37240600585938, -42.8974609375, -5.524749755859375, 41.88520050048828, 211.2935791015625, 66.32625579833984, -21.92644500732422, 114.8645248413086, -25.574081420898438, -100.5413818359375, 51.20283126831055, 124.9671630859375, 290.81097412109375, 159.7866973876953, 81.01995086669922, 9.004268646240234, -30.865833282470703, 119.98519897460938, -94.00740051269531, 114.58879089355469, 22.95263671875, 17.004241943359375, 10.950836181640625, 138.557373046875, 97.819091796875, 35.74784851074219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000231.npy"}
{"epoch": 0.3392070484581498, "step": 232, "batch_size": 64, "mean": 93.53819274902344, "std": 160.38485717773438, "min": -161.5782012939453, "p10": -71.82919540405273, "median": 54.66155815124512, "p90": 300.40176086425794, "max": 613.5557861328125, "pos_frac": 0.71875, "sample": [175.28543090820312, 110.67205047607422, 8.675317764282227, 42.77204132080078, 17.2567138671875, -138.34852600097656, 313.4955139160156, 253.11294555664062, -31.60662841796875, 251.72442626953125, 43.320716857910156, 441.30059814453125, 82.08092498779297, -70.26142120361328, -25.78582763671875, 58.514854431152344, -36.37799072265625, 188.74716186523438, -104.79377746582031, 68.60660552978516, 148.6934051513672, 587.9395141601562, 67.04336547851562, 85.03260803222656, 133.146484375, -37.556819915771484, 613.5557861328125, -3.9091033935546875, 30.9715576171875, 253.86590576171875, 145.21380615234375, 221.92971801757812, -58.480438232421875, 74.9107666015625, -91.90121459960938, 137.48680114746094, -118.43901824951172, 19.825424194335938, -72.5010986328125, 423.8602294921875, 31.866973876953125, -51.498069763183594, 3.74700927734375, -96.15766906738281, 40.0500373840332, 78.66377258300781, 44.43743896484375, -161.5782012939453, 269.2220458984375, 124.82223510742188, 339.48236083984375, 23.144023895263672, -1.907470703125, 326.92047119140625, 203.64080810546875, -30.383378982543945, 112.76212310791016, 269.84967041015625, -20.103063583374023, 51.37165069580078, 57.15445327758789, 31.10037612915039, 78.58897399902344, 52.168663024902344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000232.npy"}
{"epoch": 0.3406754772393539, "step": 233, "batch_size": 64, "mean": 146.382568359375, "std": 211.47802734375, "min": -224.47433471679688, "p10": -67.9090103149414, "median": 100.92501831054688, "p90": 465.07917785644537, "max": 675.9614868164062, "pos_frac": 0.734375, "sample": [13.337663650512695, 12.632991790771484, 480.40081787109375, -33.7633056640625, 191.441650390625, 187.42132568359375, -162.41505432128906, -121.86551666259766, 2.1025848388671875, -177.95326232910156, 113.66529846191406, 28.8157958984375, 166.7107696533203, 260.4913330078125, 252.54293823242188, 675.9614868164062, 119.02509307861328, 76.45792388916016, 525.2583618164062, 335.76776123046875, 46.44273376464844, 159.26109313964844, 81.01298522949219, 550.517333984375, 15.755546569824219, 276.8924560546875, 0.259368896484375, -72.77593994140625, -174.9437255859375, -51.93062210083008, 120.00575256347656, 489.5055847167969, 213.35765075683594, 154.43589782714844, 300.35247802734375, 449.9139709472656, 38.18516540527344, 443.8704833984375, -56.55284118652344, 40.06202697753906, 125.65569305419922, 409.0858154296875, -35.81162643432617, 326.86285400390625, 63.71241760253906, 387.698974609375, -47.07705307006836, -224.47433471679688, -134.26805114746094, -20.678543090820312, 47.30949401855469, -27.07514190673828, 332.58154296875, 88.18473815917969, -47.23077392578125, 202.07020568847656, -22.57632064819336, 471.57855224609375, -38.556217193603516, 598.3289794921875, 378.2581787109375, 53.84504699707031, 309.32275390625, 202.07308959960938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000233.npy"}
{"epoch": 0.342143906020558, "step": 234, "batch_size": 64, "mean": 148.38348388671875, "std": 170.77529907226562, "min": -139.5249481201172, "p10": -49.31187133789061, "median": 132.557861328125, "p90": 370.84226074218753, "max": 640.353759765625, "pos_frac": 0.796875, "sample": [-71.80308532714844, 262.98895263671875, 281.28509521484375, 238.1875, 338.6470642089844, 70.63265228271484, 155.7034912109375, 181.75796508789062, 393.5873718261719, -36.29930114746094, 281.2586975097656, 156.90037536621094, -139.5249481201172, 11.909488677978516, 79.8155746459961, 253.25289916992188, 58.05230712890625, -81.09252166748047, 58.251800537109375, 284.6294250488281, 640.353759765625, 140.94146728515625, -12.556894302368164, -33.656028747558594, 66.59551239013672, -23.765335083007812, 177.06634521484375, 363.6763610839844, 5.392723083496094, 349.59161376953125, -25.72974967956543, 157.40032958984375, 430.8734130859375, 103.71405792236328, 487.73846435546875, 104.845458984375, 32.387184143066406, 131.0101318359375, 62.08561325073242, 267.07513427734375, 272.8082580566406, 138.65174865722656, 237.412353515625, 64.70708465576172, 23.52802276611328, 81.72955322265625, -93.6668701171875, 1.8185195922851562, 226.2418212890625, 87.66496276855469, 86.75971221923828, 463.14813232421875, 585.4630126953125, -95.80298614501953, 298.9860534667969, 163.63650512695312, -54.88868713378906, -11.246316909790039, 174.76329040527344, 49.24028015136719, 134.1055908203125, -101.35690307617188, 373.9133605957031, 185.74551391601562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000234.npy"}
{"epoch": 0.3436123348017621, "step": 235, "batch_size": 64, "mean": 98.07133483886719, "std": 169.71798706054688, "min": -408.2266845703125, "p10": -74.24182968139648, "median": 106.67793273925781, "p90": 299.18798828125006, "max": 508.32550048828125, "pos_frac": 0.734375, "sample": [32.26353454589844, 227.41030883789062, -408.2266845703125, 10.367095947265625, 161.50320434570312, 508.32550048828125, 165.0023193359375, 311.7880859375, -52.398685455322266, 202.92921447753906, -160.71139526367188, -26.012359619140625, 171.18185424804688, 279.25396728515625, 58.960784912109375, 34.63032531738281, -85.97525024414062, 17.09954071044922, 108.00685119628906, -287.7318420410156, 19.213672637939453, 17.109054565429688, -5.3837738037109375, 70.2997817993164, 334.016845703125, 231.7519073486328, -28.84081268310547, -57.115966796875, 157.31692504882812, 217.59104919433594, 123.60308837890625, 105.34901428222656, 307.73114013671875, -65.28839111328125, 127.40495300292969, 200.06390380859375, -4.051591873168945, 232.55645751953125, 24.205184936523438, 162.97921752929688, 29.969188690185547, 50.72515869140625, 502.5500183105469, 237.10888671875, 178.53465270996094, 159.1005096435547, 167.88824462890625, -39.549095153808594, -160.5421142578125, 140.28448486328125, 474.77374267578125, 259.20867919921875, -134.10400390625, 2.37066650390625, 113.39376831054688, -11.472494125366211, -78.07901763916016, 180.82485961914062, 96.25051879882812, 14.293365478515625, 189.68582153320312, 125.84400939941406, 404.4510192871094, -65.12358093261719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000235.npy"}
{"epoch": 0.34508076358296624, "step": 236, "batch_size": 64, "mean": 155.16970825195312, "std": 142.1499786376953, "min": -97.21076202392578, "p10": -15.90657997131347, "median": 127.36352157592773, "p90": 352.3702087402344, "max": 465.1845397949219, "pos_frac": 0.859375, "sample": [371.7690124511719, 85.56258392333984, 17.310836791992188, 157.80905151367188, 189.07847595214844, 128.2619171142578, 427.6526794433594, 21.97430419921875, 72.47076416015625, 126.00645446777344, 198.123291015625, 350.6183776855469, 97.50897216796875, 193.59701538085938, 308.69671630859375, 334.7117919921875, 56.86289978027344, 260.279541015625, -8.205802917480469, 378.430419921875, 138.54934692382812, -38.89201354980469, 82.02687072753906, 318.0270080566406, 224.29693603515625, 18.244075775146484, 130.70217895507812, 330.0325927734375, 242.11465454101562, 45.449913024902344, 465.1845397949219, 99.78644561767578, -48.71440124511719, 246.35704040527344, -2.6620216369628906, 241.14093017578125, 130.84036254882812, 444.90350341796875, 99.18961334228516, -19.206912994384766, 126.46512603759766, 110.0248794555664, 27.8861141204834, 49.20044708251953, 16.03982925415039, 191.0510711669922, 114.15756225585938, 94.02713775634766, -67.666015625, -97.21076202392578, 147.02728271484375, 450.5398254394531, 352.542236328125, 120.11285400390625, 215.24801635742188, 109.8724365234375, 315.9181213378906, 97.53936767578125, 2.0198211669921875, 190.8413543701172, -65.62326049804688, 351.96881103515625, -28.864307403564453, 191.8539581298828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000236.npy"}
{"epoch": 0.3465491923641703, "step": 237, "batch_size": 64, "mean": 119.95465850830078, "std": 176.00682067871094, "min": -343.18902587890625, "p10": -78.68788375854491, "median": 101.71115493774414, "p90": 362.80506591796893, "max": 681.1165161132812, "pos_frac": 0.765625, "sample": [203.70240783691406, 203.10682678222656, 183.17977905273438, 154.48526000976562, -3.8710174560546875, 101.71092987060547, 95.86713409423828, 147.11502075195312, 37.861385345458984, 166.38943481445312, 63.282691955566406, 30.94195556640625, 391.27423095703125, 147.38674926757812, 220.02566528320312, -71.05809020996094, -106.12687683105469, 141.3370819091797, -80.86676788330078, 100.90149688720703, 141.60012817382812, -108.36344909667969, 384.3065185546875, 681.1165161132812, -84.41557312011719, 403.3546142578125, 312.635009765625, 14.244842529296875, 16.801712036132812, 13.632793426513672, 474.31072998046875, 101.71138000488281, 228.36549377441406, 214.9481658935547, 14.164772033691406, 241.32875061035156, 22.525169372558594, 150.1800537109375, 290.2857666015625, -41.51293182373047, 241.99331665039062, 86.02425384521484, 147.1114501953125, 273.2406921386719, 424.70697021484375, 43.886627197265625, 0.482421875, -5.273780822753906, -343.18902587890625, 277.16094970703125, -45.841468811035156, 80.95927429199219, -179.05160522460938, -43.92576599121094, 191.22552490234375, 283.59771728515625, -73.60382080078125, 451.57183837890625, 7.493125915527344, -116.53028869628906, -48.95233154296875, 122.55176544189453, 68.902587890625, 234.69253540039062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000237.npy"}
{"epoch": 0.34801762114537443, "step": 238, "batch_size": 64, "mean": 119.5045394897461, "std": 177.6407470703125, "min": -203.0545654296875, "p10": -34.43104171752929, "median": 76.63698959350586, "p90": 291.06929626464853, "max": 772.7348022460938, "pos_frac": 0.78125, "sample": [-6.5024261474609375, -27.90979766845703, 4.9058837890625, 139.05064392089844, 20.54556655883789, -6.317161560058594, 134.2519989013672, 252.663818359375, -120.1318359375, 423.17498779296875, 72.51382446289062, 159.82229614257812, 66.84854125976562, 61.08743667602539, -6.810464859008789, 185.12393188476562, 301.0137634277344, -69.90475463867188, 56.10950469970703, 47.968143463134766, 12.243947982788086, 742.4027099609375, 32.98222351074219, 141.30091857910156, -37.225860595703125, 519.509765625, 103.57527160644531, 10.655586242675781, 138.81497192382812, -102.65782165527344, 115.31591796875, 110.61355590820312, 149.98724365234375, 115.47942352294922, 357.0601806640625, 17.20700454711914, 55.89126205444336, 121.26399230957031, 772.7348022460938, 142.9581298828125, -15.403003692626953, 10.03717041015625, 62.726383209228516, -107.54546356201172, -10.332351684570312, 265.0463562011719, 252.71270751953125, 178.086669921875, 63.85155487060547, 461.4979553222656, 217.47720336914062, 12.02044677734375, 195.2230224609375, 228.32699584960938, -203.0545654296875, 267.86553955078125, 208.29119873046875, 38.32763671875, 0.11865997314453125, -17.65521240234375, 157.90878295898438, 156.55377197265625, -62.168609619140625, 80.7601547241211], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000238.npy"}
{"epoch": 0.34948604992657856, "step": 239, "batch_size": 64, "mean": 131.09304809570312, "std": 163.37149047851562, "min": -261.03778076171875, "p10": -40.41120529174805, "median": 120.54230117797852, "p90": 365.1321990966797, "max": 479.6341857910156, "pos_frac": 0.796875, "sample": [375.73175048828125, 177.7367401123047, -149.5550079345703, 360.0448303222656, -108.98342895507812, 164.51824951171875, 57.373382568359375, 142.67686462402344, -29.309825897216797, -147.5812530517578, 44.44093704223633, 41.868865966796875, 75.01487731933594, 273.3723449707031, 78.79024505615234, 4.189491271972656, 479.6341857910156, 122.69712829589844, 175.309326171875, 133.080322265625, 206.94322204589844, 193.05540466308594, 245.63304138183594, -61.29513931274414, 21.62969207763672, 38.25679016113281, 183.62474060058594, 376.0406799316406, 238.1562042236328, 106.98637390136719, -6.465490341186523, 7.352203369140625, 329.9210205078125, 434.32574462890625, -187.1276397705078, 57.85846710205078, -38.05187225341797, 334.82525634765625, -9.750045776367188, 369.3348693847656, 30.05301284790039, 351.78057861328125, 327.1694030761719, 145.124755859375, -41.42234802246094, 43.347938537597656, -261.03778076171875, 155.4609375, -37.01559066772461, 146.9720001220703, 146.01156616210938, 276.18218994140625, 301.1116943359375, 66.15383911132812, 445.32427978515625, 211.28671264648438, 367.3125, 79.780517578125, 101.67066955566406, 118.3874740600586, 60.053619384765625, -22.02326202392578, 247.98048400878906, 17.986684799194336], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000239.npy"}
{"epoch": 0.3509544787077827, "step": 240, "batch_size": 64, "mean": 109.98140716552734, "std": 180.91506958007812, "min": -237.15078735351562, "p10": -162.76088104248043, "median": 123.6515884399414, "p90": 349.58030090332034, "max": 534.5574951171875, "pos_frac": 0.765625, "sample": [139.39151000976562, 163.59730529785156, 156.99331665039062, 97.1645278930664, 125.04951477050781, 346.87628173828125, 132.5084228515625, 418.85980224609375, -204.42208862304688, -50.45305633544922, 176.3710174560547, 145.3549041748047, -237.15078735351562, 185.57504272460938, 350.7391662597656, 77.93291473388672, -173.31243896484375, 69.09095764160156, 31.230567932128906, 239.84918212890625, 175.87918090820312, 397.30859375, 147.86160278320312, 146.28936767578125, 64.70494079589844, 8.550411224365234, -215.2071533203125, 117.59395599365234, 76.97279357910156, 489.969970703125, 180.9499053955078, 111.23431396484375, 89.25099182128906, 17.51927947998047, -221.7755126953125, 313.5220642089844, 230.8910369873047, 177.70672607421875, -202.00830078125, 73.16027069091797, -59.21926498413086, 281.2469482421875, -138.1405792236328, 46.272823333740234, 534.5574951171875, -103.69094848632812, 273.80316162109375, -30.060762405395508, 252.79354858398438, 175.39996337890625, 122.253662109375, 449.8582763671875, -44.81111145019531, 100.5678939819336, -108.35200500488281, 149.6134033203125, 88.6707763671875, -41.316741943359375, 135.44570922851562, 178.7573699951172, 46.949188232421875, 135.38717651367188, 454.3317565917969, -233.1282501220703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000240.npy"}
{"epoch": 0.3524229074889868, "step": 241, "batch_size": 64, "mean": 172.10183715820312, "std": 201.54823303222656, "min": -232.1362762451172, "p10": -31.772038650512695, "median": 142.94709014892578, "p90": 425.83910217285165, "max": 854.660400390625, "pos_frac": 0.8125, "sample": [96.16447448730469, 84.20851135253906, -197.29278564453125, -17.031702041625977, 50.088958740234375, 243.43218994140625, 257.7747802734375, 434.54541015625, 74.29855346679688, 178.7938232421875, 121.08198547363281, -52.5291748046875, -5.540088653564453, 497.57183837890625, -2.8612070083618164, 163.27578735351562, -32.22593307495117, 337.47216796875, 173.31138610839844, 152.8022003173828, 351.045166015625, 125.13519287109375, 90.30695343017578, 121.85426330566406, 854.660400390625, 405.5243835449219, 201.63162231445312, 134.59103393554688, 579.2998046875, 41.690086364746094, 794.8746948242188, 437.9044189453125, 319.05377197265625, 66.52765655517578, 83.17413330078125, 151.3031463623047, 211.01846313476562, 94.63716125488281, -30.71295166015625, 170.4824981689453, 96.9808120727539, 49.633872985839844, 108.0772705078125, -9.961502075195312, 228.06475830078125, 57.38551330566406, 283.154541015625, -49.90614700317383, 210.81150817871094, 199.568603515625, 71.75601196289062, 44.463600158691406, 241.40109252929688, 493.0579528808594, 288.83935546875, 165.3017578125, -181.59121704101562, 280.23382568359375, 257.8651428222656, 284.23992919921875, -232.1362762451172, 367.6618347167969, 89.43685913085938, -91.16458129882812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000241.npy"}
{"epoch": 0.35389133627019087, "step": 242, "batch_size": 64, "mean": 123.77261352539062, "std": 171.72618103027344, "min": -266.82562255859375, "p10": -44.64950714111327, "median": 93.36262130737305, "p90": 304.6214569091797, "max": 665.9413452148438, "pos_frac": 0.8125, "sample": [-32.863304138183594, 15.013423919677734, 162.23880004882812, 272.06866455078125, 41.25393295288086, -266.82562255859375, 13.579437255859375, -9.222068786621094, 303.764892578125, 133.69818115234375, 21.701704025268555, 243.00547790527344, 621.5587158203125, 23.355573654174805, -25.77735137939453, 75.3346939086914, 120.43248748779297, 139.28640747070312, 57.191650390625, 12.640012741088867, 185.3268280029297, 66.50462341308594, 240.1885223388672, 212.58181762695312, 258.5177001953125, -49.70073699951172, 105.52581024169922, 39.128623962402344, 114.45830535888672, -98.38131713867188, 160.25570678710938, 209.71115112304688, 87.47570037841797, -134.144287109375, 257.3517150878906, 125.53690338134766, 304.9885559082031, 440.8326416015625, 19.018287658691406, 73.48265075683594, 82.0946044921875, 200.5940399169922, -4.9667816162109375, 665.9413452148438, 41.857154846191406, 75.95146942138672, 467.5885009765625, 99.24954223632812, 17.437576293945312, 2.7351856231689453, -108.23897552490234, 436.30517578125, 172.3732147216797, -18.113563537597656, 69.72700500488281, 114.83908081054688, 152.26014709472656, -138.27532958984375, 244.19923400878906, 220.2896728515625, 7.945978164672852, 468.3483581542969, 162.67822265625, -51.46813201904297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000242.npy"}
{"epoch": 0.355359765051395, "step": 243, "batch_size": 64, "mean": 151.74656677246094, "std": 174.80267333984375, "min": -281.2373962402344, "p10": -51.313291168212885, "median": 129.51422882080078, "p90": 377.4388671875, "max": 700.3162841796875, "pos_frac": 0.859375, "sample": [340.46630859375, 18.883220672607422, 80.7927017211914, 452.65252685546875, 191.52273559570312, 103.7191390991211, 127.50529479980469, 174.51409912109375, -79.11592102050781, -94.19864654541016, -54.192169189453125, 85.58718872070312, 231.4882049560547, 46.305686950683594, -131.8547821044922, 26.957427978515625, -89.64056396484375, 336.411865234375, 138.84654235839844, 153.77322387695312, -281.2373962402344, 36.560394287109375, 170.87115478515625, 25.300437927246094, 29.47263526916504, 297.89129638671875, 58.24688720703125, 188.349365234375, 91.02117156982422, 380.46966552734375, 286.7960510253906, 262.2003173828125, -44.595909118652344, 138.74034118652344, -80.265380859375, 370.36700439453125, 260.3088073730469, 513.7442626953125, 333.80517578125, 236.179931640625, 289.41802978515625, 120.1987533569336, 405.32855224609375, 700.3162841796875, 400.18682861328125, 42.364593505859375, 131.52316284179688, 50.72255325317383, 368.19390869140625, -17.153884887695312, 157.77456665039062, 198.18246459960938, 34.3741455078125, 119.11334228515625, 505.3362121582031, 46.39209747314453, 139.5130615234375, 82.50349426269531, 15.573768615722656, 230.10211181640625, 17.465118408203125, 77.11517333984375, 61.68864440917969, 200.89654541015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000243.npy"}
{"epoch": 0.3568281938325991, "step": 244, "batch_size": 64, "mean": 152.10162353515625, "std": 157.76722717285156, "min": -154.99642944335938, "p10": -14.802527618408202, "median": 134.16181182861328, "p90": 375.84345703125007, "max": 563.912109375, "pos_frac": 0.859375, "sample": [87.00822448730469, 13.211524963378906, 112.49960327148438, 149.59909057617188, 17.597667694091797, 54.2895622253418, 435.6525573730469, 132.69500732421875, -58.135345458984375, 5.3062744140625, 178.9972686767578, 207.11001586914062, 5.8408203125, 302.4193115234375, 41.46075439453125, 136.59193420410156, -11.827991485595703, 449.7615051269531, 41.895877838134766, 94.2616958618164, -154.99642944335938, 125.30389404296875, 271.5987548828125, -63.369354248046875, 195.77346801757812, 33.07921600341797, 154.50350952148438, 238.28118896484375, 121.31796264648438, 28.26409912109375, 246.11219787597656, 116.56281280517578, 320.88330078125, 301.2042541503906, 468.7296142578125, 536.6138305664062, 175.6708221435547, 380.8768615722656, 47.82145309448242, 563.912109375, 235.05459594726562, -13.293937683105469, 357.8541259765625, 168.01437377929688, 153.9585723876953, -69.0533676147461, 135.6286163330078, -20.564485549926758, -15.449066162109375, 234.9597930908203, 36.68370056152344, 9.564643859863281, 151.72669982910156, 80.96479797363281, 4.244873046875, 113.58174133300781, 364.0988464355469, 201.572021484375, -72.52430725097656, 138.51107788085938, 87.95576477050781, 444.7708740234375, 203.0986785888672, 298.7674865722656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000244.npy"}
{"epoch": 0.35829662261380324, "step": 245, "batch_size": 64, "mean": 122.78467559814453, "std": 177.676513671875, "min": -430.5545349121094, "p10": -36.12038002014159, "median": 105.30636978149414, "p90": 326.4335388183595, "max": 618.2938842773438, "pos_frac": 0.8125, "sample": [118.41913604736328, 404.84136962890625, 176.79576110839844, 133.90370178222656, 183.9715576171875, 143.32261657714844, 54.71971893310547, -23.855621337890625, 212.6109619140625, 123.71316528320312, 89.73806762695312, 113.08024597167969, 161.2880096435547, 44.798221588134766, 134.49407958984375, 173.81094360351562, 217.18392944335938, 289.43475341796875, 98.59786987304688, 24.161029815673828, 540.0714111328125, 139.7532196044922, -19.95812225341797, 354.94598388671875, 59.60368347167969, -3.5493927001953125, 216.8884735107422, -41.376705169677734, -192.64901733398438, -11.721092224121094, 293.4581298828125, 153.56817626953125, 618.2938842773438, -158.8011016845703, 183.95718383789062, 12.332160949707031, 18.69243621826172, 90.97702026367188, 285.3485107421875, 69.08651733398438, -16.58365249633789, 214.664794921875, 156.9178009033203, -62.2138671875, 66.07549285888672, -430.5545349121094, 276.9249267578125, 285.88543701171875, 25.19928741455078, 213.70457458496094, 340.56585693359375, 20.4779052734375, 584.5487670898438, -89.57450866699219, 1.7106361389160156, 497.54058837890625, 77.18605041503906, 112.0148696899414, 9.151382446289062, -117.342529296875, 65.42269897460938, 61.43080139160156, 67.33722686767578, 13.777862548828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000245.npy"}
{"epoch": 0.35976505139500736, "step": 246, "batch_size": 64, "mean": 125.66952514648438, "std": 183.41656494140625, "min": -237.39915466308594, "p10": -91.32632675170898, "median": 82.90713119506836, "p90": 359.1256530761719, "max": 744.229248046875, "pos_frac": 0.796875, "sample": [188.40670776367188, 354.54595947265625, 129.90371704101562, 84.4279556274414, 408.36138916015625, 209.93983459472656, 744.229248046875, -156.6378173828125, 40.57430648803711, -237.39915466308594, 46.57878875732422, 84.84972381591797, 73.40655517578125, 0.8738269805908203, 169.19651794433594, 361.08837890625, 300.1036376953125, 60.938148498535156, 40.76829528808594, 51.933128356933594, 45.960845947265625, 383.0762023925781, 147.5111846923828, -92.0387191772461, 113.05746459960938, -102.42654418945312, 232.24635314941406, -47.800933837890625, 338.3894958496094, -101.6919174194336, 4.7303009033203125, -25.340599060058594, 114.28260803222656, 107.81596374511719, 315.3775634765625, 32.271690368652344, 18.92915916442871, -14.24410629272461, 3.973602294921875, 225.10049438476562, 383.5530090332031, 113.56766510009766, 38.736427307128906, 225.6210479736328, 81.38630676269531, 154.83389282226562, 77.6246337890625, -89.66407775878906, -0.7698402404785156, 209.4220733642578, -94.66789245605469, 90.38104248046875, 315.4533996582031, 175.7812042236328, 25.16796875, 64.53861999511719, 11.869260787963867, 582.1163330078125, 325.5617370605469, 575.3150634765625, -32.09619903564453, 243.540283203125, -96.28318786621094, 6.591953277587891], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000246.npy"}
{"epoch": 0.36123348017621143, "step": 247, "batch_size": 64, "mean": 177.672119140625, "std": 198.25411987304688, "min": -311.0759582519531, "p10": -25.11617660522458, "median": 152.26233673095703, "p90": 450.8974426269533, "max": 660.6763916015625, "pos_frac": 0.890625, "sample": [-241.1409454345703, 91.35310363769531, -80.08229064941406, 398.2657470703125, 90.22966003417969, 498.39935302734375, 154.6768798828125, 393.93701171875, -59.27777862548828, 342.2962951660156, 376.8852844238281, 501.0763244628906, 211.12380981445312, 285.4226379394531, 54.056793212890625, 342.22161865234375, 472.58978271484375, 44.05823516845703, 348.22760009765625, 310.1246337890625, 165.99942016601562, 35.07000732421875, 26.602127075195312, 113.55577850341797, 14.221939086914062, 268.69842529296875, 28.84918975830078, 77.36964416503906, 91.4234848022461, 111.13424682617188, 233.3101806640625, 346.98370361328125, 64.86640167236328, 159.77728271484375, 41.02305603027344, 397.0753173828125, 514.574462890625, 332.3514404296875, 105.51649475097656, -144.69747924804688, 15.00161361694336, 202.72462463378906, 3.28717041015625, 182.69659423828125, 51.203529357910156, 18.707923889160156, 149.84779357910156, 400.281982421875, 646.435791015625, 90.4975357055664, 43.68122100830078, -115.70260620117188, 93.59281921386719, 660.6763916015625, 217.350830078125, -37.289039611816406, 169.11557006835938, 291.61798095703125, 210.9570770263672, -311.0759582519531, 474.32440185546875, 15.736495971679688, 140.44044494628906, 238.75689697265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000247.npy"}
{"epoch": 0.36270190895741555, "step": 248, "batch_size": 64, "mean": 153.25155639648438, "std": 143.7351837158203, "min": -198.61439514160156, "p10": -2.752876281738279, "median": 139.29306030273438, "p90": 348.92448425292974, "max": 459.1712646484375, "pos_frac": 0.859375, "sample": [320.06195068359375, 388.7419738769531, 99.3051986694336, 181.727294921875, 196.61575317382812, 0.852569580078125, 154.3885955810547, 354.2827453613281, 121.92054748535156, -87.87261962890625, 24.210403442382812, 264.86602783203125, 185.78179931640625, 36.75114440917969, 232.76849365234375, 49.631072998046875, 21.942201614379883, 48.47938537597656, 150.29676818847656, -3.641773223876953, 140.00778198242188, 246.50750732421875, 239.598388671875, 418.8082580566406, 9.86590576171875, -43.938873291015625, 138.57833862304688, 336.421875, 122.41334533691406, 227.66165161132812, 182.71119689941406, -198.61439514160156, 129.53060913085938, 304.61566162109375, 104.03810119628906, 175.0661163330078, 75.67230224609375, 17.458724975585938, -17.219619750976562, 48.17979049682617, 212.4202117919922, 258.9971008300781, 190.94476318359375, 433.319091796875, 104.92271423339844, 100.53083801269531, 433.03387451171875, 313.0353698730469, 45.5955810546875, 459.1712646484375, 9.182319641113281, 167.66806030273438, 327.7327575683594, 256.05499267578125, 264.3916015625, 366.4124755859375, -11.371444702148438, 35.56257629394531, 122.99409484863281, 289.1420593261719, 117.49324798583984, -0.5967826843261719, -0.6787834167480469, -86.33119201660156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000248.npy"}
{"epoch": 0.3641703377386197, "step": 249, "batch_size": 64, "mean": 84.31858825683594, "std": 187.11886596679688, "min": -346.63909912109375, "p10": -145.40407257080076, "median": 72.0584602355957, "p90": 320.5450073242189, "max": 663.2310180664062, "pos_frac": 0.671875, "sample": [-21.515398025512695, 285.6722412109375, -109.73591613769531, -66.43421936035156, 109.3516616821289, -122.86624908447266, 20.907089233398438, 153.8886260986328, 61.55029296875, 382.0148010253906, 32.22509765625, 32.2266845703125, 70.27892303466797, 39.54161834716797, 240.9038543701172, -346.63909912109375, -98.60740661621094, 380.388916015625, 170.16172790527344, -75.50775146484375, 138.70428466796875, 213.3951873779297, -158.239990234375, 100.85298919677734, -31.865432739257812, 356.1627197265625, -151.93260192871094, 269.78564453125, 130.01693725585938, 78.81050109863281, -25.836132049560547, -22.052608489990234, 213.20379638671875, 61.35389709472656, 111.84613800048828, -23.598541259765625, 183.66561889648438, -324.12158203125, 105.73611450195312, 262.80035400390625, 215.40634155273438, -22.122589111328125, 663.2310180664062, -93.98200988769531, -229.62515258789062, -159.34510803222656, 48.920387268066406, 25.11386489868164, 101.30844116210938, 205.34104919433594, 114.97357940673828, 266.1944580078125, -194.93930053710938, 244.90682983398438, 335.490478515625, 267.28326416015625, 73.83799743652344, -130.17083740234375, -52.680015563964844, 217.79153442382812, 62.62610626220703, 405.70306396484375, 393.4813232421875, 11.152305603027344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000249.npy"}
{"epoch": 0.3656387665198238, "step": 250, "batch_size": 64, "mean": 156.46585083007812, "std": 164.75741577148438, "min": -176.26544189453125, "p10": -35.59816875457763, "median": 152.68402099609375, "p90": 378.2839111328126, "max": 541.095703125, "pos_frac": 0.796875, "sample": [256.8785095214844, 26.074506759643555, 45.49784851074219, -4.215915679931641, -39.49676513671875, 206.1325225830078, 79.67276763916016, 492.7003173828125, 160.44277954101562, -139.04893493652344, 58.90648651123047, 517.444091796875, 243.16180419921875, 143.58631896972656, 227.4666748046875, 162.3251953125, 27.106651306152344, 139.67041015625, 320.3244323730469, 359.195068359375, 183.76190185546875, -93.7012710571289, -54.064674377441406, 98.02335357666016, 89.5062026977539, 185.86383056640625, 63.569297790527344, -18.438772201538086, 74.13026428222656, -21.8763427734375, -26.50144386291504, 39.17108154296875, 137.1707000732422, 278.8458251953125, -45.77666473388672, 541.095703125, 127.01119995117188, 198.85443115234375, -131.887939453125, -6.750572204589844, 203.96559143066406, 393.45806884765625, 302.5469970703125, 169.17234802246094, 167.94390869140625, 57.13941192626953, 269.15948486328125, 144.92526245117188, 276.58074951171875, 135.1442108154297, 57.20864486694336, -4.8638458251953125, 196.80764770507812, 261.6518249511719, -176.26544189453125, 227.869384765625, 386.46484375, 201.63372802734375, 177.31527709960938, 449.2886047363281, 337.2808837890625, 38.4388427734375, 522.1738891601562, 316.94378662109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000250.npy"}
{"epoch": 0.3671071953010279, "step": 251, "batch_size": 64, "mean": 106.71569061279297, "std": 190.78912353515625, "min": -395.91748046875, "p10": -57.57828140258789, "median": 86.34718322753906, "p90": 348.1887817382813, "max": 702.21240234375, "pos_frac": 0.734375, "sample": [431.6037292480469, 12.004627227783203, 357.01531982421875, -29.68682098388672, 217.67910766601562, -290.504638671875, -395.91748046875, 702.21240234375, 0.9529190063476562, 187.88302612304688, 171.10903930664062, -145.36062622070312, 323.7607116699219, 42.396705627441406, 86.47439575195312, -42.68544006347656, -291.55450439453125, 452.8337097167969, -28.828161239624023, 170.99905395507812, 20.332836151123047, 283.5262145996094, -14.440483093261719, -55.184288024902344, 46.886627197265625, 194.2434539794922, -131.60768127441406, -58.604278564453125, 69.51947021484375, 267.96160888671875, 73.55059814453125, -142.18124389648438, 332.0162353515625, -34.090965270996094, 63.51782989501953, 210.67471313476562, 54.367897033691406, 125.91265106201172, 149.6048126220703, 135.9031524658203, 2.4980316162109375, 209.54693603515625, -29.242990493774414, 279.94708251953125, -3.51287841796875, 360.9638366699219, 214.5795440673828, 185.5936737060547, 88.64069366455078, 137.3358154296875, -17.172216415405273, 14.641586303710938, 582.7741088867188, 86.219970703125, 355.119873046875, 191.81155395507812, -24.68915367126465, 233.5306396484375, 90.98825073242188, 4.424510955810547, 34.35277557373047, 127.98275756835938, 36.386512756347656, 142.7867889404297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000251.npy"}
{"epoch": 0.368575624082232, "step": 252, "batch_size": 64, "mean": 144.67434692382812, "std": 151.98562622070312, "min": -492.47039794921875, "p10": -15.025075721740716, "median": 155.98380279541016, "p90": 329.14268188476564, "max": 485.35552978515625, "pos_frac": 0.859375, "sample": [261.88226318359375, 313.99102783203125, 7.005054473876953, 198.629638671875, 255.41990661621094, 75.3917236328125, 156.36257934570312, 339.014892578125, 87.61029052734375, 135.44711303710938, 229.4307098388672, 1.265960693359375, 177.216796875, 58.524322509765625, 112.30381774902344, 159.9115447998047, 43.71133804321289, 134.39039611816406, 187.42807006835938, -72.1195297241211, 273.44500732421875, 155.6050262451172, 13.994029998779297, 417.2018127441406, 278.1014099121094, -129.92041015625, -20.16084861755371, 111.08029174804688, -17.469039916992188, 148.7529296875, -37.37541580200195, 192.4393310546875, 39.14558410644531, 160.37777709960938, 62.0010986328125, 131.52349853515625, 129.18310546875, 416.926513671875, 368.10784912109375, 331.95703125, 47.650047302246094, 180.4384002685547, 177.0301971435547, 31.303207397460938, 215.96066284179688, 322.57586669921875, 283.41217041015625, 268.0062561035156, 206.02761840820312, 173.3421173095703, -2.1912078857421875, -492.47039794921875, 346.8018798828125, 280.15411376953125, 129.074462890625, 91.72560119628906, -9.322492599487305, 177.9344482421875, 485.35552978515625, -63.306880950927734, 84.1849365234375, 57.506690979003906, 175.15447998046875, 205.07623291015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000252.npy"}
{"epoch": 0.3700440528634361, "step": 253, "batch_size": 64, "mean": 142.34326171875, "std": 181.11199951171875, "min": -409.5155944824219, "p10": -71.95600814819335, "median": 130.23860931396484, "p90": 394.96443481445317, "max": 591.736572265625, "pos_frac": 0.796875, "sample": [-64.58843231201172, 20.08556365966797, 287.9301452636719, 11.387985229492188, -6.992340087890625, 130.15057373046875, 227.474609375, 54.632652282714844, 183.91944885253906, 66.38645935058594, 105.87490844726562, 591.736572265625, 220.87884521484375, 59.289756774902344, 241.11575317382812, 116.05209350585938, 384.7509460449219, 91.79954528808594, -75.11354064941406, 103.07131958007812, -12.686979293823242, -409.5155944824219, 229.56341552734375, 399.3416442871094, 416.7218933105469, 10.192289352416992, 452.147216796875, 24.58118438720703, -20.466392517089844, 36.07234573364258, 17.648277282714844, -81.73466491699219, 138.802490234375, 281.11285400390625, 311.6953430175781, 206.42446899414062, 302.47723388671875, -134.44888305664062, 303.97076416015625, 87.83368682861328, 246.5553741455078, 340.59613037109375, 171.72265625, 297.20050048828125, 432.4451904296875, 121.37680053710938, 130.32664489746094, 402.3338928222656, 136.60430908203125, -239.96710205078125, 378.9443359375, 180.36416625976562, -88.37066650390625, 63.51189422607422, -77.336669921875, 90.67083740234375, 164.95664978027344, 228.6575164794922, 210.2644805908203, -34.65771484375, -53.136173248291016, 60.647796630859375, 455.2276916503906, 181.4539794921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000253.npy"}
{"epoch": 0.37151248164464024, "step": 254, "batch_size": 64, "mean": 120.19171142578125, "std": 206.7840118408203, "min": -275.5563659667969, "p10": -125.05795898437499, "median": 92.30457305908203, "p90": 411.04849243164074, "max": 861.0596923828125, "pos_frac": 0.75, "sample": [128.24163818359375, -24.099838256835938, 140.43017578125, 99.94353485107422, 99.55702209472656, 103.63276672363281, 17.883071899414062, -84.32078552246094, 144.04058837890625, 25.901451110839844, 150.91757202148438, 207.1630096435547, 239.38955688476562, 43.920623779296875, -128.1127166748047, -275.5563659667969, 200.75482177734375, -28.351116180419922, 80.267333984375, 201.27597045898438, 8.972415924072266, 215.12234497070312, 523.8284912109375, 316.4114990234375, 368.2648010253906, -175.2191162109375, 426.00665283203125, 289.53314208984375, -12.210685729980469, 3.694314956665039, 79.84993743896484, 197.8444061279297, 42.18523406982422, -207.17242431640625, 23.612689971923828, 193.24209594726562, -55.399330139160156, 441.2327880859375, -131.9722900390625, 54.57429885864258, -29.48387908935547, -208.40020751953125, 62.03216552734375, 465.15045166015625, 9.459985733032227, 219.08120727539062, 5.257047653198242, 376.1461181640625, 52.539390563964844, 173.75396728515625, 861.0596923828125, 159.03871154785156, 85.0521240234375, -12.831062316894531, -176.43988037109375, 571.4072265625, 151.89132690429688, 232.4122314453125, 147.58055114746094, 56.88911437988281, -117.93019104003906, 551.3594970703125, 144.20654296875, -32.241851806640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000254.npy"}
{"epoch": 0.37298091042584436, "step": 255, "batch_size": 64, "mean": 128.564697265625, "std": 184.40765380859375, "min": -419.718994140625, "p10": -64.73961029052734, "median": 91.57090377807617, "p90": 342.8689270019532, "max": 610.95361328125, "pos_frac": 0.765625, "sample": [610.95361328125, -89.42938232421875, 24.957061767578125, 347.64385986328125, 186.3548583984375, -58.132965087890625, -53.615623474121094, -37.37690734863281, -419.718994140625, 48.32883834838867, 295.181396484375, 86.72727966308594, -8.565071105957031, 23.79030990600586, 239.05125427246094, 257.2611999511719, -164.44781494140625, -226.544921875, 153.46823120117188, 288.0411682128906, 277.5509338378906, 31.749162673950195, 80.74132537841797, 14.437366485595703, 78.60795593261719, -60.216270446777344, -128.36898803710938, 382.4629821777344, -21.824748992919922, 193.21873474121094, 212.7825164794922, 231.44866943359375, 81.48724365234375, 317.1583557128906, 366.9393310546875, 316.8148193359375, 262.4499206542969, 371.97674560546875, 331.7274169921875, 540.3192138671875, 51.53382110595703, -13.561115264892578, 273.0704650878906, 134.33822631835938, 27.389598846435547, 210.64210510253906, 66.5526123046875, 242.02264404296875, -114.93629455566406, 325.91864013671875, 426.99554443359375, 57.01520538330078, 135.29864501953125, 52.80933380126953, -66.67818450927734, 138.26901245117188, 13.240089416503906, 96.4145278930664, 37.106651306152344, 150.7699432373047, -47.129310607910156, 66.20059204101562, 302.5494384765625, 276.9180908203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000255.npy"}
{"epoch": 0.3744493392070485, "step": 256, "batch_size": 64, "mean": 138.02523803710938, "std": 178.59033203125, "min": -295.26885986328125, "p10": -60.070536804199214, "median": 117.975830078125, "p90": 348.223226928711, "max": 614.30908203125, "pos_frac": 0.765625, "sample": [93.57368469238281, 554.0758666992188, 113.08545684814453, 259.88330078125, 207.6414794921875, 255.45278930664062, 34.815773010253906, 185.78732299804688, -25.55472183227539, 25.289291381835938, 308.952392578125, 237.0809326171875, 180.52542114257812, 349.25439453125, 263.5935363769531, -139.62428283691406, 52.68197250366211, 173.95191955566406, -295.26885986328125, 36.964290618896484, 274.71636962890625, 112.45169067382812, -145.5374298095703, 286.67279052734375, 60.84370803833008, 275.13140869140625, -68.50562286376953, -62.524658203125, -80.93902587890625, 172.7145538330078, 371.67706298828125, -8.350341796875, 221.26025390625, 91.39752197265625, 63.37388610839844, 122.86620330810547, 387.58551025390625, -54.34425354003906, 49.06846618652344, 50.93675231933594, -196.7401885986328, 31.628265380859375, 29.168487548828125, 3.272003173828125, 179.40023803710938, -36.209598541259766, 207.8677978515625, 122.94567108154297, -30.5902099609375, 268.2147216796875, -38.88340759277344, 112.54684448242188, -7.572059631347656, 201.5146484375, 345.3843994140625, 406.3730773925781, 156.8079071044922, 58.87590408325195, 614.30908203125, 532.1319580078125, 225.83084106445312, 345.8171691894531, -21.851232528686523, 330.7159729003906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000256.npy"}
{"epoch": 0.37591776798825255, "step": 257, "batch_size": 64, "mean": 146.7384033203125, "std": 190.81143188476562, "min": -290.0919494628906, "p10": -45.609196090698234, "median": 110.5522575378418, "p90": 444.30572204589845, "max": 672.8951416015625, "pos_frac": 0.78125, "sample": [465.19696044921875, -39.31649398803711, 11.528121948242188, 122.13528442382812, 367.9627685546875, -7.3175811767578125, 232.67092895507812, 444.16094970703125, 243.7921142578125, 157.28732299804688, -15.473012924194336, -48.306068420410156, 100.10610961914062, 306.5118408203125, 74.13329315185547, 23.9111328125, 223.5296630859375, 232.1204376220703, 19.89760971069336, 444.3677673339844, 36.85270690917969, 421.0351867675781, 280.46533203125, -128.6175994873047, 87.69620513916016, 113.0079345703125, -84.73149108886719, 13.629180908203125, -30.224178314208984, 508.7122497558594, 168.1192169189453, 224.9080352783203, 262.7593994140625, 672.8951416015625, 79.04302215576172, -53.80792236328125, 108.0965805053711, 6.724252700805664, 126.0753402709961, -290.0919494628906, 97.44229888916016, 451.0923156738281, 38.214054107666016, 240.45654296875, 202.5486297607422, 220.81341552734375, 70.33122253417969, 3.3286514282226562, 541.65478515625, 251.9122314453125, 243.16641235351562, 321.121337890625, -9.212181091308594, 115.23595428466797, 48.565948486328125, 536.9139404296875, -93.21467590332031, -13.423471450805664, 164.95187377929688, -177.92845153808594, 36.13002014160156, 29.90484619140625, -25.23748779296875, 215.04437255859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000257.npy"}
{"epoch": 0.37738619676945667, "step": 258, "batch_size": 64, "mean": 142.62551879882812, "std": 200.61253356933594, "min": -491.8069763183594, "p10": -62.15648574829101, "median": 131.598388671875, "p90": 374.7330261230469, "max": 697.4442138671875, "pos_frac": 0.796875, "sample": [296.6089172363281, 36.76647186279297, -107.60423278808594, 276.14532470703125, 54.85246276855469, 127.89203643798828, 262.71038818359375, 243.36294555664062, 231.525634765625, -7.596435546875, -36.69689178466797, 2.2198715209960938, -60.02112579345703, 50.27484130859375, 95.49774932861328, -39.84971618652344, -14.929649353027344, 113.8118896484375, 163.99803161621094, -0.6903152465820312, 62.85102844238281, 261.5208435058594, 540.89501953125, 219.3005828857422, 485.6518249511719, 271.38824462890625, 166.67950439453125, 21.77886199951172, 154.03411865234375, 363.7843322753906, 184.83453369140625, 282.06793212890625, 313.5269775390625, 171.1054229736328, -230.24862670898438, 102.25375366210938, 277.3509826660156, -362.4944152832031, 25.257118225097656, 130.59182739257812, 405.98809814453125, 12.072803497314453, -88.58729553222656, 391.74664306640625, 31.082889556884766, -491.8069763183594, 697.4442138671875, 132.60494995117188, -139.86599731445312, 101.6415023803711, 236.35809326171875, -63.07164001464844, 309.22613525390625, 363.64581298828125, 379.4253234863281, 42.563194274902344, 291.86767578125, 437.2114562988281, 146.87635803222656, 269.1217041015625, 75.49031066894531, 111.4172592163086, 269.3918151855469, 75.77970123291016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000258.npy"}
{"epoch": 0.3788546255506608, "step": 259, "batch_size": 64, "mean": 120.01094055175781, "std": 182.5396728515625, "min": -230.28294372558594, "p10": -100.46448287963867, "median": 94.53255462646484, "p90": 358.37385864257817, "max": 550.184326171875, "pos_frac": 0.75, "sample": [209.77745056152344, -17.04542350769043, 137.17916870117188, 190.98216247558594, 17.724788665771484, -97.38491821289062, 320.0017395019531, 432.886474609375, 433.8523254394531, 204.56008911132812, 49.501869201660156, -100.72106170654297, 108.00617980957031, -80.98307037353516, 8.492156982421875, -112.19709777832031, 348.9779357910156, -52.282867431640625, 467.5465087890625, 182.139404296875, 138.43707275390625, 142.4501953125, 81.05892944335938, 71.16629028320312, 309.52850341796875, 23.299087524414062, 57.9635009765625, 40.73307800292969, 550.184326171875, -190.26812744140625, 69.7425537109375, 29.891244888305664, 501.413330078125, 177.2235107421875, 40.345664978027344, -161.8345184326172, 158.36505126953125, 325.0473937988281, -151.01718139648438, 326.04925537109375, -164.24002075195312, 16.71410369873047, 350.9739990234375, -99.86579895019531, 112.98442077636719, 49.3446044921875, 361.54522705078125, 285.9265441894531, -230.28294372558594, 142.0412139892578, -36.450286865234375, 294.1412353515625, 277.93634033203125, -1.08416748046875, 374.17218017578125, -77.3438491821289, 118.81716918945312, 311.946044921875, 144.11923217773438, -45.17675018310547, 60.514827728271484, 34.55172348022461, 151.3675537109375, 57.254493713378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000259.npy"}
{"epoch": 0.3803230543318649, "step": 260, "batch_size": 64, "mean": 162.99392700195312, "std": 183.4407196044922, "min": -182.16232299804688, "p10": -24.095032501220697, "median": 126.67322158813477, "p90": 401.55622558593757, "max": 990.910400390625, "pos_frac": 0.859375, "sample": [233.16192626953125, -41.620140075683594, 39.00043487548828, -104.31278991699219, 4.196615219116211, 159.58416748046875, 124.90055084228516, 182.35533142089844, 329.0870361328125, 56.937408447265625, 65.0716552734375, 458.53460693359375, 118.15135192871094, 105.09367370605469, 146.961669921875, 990.910400390625, 156.9473419189453, 238.83047485351562, 65.32989501953125, 446.0662841796875, -182.16232299804688, 155.2213134765625, -32.882476806640625, 174.8439483642578, -96.81925201416016, -26.84986114501953, 152.79237365722656, -17.667098999023438, 247.42013549804688, 128.44589233398438, 110.87196350097656, 366.89373779296875, -10.117683410644531, 215.71556091308594, 154.8955535888672, 382.47259521484375, 374.9098205566406, 143.68292236328125, 117.65632629394531, 87.98822021484375, 88.00086975097656, 68.45526123046875, 337.405029296875, 112.73693084716797, 146.79904174804688, 40.1453971862793, 44.31480407714844, 83.29345703125, 165.17779541015625, 207.09263610839844, 207.332763671875, 191.2808837890625, 120.77494049072266, 419.73974609375, 24.89978790283203, -27.48798370361328, 60.6295166015625, 409.73492431640625, 22.066329956054688, 598.269287109375, 111.93419647216797, 416.4234924316406, 55.63600158691406, 304.45684814453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000260.npy"}
{"epoch": 0.38179148311306904, "step": 261, "batch_size": 64, "mean": 150.76947021484375, "std": 202.81057739257812, "min": -327.8468017578125, "p10": -51.86358985900879, "median": 123.24789428710938, "p90": 404.6301544189453, "max": 886.6013793945312, "pos_frac": 0.78125, "sample": [407.0530700683594, 360.1178894042969, 47.757362365722656, -225.8705291748047, 128.1289520263672, 886.6013793945312, 43.25523376464844, 348.62237548828125, -29.577625274658203, 212.31619262695312, 47.788726806640625, 70.1692886352539, -95.01483154296875, 409.83575439453125, 118.36683654785156, 211.19528198242188, 221.13687133789062, 367.5283508300781, 412.55133056640625, -24.127845764160156, 117.57432556152344, 83.66677856445312, 151.44183349609375, 169.36196899414062, 175.95272827148438, 320.99456787109375, 491.72869873046875, 28.621490478515625, 306.1730041503906, 200.38267517089844, 60.707611083984375, 40.39215850830078, -4.069206237792969, 89.11825561523438, 242.76321411132812, 58.043983459472656, 280.6630859375, 376.3714599609375, 245.6988067626953, 105.25348663330078, 231.61019897460938, -51.671871185302734, 454.0933532714844, 398.9766845703125, -12.694635391235352, 343.50286865234375, -199.45103454589844, 193.38546752929688, -10.320030212402344, 251.77984619140625, 111.02620697021484, 495.43310546875, 7.229957580566406, -327.8468017578125, -187.68634033203125, 114.2120361328125, -172.97509765625, -18.186254501342773, 137.04803466796875, 203.96885681152344, 59.356056213378906, 70.74376678466797, 150.98220825195312, -51.94575500488281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000261.npy"}
{"epoch": 0.3832599118942731, "step": 262, "batch_size": 64, "mean": 123.46087646484375, "std": 169.67532348632812, "min": -250.1248321533203, "p10": -77.14892272949217, "median": 124.75458908081055, "p90": 301.89546203613287, "max": 754.2808837890625, "pos_frac": 0.796875, "sample": [41.29656982421875, 404.8092956542969, 109.38134002685547, 216.30995178222656, -250.1248321533203, 309.5757751464844, 472.3333740234375, 103.18638610839844, 156.23785400390625, 247.75222778320312, -147.7748260498047, -244.66976928710938, 157.99200439453125, 754.2808837890625, 132.91531372070312, -169.47967529296875, 171.29730224609375, 283.9747314453125, -43.41266632080078, 182.58584594726562, 157.8315887451172, 58.789337158203125, -87.23338317871094, 259.9944152832031, 139.49229431152344, -17.015396118164062, 47.928096771240234, 426.9357604980469, 104.20474243164062, 310.7300109863281, 47.629722595214844, 85.67086029052734, 84.30682373046875, -57.95487594604492, 63.73695373535156, 73.2999038696289, 137.2725830078125, -30.574081420898438, 177.59494018554688, 144.48464965820312, 138.8951416015625, 151.33860778808594, 10.015281677246094, 227.1995849609375, 238.83033752441406, 88.6618881225586, 240.21817016601562, 116.59386444091797, -81.6473617553711, -66.6525650024414, 73.78303527832031, 71.83798217773438, 49.91471862792969, 340.8292541503906, -158.40570068359375, -33.72661590576172, 95.7470703125, 161.78411865234375, 263.870849609375, 258.8070983886719, 182.6749267578125, 8.352767944335938, 275.54791259765625, 231.43429565429688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000262.npy"}
{"epoch": 0.38472834067547723, "step": 263, "batch_size": 64, "mean": 176.84181213378906, "std": 206.23117065429688, "min": -349.71905517578125, "p10": -55.26733779907226, "median": 160.72258758544922, "p90": 457.2254791259767, "max": 684.4710693359375, "pos_frac": 0.796875, "sample": [316.3098449707031, -349.71905517578125, 164.41107177734375, 121.79576110839844, 361.511962890625, 56.25176239013672, 334.2662353515625, 72.2086181640625, 25.077774047851562, 100.94927215576172, -238.63079833984375, 167.4268035888672, 82.83301544189453, 301.0321044921875, 302.180908203125, 173.9053192138672, 19.11893081665039, 362.6259765625, 96.47612762451172, 165.47434997558594, -3.777261734008789, -67.8909912109375, 50.86634063720703, 315.8717346191406, -27.153247833251953, -72.02581787109375, 278.1826171875, 157.0341033935547, -18.819175720214844, 98.64213562011719, 41.99028015136719, 26.07581901550293, 403.98199462890625, 39.45732879638672, -31.493927001953125, 509.45794677734375, 283.4762268066406, 100.1907958984375, 387.57171630859375, 468.22412109375, 101.11588287353516, 481.45928955078125, 69.86123657226562, 279.722412109375, 337.5019836425781, 165.1357879638672, 476.24505615234375, -57.44557189941406, 238.94134521484375, 276.7911682128906, 377.0596008300781, 431.5619812011719, 663.874267578125, -172.75303649902344, -14.834548950195312, 555.1986083984375, -50.184791564941406, -75.86817169189453, 223.29612731933594, 239.76812744140625, 153.6053009033203, 684.4710693359375, 139.9862518310547, 217.99801635742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000263.npy"}
{"epoch": 0.38619676945668135, "step": 264, "batch_size": 64, "mean": 151.50254821777344, "std": 204.70956420898438, "min": -239.03762817382812, "p10": -77.88779983520507, "median": 124.66874694824219, "p90": 455.6923309326174, "max": 650.49462890625, "pos_frac": 0.796875, "sample": [215.01123046875, 321.53704833984375, 47.395023345947266, 650.49462890625, 101.28860473632812, -230.7722625732422, -151.6864013671875, 401.2326965332031, 83.6036376953125, 480.7540283203125, -52.76593780517578, 45.87389373779297, 104.43505859375, -144.3209228515625, 158.46212768554688, 186.26339721679688, 268.585693359375, 410.1776123046875, 405.0624694824219, 362.36187744140625, 85.42730712890625, 237.31668090820312, 260.593505859375, 178.1504669189453, 62.43890380859375, 39.47206115722656, -78.4226303100586, 11.152633666992188, -5.369071960449219, 197.62245178222656, -76.63986206054688, 18.38666534423828, 57.449913024902344, 576.2228393554688, 3.3828125, 475.1986389160156, 135.0526123046875, 195.51187133789062, 14.762100219726562, 251.15638732910156, 53.8287353515625, 211.0301055908203, 120.21194458007812, 129.12554931640625, -2.3448104858398438, 623.80419921875, 370.2771911621094, -239.03762817382812, 232.27676391601562, -137.37863159179688, 191.81809997558594, 1.6734962463378906, 229.59170532226562, 485.68646240234375, -36.522857666015625, 138.3149871826172, 5.223884582519531, 65.90802001953125, -202.2793731689453, 493.7960510253906, 324.1158142089844, 279.65521240234375, -6.823581695556641, 62.35221862792969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000264.npy"}
{"epoch": 0.3876651982378855, "step": 265, "batch_size": 64, "mean": 144.70460510253906, "std": 237.8543701171875, "min": -485.34771728515625, "p10": -83.21995239257811, "median": 135.7381134033203, "p90": 418.0658203125, "max": 811.109130859375, "pos_frac": 0.75, "sample": [138.04232788085938, 408.45428466796875, 351.78106689453125, 226.2429656982422, 84.40462493896484, 151.16201782226562, 344.9509582519531, 19.39008331298828, 626.327392578125, 81.46307373046875, 258.8924865722656, -305.9717102050781, 6.624622344970703, -34.34476089477539, -136.97640991210938, 50.8065071105957, 302.3857727050781, -127.08059692382812, 105.06334686279297, -43.98675537109375, 254.94049072265625, 811.109130859375, -485.34771728515625, 524.8056640625, -71.57296752929688, 197.85195922851562, 184.5784912109375, 460.3172912597656, -71.16677856445312, 761.2927856445312, -260.01416015625, 327.45703125, 62.53593444824219, -54.562557220458984, 70.88662719726562, -88.21151733398438, 7.7374114990234375, -44.732879638671875, 369.9913024902344, -65.42381286621094, 411.2496643066406, 105.91937255859375, 69.84708404541016, 404.9475402832031, -42.427818298339844, 182.45933532714844, 21.758834838867188, 166.4055938720703, -29.490921020507812, 277.1707763671875, 85.29865264892578, 133.9205322265625, 157.18557739257812, 420.9870300292969, 170.12564086914062, 137.55569458007812, 220.20912170410156, 156.51808166503906, 40.031856536865234, -318.07086181640625, 440.7052307128906, 96.94564819335938, 324.77825927734375, 226.96275329589844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000265.npy"}
{"epoch": 0.3891336270190896, "step": 266, "batch_size": 64, "mean": 118.16098022460938, "std": 192.6403350830078, "min": -273.2129211425781, "p10": -95.1062675476074, "median": 77.68882369995117, "p90": 399.7540557861329, "max": 661.5164794921875, "pos_frac": 0.71875, "sample": [-16.503982543945312, 530.0053100585938, 46.56946563720703, 119.66300201416016, 257.8480224609375, 126.90144348144531, -51.39253234863281, 277.86669921875, 75.48895263671875, 419.71661376953125, 126.10737609863281, 316.81878662109375, -45.41117858886719, 275.71575927734375, -101.88902282714844, 58.185997009277344, 292.7753601074219, 91.39407348632812, 202.8673858642578, 65.52422332763672, 135.02613830566406, -267.2952575683594, -58.681007385253906, 371.38214111328125, 171.2335205078125, 384.9444885253906, 157.15272521972656, 9.732154846191406, 47.884193420410156, 406.10101318359375, 465.793212890625, 277.73199462890625, 661.5164794921875, 447.439453125, 216.73654174804688, 71.60736083984375, 62.34751510620117, 234.60821533203125, 16.847145080566406, 90.56966400146484, 65.6600341796875, 150.99697875976562, 71.9382095336914, 27.915807723999023, 163.58505249023438, -2.027996063232422, -273.2129211425781, -245.1839599609375, 257.3680419921875, 16.67742347717285, -136.65719604492188, -79.27983856201172, -25.093063354492188, -104.87750244140625, -56.38232421875, -38.3885498046875, -30.081689834594727, -14.359550476074219, -144.75802612304688, 437.6679992675781, 216.65008544921875, 239.94839477539062, 13.379024505615234, 79.8886947631836], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000266.npy"}
{"epoch": 0.39060205580029367, "step": 267, "batch_size": 64, "mean": 145.4833526611328, "std": 206.0981903076172, "min": -513.745361328125, "p10": -43.87514648437499, "median": 121.05089950561523, "p90": 365.76297607421884, "max": 707.6538696289062, "pos_frac": 0.78125, "sample": [78.78534698486328, 187.10867309570312, 333.36865234375, -11.681720733642578, 86.9190673828125, -20.334135055541992, 273.916259765625, 192.84097290039062, 4.193145751953125, 41.14619827270508, -49.506500244140625, -30.735321044921875, -8.840789794921875, 375.1085510253906, 156.8802490234375, 121.0855941772461, -83.1478500366211, 246.02810668945312, 471.47698974609375, 707.6538696289062, 93.54380798339844, 79.91083526611328, 149.84043884277344, 15.09326171875, -104.46192932128906, 98.31991577148438, 79.0970458984375, 93.71704864501953, 316.5667724609375, 215.82542419433594, 522.2479858398438, 400.1219482421875, 680.628662109375, 66.26968383789062, -5.285373687744141, 273.6857604980469, 33.28498458862305, -62.498146057128906, 10.71969223022461, 193.24020385742188, 284.8956604003906, 17.988983154296875, -6.107206344604492, 611.0889892578125, -513.745361328125, -11.725486755371094, -374.3360595703125, 226.85348510742188, 191.384033203125, 121.01620483398438, 221.9241485595703, 234.7789306640625, 81.09912109375, 311.6982727050781, 106.33765411376953, 167.68801879882812, -50.56645202636719, 343.9566345214844, 76.65857696533203, 130.45098876953125, 322.2140197753906, 165.3987579345703, 285.1777038574219, 144.6710968017578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000267.npy"}
{"epoch": 0.3920704845814978, "step": 268, "batch_size": 64, "mean": 137.439208984375, "std": 203.34576416015625, "min": -324.086181640625, "p10": -61.38204803466797, "median": 125.63863754272461, "p90": 337.3724975585938, "max": 769.6926879882812, "pos_frac": 0.796875, "sample": [2.5926895141601562, 460.5184326171875, 93.06241607666016, -68.77501678466797, 39.71537780761719, 71.71936798095703, 172.53855895996094, 71.62054443359375, 342.60693359375, 101.14947509765625, 186.84869384765625, -292.13604736328125, 150.95407104492188, 43.389617919921875, 273.11480712890625, -196.3675537109375, 107.63457489013672, 43.30946350097656, -323.4454040527344, 133.3691864013672, 156.20635986328125, 769.6926879882812, 51.3604850769043, 159.24273681640625, 77.42549133300781, 258.49530029296875, 460.7711181640625, 500.953857421875, -29.610984802246094, 319.6583251953125, -96.51422882080078, 66.42544555664062, -57.485382080078125, -10.678211212158203, 577.3150024414062, 142.5422821044922, 200.12435913085938, 139.25450134277344, 253.07852172851562, 223.8469696044922, 140.4876251220703, 274.14404296875, 6.554023742675781, -63.05204772949219, 312.6588134765625, 30.32696533203125, 196.06556701660156, 301.19207763671875, 16.668319702148438, 266.3427429199219, 188.14407348632812, 111.52830505371094, 674.48828125, 200.2984619140625, 77.79449462890625, 70.92378234863281, 117.90808868408203, -4.609825134277344, 325.1588134765625, -17.659650802612305, 184.02407836914062, -324.086181640625, -28.249195098876953, 163.5326385498047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000268.npy"}
{"epoch": 0.3935389133627019, "step": 269, "batch_size": 64, "mean": 171.92185974121094, "std": 220.90115356445312, "min": -405.1007995605469, "p10": -82.7112937927246, "median": 174.76653289794922, "p90": 446.49113464355474, "max": 650.0057373046875, "pos_frac": 0.734375, "sample": [154.39817810058594, 317.8697509765625, 157.64697265625, 302.93890380859375, -84.92266082763672, 237.24026489257812, 320.7960205078125, 184.91944885253906, -77.55143737792969, -24.325523376464844, 294.518310546875, -15.313278198242188, 487.1415710449219, 35.190887451171875, 339.02777099609375, 270.38201904296875, 2.4937362670898438, 187.81666564941406, -105.91252136230469, -405.1007995605469, 296.0722961425781, 12.35971450805664, 240.88787841796875, -251.4925537109375, -112.63056945800781, 400.19866943359375, -23.21898651123047, 581.5926513671875, 516.7202758789062, 133.16995239257812, 164.61361694335938, 59.177978515625, 405.597900390625, 293.5154113769531, -19.952255249023438, 332.97662353515625, 425.7549743652344, 92.64800262451172, 313.02471923828125, 23.597412109375, 14.495429992675781, 144.21490478515625, -191.7859649658203, 45.420257568359375, 140.5106658935547, 570.0188598632812, 302.0730285644531, 489.70098876953125, 312.9444274902344, -22.894378662109375, 344.3959045410156, 258.4328918457031, -4.343841552734375, 650.0057373046875, -76.03488159179688, 449.7083435058594, -19.96820068359375, 44.11620330810547, -35.07307052612305, 250.12750244140625, 438.98431396484375, 408.0628967285156, -186.67764282226562, 212.696533203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000269.npy"}
{"epoch": 0.39500734214390604, "step": 270, "batch_size": 64, "mean": 127.91445922851562, "std": 197.0740509033203, "min": -389.81378173828125, "p10": -72.47053680419921, "median": 110.54150390625, "p90": 375.9945983886719, "max": 683.304443359375, "pos_frac": 0.796875, "sample": [230.44097900390625, 333.7236328125, 479.51690673828125, -305.3886413574219, -389.81378173828125, -24.685287475585938, 55.350852966308594, 257.7352600097656, 99.7512435913086, 307.0756530761719, 378.8245849609375, -45.508331298828125, 121.3317642211914, 183.58689880371094, 26.063812255859375, 54.345924377441406, 80.80724334716797, 139.5728759765625, -147.02682495117188, 198.86758422851562, 279.9561462402344, 144.1031951904297, -29.100242614746094, 206.1239013671875, 444.12579345703125, 267.42669677734375, 15.897420883178711, 78.8833999633789, 208.68710327148438, 99.04840850830078, -235.48036193847656, 391.97314453125, 683.304443359375, -74.25367736816406, -138.86557006835938, 273.913330078125, 336.13916015625, 40.875762939453125, 81.69517517089844, 3.6075401306152344, 588.199462890625, -199.1700439453125, 187.38998413085938, 6.561748504638672, 146.14651489257812, 125.17784881591797, 51.524566650390625, 206.41000366210938, -68.30987548828125, -10.513774871826172, 206.24722290039062, 336.13726806640625, 91.66064453125, 23.43958854675293, 1.2839813232421875, 159.63442993164062, 14.789108276367188, 60.11283874511719, -31.93659210205078, 369.39129638671875, 152.96937561035156, 151.39797973632812, 90.44184875488281, 414.90716552734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000270.npy"}
{"epoch": 0.3964757709251101, "step": 271, "batch_size": 64, "mean": 155.0682830810547, "std": 175.69522094726562, "min": -124.81519317626953, "p10": -26.984109878540025, "median": 109.7655258178711, "p90": 408.9490203857423, "max": 678.5303344726562, "pos_frac": 0.796875, "sample": [678.5303344726562, 373.6170959472656, 96.78616333007812, -33.59342575073242, 216.43736267089844, -14.511981964111328, 382.7764587402344, 371.3409729003906, 36.59613800048828, -10.746406555175781, 320.2752685546875, -124.27667236328125, 420.16583251953125, -124.81519317626953, 83.81153106689453, -32.329307556152344, 75.78274536132812, -0.06322479248046875, 142.77162170410156, 2.584259033203125, -81.3681411743164, 7.7426910400390625, 463.552490234375, 337.41455078125, 460.3779296875, 367.25311279296875, 65.91368865966797, -41.04881286621094, 68.98885345458984, 116.93695831298828, -7.763240814208984, 227.1538543701172, 144.61199951171875, -8.192512512207031, -46.724395751953125, 130.73399353027344, 58.511810302734375, 22.95299530029297, 328.0540771484375, 109.77949523925781, 12.787605285644531, 496.5714111328125, 66.17765045166016, -5.496721267700195, 95.87828063964844, 35.66108322143555, 109.75155639648438, 122.74961853027344, 325.9971008300781, 194.46884155273438, 462.9224853515625, 263.0326232910156, 89.81372833251953, 25.734146118164062, 457.39959716796875, 22.697242736816406, 211.59481811523438, 344.052001953125, 29.73492431640625, 194.6819305419922, 135.64108276367188, 291.96649169921875, 150.92135620117188, 203.6096954345703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000271.npy"}
{"epoch": 0.39794419970631423, "step": 272, "batch_size": 64, "mean": 193.73599243164062, "std": 186.1200714111328, "min": -178.7349090576172, "p10": -45.670285034179685, "median": 197.97667694091797, "p90": 460.6904479980469, "max": 605.284912109375, "pos_frac": 0.8125, "sample": [288.03460693359375, 198.75244140625, -12.83824348449707, -107.35073852539062, -23.989028930664062, 349.46746826171875, 223.5023956298828, 476.9642333984375, 466.92755126953125, 281.76312255859375, 322.1748352050781, 353.3435974121094, -45.60487365722656, 159.850830078125, -164.04061889648438, 81.46409606933594, 97.06816864013672, 203.39964294433594, 161.81399536132812, 143.32070922851562, -66.48793029785156, 273.54656982421875, 312.5921325683594, 206.1044921875, 316.9849853515625, 92.77291107177734, 0.2643928527832031, 285.2188415527344, 123.0867691040039, 599.0555419921875, 48.24229431152344, 192.56295776367188, 203.9054718017578, 154.28628540039062, 68.86305236816406, 375.5572814941406, 485.1900939941406, -178.7349090576172, 214.78720092773438, 236.45458984375, 605.284912109375, 15.1973876953125, 496.41424560546875, 151.5072021484375, 281.1353759765625, -69.7238998413086, 287.83929443359375, 62.04023361206055, 219.23008728027344, 400.4822692871094, -45.69831848144531, 197.20091247558594, -11.147171020507812, -106.57167053222656, -41.43488311767578, 269.4427490234375, 304.6540832519531, 48.753868103027344, 422.66668701171875, 160.46876525878906, 183.11904907226562, 446.13720703125, 535.5469970703125, 188.28152465820312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000272.npy"}
{"epoch": 0.39941262848751835, "step": 273, "batch_size": 64, "mean": 188.71389770507812, "std": 199.29367065429688, "min": -254.57606506347656, "p10": -52.05585632324218, "median": 173.64932250976562, "p90": 445.71768798828145, "max": 673.5010986328125, "pos_frac": 0.796875, "sample": [243.44012451171875, 290.5838623046875, 98.88140869140625, 155.36338806152344, 530.8624267578125, 471.6142578125, 173.58749389648438, -96.08808135986328, 659.5315551757812, -47.21870422363281, 380.3014831542969, 196.40383911132812, 2.17510986328125, 73.01439666748047, -1.5878276824951172, 159.5023193359375, -1.8426399230957031, 259.0570068359375, 187.73934936523438, 359.93084716796875, 233.1846466064453, 52.96697998046875, 70.29840087890625, 156.9845733642578, 98.63056945800781, 60.683326721191406, 344.712646484375, 182.9138946533203, 173.71115112304688, 182.45401000976562, 332.16607666015625, -10.969413757324219, 358.3202819824219, 319.39788818359375, -14.914865493774414, 606.2015380859375, 551.8880004882812, 234.08216857910156, 165.1299285888672, 282.46527099609375, -199.63357543945312, -105.43211364746094, 276.49334716796875, -90.29953002929688, 257.52984619140625, 290.3841552734375, 366.8726806640625, 55.25889587402344, 144.02377319335938, -254.57606506347656, 673.5010986328125, 162.22119140625, 95.10801696777344, -102.21316528320312, -18.02557373046875, 328.8329162597656, 53.97322082519531, 122.90982055664062, 277.0185241699219, -54.12892150878906, 349.8601379394531, 105.9997329711914, 401.9978942871094, 464.4547424316406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000273.npy"}
{"epoch": 0.4008810572687225, "step": 274, "batch_size": 64, "mean": 144.90960693359375, "std": 188.8855438232422, "min": -217.79246520996094, "p10": -106.46806640624999, "median": 147.07884216308594, "p90": 430.7862121582033, "max": 566.89892578125, "pos_frac": 0.75, "sample": [90.17487335205078, 87.44947814941406, 35.025299072265625, -156.9467315673828, 215.18190002441406, 160.72323608398438, 214.14903259277344, -35.917335510253906, 116.39495086669922, -3.1107940673828125, -120.19412231445312, 445.97021484375, 340.83636474609375, 227.40859985351562, 285.5811462402344, 160.4265594482422, 33.15776824951172, 460.21307373046875, -217.79246520996094, 264.008056640625, -132.3209991455078, 148.16966247558594, -59.4010009765625, 74.59756469726562, 60.569297790527344, 262.03216552734375, 450.5819396972656, -100.80517578125, 98.79183197021484, 561.51171875, -39.0276985168457, 156.41036987304688, 313.72528076171875, 167.79122924804688, 2.1289615631103516, 190.3794708251953, 463.8938903808594, -10.798248291015625, 198.7481231689453, 92.98112487792969, 293.4228820800781, -2.6534271240234375, 145.98802185058594, 275.3815002441406, 390.54119873046875, -82.08513641357422, 122.32020568847656, 44.19480895996094, 187.02142333984375, 544.23388671875, 286.4640197753906, 59.22364807128906, 8.861358642578125, 152.57119750976562, -173.05239868164062, 311.298828125, -125.43890380859375, -108.89501953125, 566.89892578125, 203.38674926757812, 125.29098510742188, 395.35687255859375, -31.021392822265625, 182.20700073242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000274.npy"}
{"epoch": 0.4023494860499266, "step": 275, "batch_size": 64, "mean": 153.61099243164062, "std": 190.96343994140625, "min": -226.67861938476562, "p10": -76.83283538818358, "median": 151.7874298095703, "p90": 406.15877990722663, "max": 596.7031860351562, "pos_frac": 0.796875, "sample": [186.65272521972656, 55.74253845214844, 307.1148376464844, 12.236812591552734, 1.4790706634521484, -100.4480972290039, 386.8209533691406, 138.2969207763672, 495.7926940917969, -102.92012786865234, 263.0954284667969, 441.3367614746094, 548.6967163085938, 82.75682830810547, 175.72946166992188, 263.29046630859375, -17.531864166259766, 4.9331207275390625, 139.98504638671875, 190.86170959472656, -93.2625503540039, 228.97314453125, 454.6976013183594, -34.72167205810547, 55.475318908691406, -194.4312744140625, 275.93084716796875, 177.34426879882812, 169.61764526367188, 62.58092498779297, -79.64920806884766, 59.68260192871094, 337.14080810546875, 217.46005249023438, 3.967527389526367, 5.222259521484375, -219.7823486328125, -17.016876220703125, -25.5706844329834, 35.50761413574219, 77.23110961914062, 358.78240966796875, -226.67861938476562, 154.8441925048828, 247.47096252441406, 410.86199951171875, 208.75680541992188, 596.7031860351562, 202.97915649414062, 467.02008056640625, 395.1846008300781, -36.46776580810547, 224.91036987304688, 380.6585693359375, 47.47870635986328, 359.15673828125, 148.7306671142578, 355.8638610839844, -70.26129913330078, 200.55374145507812, 41.84516143798828, 305.2115173339844, 57.161376953125, 30.018341064453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000275.npy"}
{"epoch": 0.40381791483113066, "step": 276, "batch_size": 64, "mean": 153.95571899414062, "std": 207.44520568847656, "min": -262.9680480957031, "p10": -68.5997604370117, "median": 157.39041900634766, "p90": 429.074154663086, "max": 831.7674560546875, "pos_frac": 0.8125, "sample": [111.30170440673828, 831.7674560546875, 144.16847229003906, 449.91729736328125, 437.5475158691406, 13.124944686889648, -13.095535278320312, 538.4051513671875, 25.52357292175293, 158.74110412597656, 166.71131896972656, 19.34345245361328, -41.50958251953125, 255.81680297851562, 303.3663635253906, 253.63180541992188, 217.44894409179688, -262.9680480957031, 139.6118621826172, 1.8088417053222656, 38.281673431396484, 352.18841552734375, 194.4160614013672, 335.7856750488281, -76.7470703125, -225.87759399414062, -19.264179229736328, 81.61253356933594, 197.0264892578125, 342.7472839355469, -49.58937072753906, 190.4007568359375, 40.723899841308594, 167.36752319335938, 321.8873291015625, 278.6158447265625, -138.17495727539062, -206.68385314941406, 2.912332534790039, 7.584774017333984, -137.07846069335938, 12.708099365234375, 28.771907806396484, 458.5301818847656, 94.12317657470703, 161.7047119140625, 253.5865020751953, 218.66505432128906, 287.1382141113281, 34.544349670410156, 504.71630859375, -28.206192016601562, 162.19573974609375, 245.25527954101562, 35.11126708984375, 238.8011932373047, 234.2398223876953, 156.03973388671875, 375.9478759765625, 65.21197509765625, -176.07867431640625, 595.843505859375, 36.215240478515625, 409.302978515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000276.npy"}
{"epoch": 0.4052863436123348, "step": 277, "batch_size": 64, "mean": 158.27908325195312, "std": 207.5612030029297, "min": -299.20843505859375, "p10": -106.64529113769531, "median": 149.47364807128906, "p90": 442.4332489013672, "max": 564.89892578125, "pos_frac": 0.796875, "sample": [3.8423118591308594, 313.1016845703125, -110.49794006347656, 174.4444580078125, 35.14046859741211, 315.41754150390625, 375.201171875, 159.17010498046875, -97.65577697753906, 201.53094482421875, 172.87847900390625, -41.39203643798828, 364.0240478515625, -34.45966339111328, 220.7613067626953, 382.10565185546875, 60.078453063964844, 138.64382934570312, 205.37417602539062, 303.60809326171875, -111.0749740600586, 399.0570983886719, 37.58717346191406, 51.444679260253906, -219.3553466796875, -75.23912811279297, 523.0684204101562, 43.15435791015625, -18.292724609375, 167.62387084960938, -299.20843505859375, 527.0547485351562, 166.10964965820312, 298.20599365234375, -214.78073120117188, -186.36431884765625, 564.89892578125, 279.574462890625, 138.58596801757812, -36.25579833984375, 242.62451171875, 21.149200439453125, 537.4544677734375, 33.46525955200195, 12.059234619140625, 112.28714752197266, 227.57528686523438, 363.7116394042969, 436.9892578125, 499.54534912109375, -155.30056762695312, 293.00762939453125, 59.61304473876953, 92.6358871459961, 12.679611206054688, 3.4791603088378906, 279.43402099609375, 139.77719116210938, 444.7663879394531, 481.9825439453125, 104.95957946777344, 181.0447998046875, 105.58988189697266, 422.2189636230469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000277.npy"}
{"epoch": 0.4067547723935389, "step": 278, "batch_size": 64, "mean": 183.9108123779297, "std": 208.61668395996094, "min": -162.496826171875, "p10": -56.50035820007324, "median": 141.71035766601562, "p90": 518.2839721679687, "max": 675.9649047851562, "pos_frac": 0.796875, "sample": [-94.4345703125, 354.9749450683594, -73.77880096435547, 30.69534683227539, 322.2574157714844, 332.61541748046875, 234.80618286132812, 68.82920837402344, 164.37179565429688, 108.0872573852539, 292.3514709472656, 175.22763061523438, 30.637136459350586, -58.12213134765625, 123.09344482421875, 245.89027404785156, -5.669708251953125, 337.4587097167969, 516.046142578125, 10.778678894042969, 0.2273693084716797, 605.597412109375, 401.3319091796875, 201.04702758789062, 279.7080383300781, 308.87042236328125, 132.16781616210938, -25.160133361816406, 181.32223510742188, 533.7365112304688, 493.8556213378906, -70.08372497558594, 111.64370727539062, 21.4351806640625, 587.2288208007812, 519.2430419921875, 42.375465393066406, -42.95098876953125, 151.25289916992188, 196.99603271484375, 98.52689361572266, 431.512939453125, 184.13119506835938, 62.37419128417969, 39.158599853515625, 546.5181884765625, 122.10623931884766, 236.32350158691406, -60.349552154541016, 610.3607177734375, 193.3105926513672, 120.88705444335938, 14.745147705078125, 30.00586700439453, 346.2662048339844, 302.1500244140625, -39.16248321533203, -26.68428611755371, 675.9649047851562, -162.496826171875, 367.58428955078125, 116.85623168945312, -133.04190063476562, -52.71622085571289], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000278.npy"}
{"epoch": 0.40822320117474303, "step": 279, "batch_size": 64, "mean": 180.64385986328125, "std": 241.22213745117188, "min": -390.16357421875, "p10": -47.90715713500976, "median": 143.60595703125, "p90": 502.3202117919922, "max": 917.455322265625, "pos_frac": 0.78125, "sample": [73.35765838623047, 59.62189483642578, 53.2892951965332, 35.929107666015625, -38.37823486328125, -49.98876190185547, 398.4627685546875, 509.8562927246094, -34.12415313720703, -42.620540618896484, 23.37981414794922, 245.95555114746094, 161.896484375, 33.90765380859375, 506.6279602050781, 649.5421142578125, 54.50519943237305, 177.91571044921875, 270.13555908203125, 295.92047119140625, 704.46533203125, 95.01250457763672, 32.50574493408203, -274.259521484375, 41.059722900390625, 83.26604461669922, -15.387664794921875, 205.48373413085938, 86.76248168945312, 579.8175048828125, 118.1973876953125, 41.48204803466797, 235.36068725585938, -32.53905487060547, 262.73004150390625, 468.7352600097656, 250.50064086914062, 917.455322265625, 492.268798828125, 300.50579833984375, -27.694931030273438, -43.050079345703125, -171.6376495361328, -390.16357421875, 152.1464080810547, 465.5022888183594, 95.1434326171875, 267.4342956542969, 363.18756103515625, 135.0655059814453, 398.20806884765625, 319.35479736328125, 80.13626861572266, -117.98391723632812, 506.9062194824219, 208.67837524414062, 424.25115966796875, 278.6318359375, -106.37047576904297, -188.62574768066406, 381.63604736328125, 282.64581298828125, 2.886821746826172, 266.3047180175781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000279.npy"}
{"epoch": 0.40969162995594716, "step": 280, "batch_size": 64, "mean": 186.25331115722656, "std": 195.02511596679688, "min": -303.6202087402344, "p10": -65.63065948486327, "median": 174.75436401367188, "p90": 440.5317626953125, "max": 573.4691162109375, "pos_frac": 0.796875, "sample": [133.70372009277344, 313.12774658203125, 121.73624420166016, 237.7592315673828, 32.13887023925781, -43.192420959472656, 377.6420593261719, 489.42547607421875, 499.22283935546875, 36.704620361328125, 219.39573669433594, -22.226524353027344, -86.36180114746094, 373.6611328125, 402.3571472167969, 389.860595703125, 130.7431640625, 272.145263671875, 169.81781005859375, 279.5368957519531, 104.19741821289062, 535.5711059570312, 438.05621337890625, 196.52554321289062, 261.1910095214844, 100.89755249023438, 357.18115234375, 345.24591064453125, -1.64263916015625, 236.83197021484375, 184.94546508789062, 152.27911376953125, 560.4501953125, 156.29891967773438, -52.6446533203125, 151.64476013183594, -176.68109130859375, 388.3701477050781, 217.7863006591797, 83.43470001220703, 219.35153198242188, 112.47473907470703, -103.61857604980469, 573.4691162109375, -71.19609069824219, -303.6202087402344, 139.90261840820312, 503.9786682128906, 179.69091796875, 130.90176391601562, 194.49510192871094, 441.59271240234375, -44.87068176269531, 153.18202209472656, -28.115936279296875, -79.3259506225586, 222.52825927734375, 131.5486297607422, 114.56863403320312, 351.2685546875, 318.4876708984375, -180.7540283203125, 13.064346313476562, 364.07110595703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000280.npy"}
{"epoch": 0.4111600587371512, "step": 281, "batch_size": 64, "mean": 169.2798614501953, "std": 192.94300842285156, "min": -600.8914794921875, "p10": -30.411274337768536, "median": 153.67955780029297, "p90": 408.5805084228516, "max": 563.1823120117188, "pos_frac": 0.84375, "sample": [136.01312255859375, 311.94091796875, 148.25164794921875, 232.33143615722656, 48.85782241821289, 185.37188720703125, 60.07818603515625, 210.10952758789062, 203.0427703857422, 54.230445861816406, 239.046142578125, 435.9656677246094, 281.6234436035156, 81.13184356689453, 244.5526885986328, -99.12237548828125, 197.13943481445312, 563.1823120117188, 55.857940673828125, 159.1074676513672, -12.085479736328125, 398.81884765625, 293.766845703125, 552.8014526367188, 223.65310668945312, 268.0579528808594, 191.96371459960938, 268.805908203125, -38.26518630981445, 543.38623046875, 363.2693176269531, 110.24288940429688, 189.47030639648438, 399.9371337890625, 200.0406494140625, -600.8914794921875, 65.05552673339844, 93.25663757324219, 94.07015228271484, 50.601654052734375, 492.08843994140625, 20.142379760742188, -3.7266693115234375, 316.43377685546875, 140.64117431640625, 248.2052001953125, 49.02607727050781, 127.13447570800781, 506.19232177734375, 87.12952423095703, -83.13973236083984, 137.38095092773438, -136.42576599121094, 90.21963500976562, -59.742950439453125, -2.4308547973632812, -58.09336853027344, 400.3551025390625, 235.63987731933594, 65.5416488647461, 137.02597045898438, 299.85589599609375, 7.683897018432617, 412.1056823730469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000281.npy"}
{"epoch": 0.41262848751835535, "step": 282, "batch_size": 64, "mean": 129.77859497070312, "std": 213.11900329589844, "min": -215.1263427734375, "p10": -111.02870483398436, "median": 113.74681091308594, "p90": 351.6037017822266, "max": 1146.318603515625, "pos_frac": 0.75, "sample": [432.5049743652344, 147.73851013183594, 72.85404968261719, -209.92404174804688, 185.62303161621094, 270.8675231933594, 84.6458740234375, 41.74650573730469, 148.4884033203125, 356.0662841796875, 268.3674621582031, 170.097900390625, 316.6797790527344, 87.41155242919922, -26.092147827148438, 240.26397705078125, 1146.318603515625, 162.69595336914062, 224.374755859375, 116.89459228515625, 149.52410888671875, 207.90505981445312, 28.64260482788086, -134.2603759765625, 571.288818359375, 378.754150390625, 96.38145446777344, 159.37782287597656, 157.01333618164062, 520.6638793945312, 311.58056640625, 194.9647979736328, 304.27734375, -18.012863159179688, 12.804973602294922, 122.51547241210938, -117.06938171386719, -105.33992767333984, 65.61767578125, -113.35403442382812, -45.60577392578125, 42.451812744140625, 138.85691833496094, 131.09756469726562, 219.21205139160156, -14.632179260253906, -105.60293579101562, 386.1100158691406, 206.36822509765625, 110.59902954101562, -52.822364807128906, -59.26106262207031, 26.030807495117188, 51.44894790649414, 97.05641174316406, -175.47344970703125, 286.9488525390625, 5.39137077331543, -120.8541488647461, 341.1910095214844, -215.1263427734375, 36.31339645385742, -55.94261932373047, 41.17564392089844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000282.npy"}
{"epoch": 0.41409691629955947, "step": 283, "batch_size": 64, "mean": 190.0657196044922, "std": 202.4391326904297, "min": -438.3199462890625, "p10": -40.5438907623291, "median": 180.05704498291016, "p90": 470.91626586914066, "max": 629.000732421875, "pos_frac": 0.828125, "sample": [-108.00448608398438, -71.07347869873047, 212.9485321044922, 34.304283142089844, 310.82470703125, 321.46954345703125, -38.947509765625, 257.1757507324219, 371.6214904785156, 71.8259048461914, 67.74840545654297, 155.8154754638672, -41.22805404663086, 91.70497131347656, 110.55431365966797, 173.6273193359375, -438.3199462890625, 45.78580856323242, 309.124267578125, -61.581336975097656, 494.8941345214844, 446.9156188964844, 176.8542938232422, 305.10528564453125, 26.809154510498047, 134.65765380859375, -57.93333435058594, 467.480224609375, 362.43865966796875, 510.4625244140625, 43.07708740234375, 293.74603271484375, -129.2599334716797, 200.1275634765625, 472.38885498046875, 45.42268371582031, 58.985565185546875, -25.505603790283203, 89.35026550292969, -28.548952102661133, 480.35791015625, 207.0680694580078, 314.163818359375, 205.8447265625, 282.3589172363281, 183.25979614257812, 92.90351104736328, 191.40478515625, 426.9664306640625, 269.7940673828125, 579.9229736328125, 31.948387145996094, 377.50665283203125, 153.57211303710938, 66.21778106689453, 305.7341613769531, 565.5855712890625, 213.26954650878906, 312.45245361328125, 629.000732421875, 435.174560546875, 108.76532745361328, 102.0114517211914, -33.92152404785156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000283.npy"}
{"epoch": 0.4155653450807636, "step": 284, "batch_size": 64, "mean": 184.56295776367188, "std": 199.60781860351562, "min": -205.73837280273438, "p10": -33.03383140563963, "median": 163.32579040527344, "p90": 484.10634460449245, "max": 727.7727661132812, "pos_frac": 0.859375, "sample": [-89.77310180664062, 135.98825073242188, 325.4407958984375, 38.6646728515625, 121.98090362548828, 72.34381866455078, 370.97210693359375, 17.62731170654297, 268.4465637207031, 422.56494140625, 367.087158203125, -38.595890045166016, 510.4812316894531, 3.3359012603759766, 279.61968994140625, 20.283679962158203, 41.84593963623047, 241.54141235351562, 282.39434814453125, 517.6112060546875, 56.7120361328125, 668.6159057617188, 170.04049682617188, 289.3481140136719, 158.76358032226562, -11.261886596679688, 168.31907653808594, 84.334228515625, 267.400390625, -20.055694580078125, 22.875457763671875, 727.7727661132812, 173.82762145996094, -45.756065368652344, 152.7130126953125, 100.15179443359375, -79.4003677368164, 79.8955307006836, 228.69467163085938, 725.6181640625, 76.74479675292969, 200.46817016601562, 192.44638061523438, 72.08871459960938, -86.68118286132812, -205.73837280273438, 271.3328857421875, 312.53277587890625, 123.27207946777344, 266.5393371582031, 527.6796264648438, 167.88800048828125, 313.4736633300781, 205.79588317871094, 51.05769348144531, 15.105335235595703, -51.34709930419922, 8.678642272949219, 355.08148193359375, 245.4727325439453, 38.185829162597656, 513.08935546875, 22.20610809326172, 348.1865234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000284.npy"}
{"epoch": 0.4170337738619677, "step": 285, "batch_size": 64, "mean": 165.91085815429688, "std": 198.38706970214844, "min": -507.76763916015625, "p10": -55.93999404907223, "median": 156.27227783203125, "p90": 387.75461425781253, "max": 598.307861328125, "pos_frac": 0.859375, "sample": [377.88916015625, 260.1072082519531, 45.69364929199219, 391.982666015625, 135.0843505859375, 122.6170654296875, 177.85208129882812, 59.285987854003906, 147.28379821777344, 598.307861328125, 279.2724609375, 238.3492889404297, 320.1449890136719, 146.4853515625, 161.6254425048828, 575.1755981445312, 26.55975341796875, 120.18142700195312, 146.40643310546875, 247.24658203125, 451.0813903808594, -228.67465209960938, 89.33296966552734, 313.169189453125, 429.5978698730469, 10.274604797363281, -69.21014404296875, 86.0194091796875, 53.97393798828125, 89.6108169555664, 256.60003662109375, 150.9191131591797, -131.62173461914062, 22.23232650756836, 272.5209045410156, 318.5157470703125, 44.009925842285156, -507.76763916015625, 267.1863098144531, 48.652740478515625, 167.98800659179688, 16.286865234375, 589.52392578125, 271.8099670410156, -24.97631072998047, 321.95086669921875, 299.87701416015625, 136.8016815185547, 287.24945068359375, 353.13006591796875, 175.48858642578125, -199.5603790283203, 296.5445861816406, 225.30465698242188, -191.17103576660156, -8.284061431884766, 435.0147705078125, 95.44216918945312, 98.5956039428711, 163.3798065185547, -152.26544189453125, 353.845458984375, 218.55734252929688, 143.7869873046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000285.npy"}
{"epoch": 0.4185022026431718, "step": 286, "batch_size": 64, "mean": 180.1011199951172, "std": 218.9644012451172, "min": -343.2037658691406, "p10": -76.42151412963867, "median": 172.2771759033203, "p90": 488.15527343750006, "max": 647.7030029296875, "pos_frac": 0.78125, "sample": [61.30107879638672, -62.124107360839844, 499.9543762207031, 126.77288818359375, 171.53237915039062, 170.95724487304688, 55.53026580810547, 260.5977783203125, 347.49493408203125, -343.2037658691406, 286.9820251464844, 242.16983032226562, 44.75285339355469, -72.70439910888672, 466.5471496582031, 177.52517700195312, 543.50146484375, -30.54327392578125, 173.02197265625, 175.18019104003906, 491.61468505859375, 66.82095336914062, -304.05462646484375, 480.41015625, 309.38909912109375, -55.95794677734375, -66.21580505371094, 317.50201416015625, 331.2931213378906, 550.7698364257812, 431.19232177734375, -173.33578491210938, 175.77464294433594, 92.40890502929688, 23.234359741210938, 87.02296447753906, 377.88116455078125, 146.9044952392578, 5.909088134765625, -76.77836608886719, 197.27561950683594, -75.58885955810547, 244.49227905273438, -89.85787200927734, 99.15042114257812, 439.23529052734375, 476.61578369140625, 156.53025817871094, -114.7200927734375, 574.318603515625, -42.207374572753906, 233.7425537109375, 67.18051147460938, 491.474609375, 148.06707763671875, 308.1260681152344, 115.72213745117188, 242.597412109375, 288.1621398925781, -90.13241577148438, 647.7030029296875, 414.5998840332031, 179.35162353515625, 107.6015396118164], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000286.npy"}
{"epoch": 0.4199706314243759, "step": 287, "batch_size": 64, "mean": 145.67831420898438, "std": 192.9538116455078, "min": -273.5018005371094, "p10": -71.45898361206055, "median": 118.87469482421875, "p90": 420.3535522460938, "max": 656.54345703125, "pos_frac": 0.8125, "sample": [124.89500427246094, 53.889251708984375, 3.120443344116211, 75.20500946044922, 144.7632293701172, -62.059417724609375, 299.962158203125, 561.2273559570312, 179.9551239013672, 138.21109008789062, 551.124755859375, 99.92039489746094, 211.0780487060547, 516.6195068359375, -84.5853271484375, -72.30055236816406, 24.82067108154297, 431.79150390625, 118.91778564453125, 334.61126708984375, 212.7745819091797, 6.650808334350586, 147.4498291015625, -273.5018005371094, 656.54345703125, 81.25940704345703, 53.91693878173828, -16.40118408203125, -61.07408142089844, 51.320640563964844, 163.83367919921875, -48.409584045410156, -158.73443603515625, 150.71315002441406, 76.26853942871094, 234.45594787597656, 66.64602661132812, 255.77479553222656, 113.9090576171875, 286.52197265625, 10.307065963745117, 260.12249755859375, 211.9720458984375, 84.50462341308594, 26.923416137695312, 118.83160400390625, 112.1649169921875, 44.21421813964844, 353.32757568359375, 370.626953125, -158.53219604492188, 67.41545867919922, 216.3985137939453, 53.044639587402344, 414.5347595214844, -200.72915649414062, -146.37464904785156, -69.49532318115234, 203.2760009765625, 343.1878356933594, 459.72320556640625, 422.8473205566406, 203.2984161376953, 270.737548828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000287.npy"}
{"epoch": 0.42143906020558003, "step": 288, "batch_size": 64, "mean": 188.2078399658203, "std": 203.32125854492188, "min": -329.3408508300781, "p10": 4.218954086303712, "median": 173.44625091552734, "p90": 413.9374969482422, "max": 720.7265625, "pos_frac": 0.90625, "sample": [171.5308837890625, 173.85427856445312, 3.7950477600097656, 196.10305786132812, 329.119873046875, 406.60101318359375, 24.248624801635742, 720.7265625, 41.27545166015625, 372.631103515625, 267.5583801269531, 170.31173706054688, -105.94165802001953, 16.721389770507812, 101.37667846679688, 173.03822326660156, 130.37416076660156, 412.1664123535156, 252.135009765625, 339.1766052246094, 193.9298095703125, 269.1303405761719, 298.215087890625, 5.20806884765625, 83.61341857910156, 177.85916137695312, 171.177734375, 7.523279190063477, 182.57989501953125, 63.43382263183594, 199.80841064453125, 221.88226318359375, 617.8651733398438, 166.45053100585938, 414.696533203125, 346.3114013671875, 193.18418884277344, 10.87286376953125, 200.17681884765625, 100.61780548095703, -329.3408508300781, 607.47412109375, 109.4442138671875, 284.71923828125, 72.71824645996094, 167.02493286132812, 54.98419952392578, 33.74981689453125, 520.8206176757812, 120.07915496826172, 256.48272705078125, 596.4164428710938, 223.46337890625, -312.52215576171875, 74.43698120117188, 540.1510009765625, 315.0740051269531, 69.20518493652344, 53.359100341796875, 396.33563232421875, -9.723670959472656, 346.863525390625, -136.63218688964844, -130.62094116210938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000288.npy"}
{"epoch": 0.42290748898678415, "step": 289, "batch_size": 64, "mean": 207.6798095703125, "std": 227.2295379638672, "min": -207.57064819335938, "p10": -68.68418655395507, "median": 160.5833282470703, "p90": 545.8766662597657, "max": 698.9129028320312, "pos_frac": 0.828125, "sample": [159.03317260742188, -48.003082275390625, 561.4141845703125, 643.187255859375, -165.13235473632812, 568.830810546875, 395.54388427734375, 291.24664306640625, 54.803955078125, 549.379638671875, 527.953125, 423.9447021484375, 280.2190856933594, 537.7030639648438, 360.02099609375, 334.61962890625, 26.494308471679688, 698.9129028320312, -70.45854949951172, 249.61917114257812, 104.42886352539062, 25.494260787963867, 119.00201416015625, -64.54400634765625, -111.9794921875, 54.075679779052734, 315.4248046875, 425.7894592285156, 65.91627502441406, 131.35813903808594, 417.2029724121094, 639.2325439453125, 355.4263610839844, -49.35988998413086, 67.6381607055664, 65.83616638183594, 598.935546875, 93.09999084472656, -109.839599609375, 16.117055892944336, 102.22465515136719, 476.00555419921875, -73.71487426757812, 263.9169921875, 204.3079071044922, 409.9808349609375, 403.83502197265625, -19.216917037963867, 137.15980529785156, -99.72305297851562, 161.08502197265625, 216.47979736328125, 73.67779541015625, 37.36973571777344, 529.6629638671875, 190.2410888671875, 23.227712631225586, 31.99431610107422, 160.08163452148438, 180.75439453125, 233.85214233398438, -207.57064819335938, 261.2351379394531, 56.053733825683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000289.npy"}
{"epoch": 0.4243759177679883, "step": 290, "batch_size": 64, "mean": 212.3250274658203, "std": 258.9597473144531, "min": -316.6517028808594, "p10": -65.00979995727539, "median": 206.65811157226562, "p90": 601.6361145019532, "max": 906.8594970703125, "pos_frac": 0.8125, "sample": [538.8394775390625, 198.92764282226562, 120.63465118408203, 162.58645629882812, -316.6517028808594, 399.7316589355469, 448.2889404296875, -66.86343383789062, -11.385086059570312, 137.45237731933594, -237.067626953125, 393.7013854980469, 227.76123046875, 27.062530517578125, 255.10089111328125, 214.38858032226562, 512.2964477539062, -22.8223876953125, 248.00991821289062, 11.89310073852539, 59.57954406738281, 50.53898620605469, -173.953857421875, 119.0599365234375, 143.83731079101562, -279.78741455078125, 127.20806884765625, 288.7187194824219, 906.8594970703125, 728.7366943359375, 85.15177917480469, 228.7117156982422, 302.4664306640625, 671.9439697265625, 234.22625732421875, 44.85289001464844, 261.2795715332031, 118.80122375488281, 278.4225769042969, 176.5274658203125, 14.74456787109375, 219.0373077392578, 214.66575622558594, 354.1407470703125, 52.2628173828125, 286.74774169921875, -60.684654235839844, -56.555633544921875, 341.50506591796875, -107.61264038085938, 680.6522216796875, 448.7901611328125, 277.1404113769531, 627.3924560546875, 16.767597198486328, 612.7105712890625, 575.7957153320312, 339.0545654296875, -21.443042755126953, 79.90365600585938, 75.70889282226562, 830.559814453125, 251.88250732421875, -79.43107604980469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000290.npy"}
{"epoch": 0.42584434654919234, "step": 291, "batch_size": 64, "mean": 187.67138671875, "std": 224.53297424316406, "min": -446.4739074707031, "p10": -59.19000701904295, "median": 202.54363250732422, "p90": 450.9234619140625, "max": 641.05810546875, "pos_frac": 0.765625, "sample": [198.53221130371094, 206.5550537109375, -15.139724731445312, 37.93921661376953, -8.696395874023438, 294.9317626953125, 641.05810546875, 293.8476867675781, 451.33892822265625, 271.73785400390625, 51.308929443359375, 449.95404052734375, 312.0831604003906, -79.76158905029297, 609.0325317382812, -25.627777099609375, 420.3848876953125, 248.47203063964844, -44.75598907470703, 241.1135711669922, 449.2139892578125, -9.882095336914062, 273.4335632324219, 88.48477172851562, 265.2672119140625, 413.28033447265625, 126.84300994873047, 184.2611541748047, 396.48699951171875, 38.27895736694336, 305.6745300292969, 70.70706939697266, 342.05755615234375, -194.52857971191406, 25.03607177734375, -202.8648681640625, -28.798362731933594, 63.42876434326172, -446.4739074707031, 567.3443603515625, 330.59820556640625, 472.7492980957031, 414.8936462402344, 241.64877319335938, -160.0119171142578, -43.12239074707031, -65.37601470947266, 359.14044189453125, 69.56451416015625, 169.02694702148438, 105.76246643066406, 169.7259521484375, 337.34423828125, 307.6719665527344, 0.929595947265625, 242.75619506835938, 415.2197570800781, 79.96952819824219, 541.798828125, -40.801368713378906, -150.73291015625, 401.49517822265625, 526.99755859375, 2.160938262939453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000291.npy"}
{"epoch": 0.42731277533039647, "step": 292, "batch_size": 64, "mean": 200.83868408203125, "std": 250.30386352539062, "min": -627.4581909179688, "p10": -49.13671150207519, "median": 194.74374389648438, "p90": 530.599642944336, "max": 776.3243408203125, "pos_frac": 0.796875, "sample": [226.65380859375, 61.22708511352539, 6.911102294921875, 202.59262084960938, 280.1916809082031, 156.67141723632812, 129.58148193359375, 121.87471771240234, 191.99349975585938, -627.4581909179688, 731.957763671875, 68.97737884521484, 590.400146484375, 121.64183044433594, 349.7788391113281, -52.054443359375, -237.2799072265625, 429.3392639160156, 69.00772857666016, 318.2546081542969, -9.221122741699219, 734.6650390625, 415.2121887207031, 216.39797973632812, 451.1550598144531, 551.1634521484375, 254.003173828125, 436.531982421875, 149.19174194335938, 133.69540405273438, -123.978515625, 297.824951171875, 9.250873565673828, 483.609130859375, -12.189735412597656, 120.81475067138672, -72.2608642578125, 179.51844787597656, 118.01179504394531, 125.39317321777344, 39.09257507324219, 541.10498046875, 232.828125, 200.06712341308594, 212.53067016601562, 197.49398803710938, 449.9974060058594, 344.3947448730469, 776.3243408203125, -11.671785354614258, 158.39437866210938, 285.9985656738281, 237.16725158691406, -219.3016815185547, 246.7356414794922, -141.97015380859375, 710.1710815429688, -33.02132797241211, -42.328670501708984, 506.0871887207031, 240.12635803222656, 223.55157470703125, 112.48915100097656, -11.636537551879883], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000292.npy"}
{"epoch": 0.4287812041116006, "step": 293, "batch_size": 64, "mean": 154.96490478515625, "std": 243.51544189453125, "min": -295.077880859375, "p10": -135.36778564453124, "median": 115.64213180541992, "p90": 451.2542266845703, "max": 909.344970703125, "pos_frac": 0.71875, "sample": [39.31490707397461, 60.67474365234375, 29.145605087280273, 55.09419250488281, 239.87307739257812, 209.7352752685547, 481.0083923339844, 145.9055633544922, 449.2423095703125, 47.022605895996094, 451.3182678222656, -68.20939636230469, 236.45925903320312, 13.57095718383789, 909.344970703125, -267.60015869140625, 311.6972961425781, 197.9483642578125, 42.045501708984375, 282.4296875, 362.2346496582031, -160.28936767578125, 237.42385864257812, -75.98391723632812, 43.768409729003906, 38.96038055419922, -72.06915283203125, 134.16380310058594, -14.691360473632812, 511.21649169921875, 451.10479736328125, 331.6040954589844, 328.3575439453125, 417.8521423339844, 296.40277099609375, -144.1178741455078, 45.86949157714844, 163.58074951171875, 180.60107421875, -7.221151351928711, -114.82439422607422, 88.2726821899414, -8.886962890625, 297.5903625488281, 351.9783935546875, 366.881103515625, 220.2375030517578, 336.65704345703125, 91.10050964355469, -243.01644897460938, -22.371856689453125, -295.077880859375, -157.7848358154297, 468.61285400390625, 836.0484619140625, 97.1204605102539, -138.50967407226562, 149.2635955810547, 62.19968032836914, -101.69328308105469, -128.03671264648438, 360.2841491699219, 518.0690307617188, -51.149681091308594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000293.npy"}
{"epoch": 0.4302496328928047, "step": 294, "batch_size": 64, "mean": 172.348388671875, "std": 235.93817138671875, "min": -309.9860534667969, "p10": -110.36974792480461, "median": 105.10205078125, "p90": 497.7941772460938, "max": 862.931396484375, "pos_frac": 0.875, "sample": [46.23468780517578, 45.199737548828125, 312.35693359375, 417.0404052734375, 90.1424560546875, 26.323253631591797, -164.45046997070312, 140.40380859375, 4.941802978515625, 69.91675567626953, 19.685317993164062, 40.799285888671875, -45.74050521850586, 536.8935546875, 64.29549407958984, 272.2579650878906, 211.51878356933594, 422.0863952636719, 49.76716995239258, 171.52955627441406, 678.0009155273438, -175.01234436035156, 291.1832275390625, 102.70457458496094, 643.7473754882812, 485.2727966308594, 294.1457824707031, 161.42002868652344, 187.18093872070312, -138.0679931640625, 246.98092651367188, 291.1806335449219, 307.19049072265625, 862.931396484375, -217.348876953125, -165.48123168945312, 643.3287353515625, 172.69561767578125, 426.1440124511719, 19.880592346191406, -309.9860534667969, -178.42669677734375, 224.54132080078125, 303.0498962402344, 587.7620239257812, 107.49952697753906, 15.32602310180664, 165.83843994140625, 492.78839111328125, 7.024126052856445, 320.3539123535156, 342.0048522949219, 41.494651794433594, 147.9695281982422, 46.9458122253418, 43.391510009765625, 58.66714096069336, 39.68433380126953, 17.298587799072266, 69.08088684082031, 499.93951416015625, 77.82913208007812, 1.309722900390625, 59.63050079345703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000294.npy"}
{"epoch": 0.43171806167400884, "step": 295, "batch_size": 64, "mean": 191.00411987304688, "std": 254.74459838867188, "min": -323.26336669921875, "p10": -137.48369522094725, "median": 191.28479766845703, "p90": 527.3473999023438, "max": 927.6494140625, "pos_frac": 0.78125, "sample": [-252.50808715820312, -2.436918258666992, 619.3445434570312, 12.830646514892578, -323.26336669921875, 205.30718994140625, -186.55323791503906, 329.8701477050781, 306.4640808105469, -244.08529663085938, 333.08477783203125, -19.975296020507812, 159.73983764648438, 397.84136962890625, 398.7914733886719, 264.5549011230469, 257.15374755859375, 239.00009155273438, -100.91382598876953, 77.79509735107422, 318.51239013671875, -28.89790916442871, 207.92901611328125, 390.7030944824219, 28.757667541503906, 162.22744750976562, 533.4234619140625, -146.6759490966797, 733.88037109375, 190.14627075195312, 412.6797180175781, 49.65220642089844, 3.1736717224121094, -173.77536010742188, 441.533203125, 571.427734375, 71.56145477294922, -116.03510284423828, -254.3517608642578, 79.43370056152344, 243.04934692382812, 375.49285888671875, 374.64947509765625, 107.66154479980469, -51.31456756591797, 62.9964599609375, 238.67884826660156, 90.5022964477539, 210.13299560546875, 566.211669921875, 325.28521728515625, 314.30657958984375, 5.116668701171875, 927.6494140625, 513.169921875, 469.6180725097656, 155.34844970703125, 192.42332458496094, 205.66542053222656, 178.7318878173828, 31.41974449157715, 115.62374877929688, 651.906982421875, -27.410911560058594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000295.npy"}
{"epoch": 0.4331864904552129, "step": 296, "batch_size": 64, "mean": 131.2469024658203, "std": 275.9085388183594, "min": -720.2102661132812, "p10": -221.3554901123047, "median": 138.17303466796875, "p90": 509.07607421875, "max": 788.1875610351562, "pos_frac": 0.734375, "sample": [130.79034423828125, 69.75684356689453, 185.21868896484375, 782.8721923828125, -154.6099853515625, 571.685302734375, -257.51495361328125, -73.68751525878906, -54.65033721923828, -720.2102661132812, 90.02542114257812, 204.326416015625, -372.153076171875, 214.23211669921875, 156.66819763183594, -248.81210327148438, 154.58538818359375, 337.94598388671875, 624.3265380859375, 393.26300048828125, 203.885498046875, 256.7511901855469, 44.80290985107422, -79.42132568359375, 97.87103271484375, 360.3409423828125, 60.13629150390625, -221.0090789794922, 18.8438720703125, -221.5039520263672, 613.662109375, 18.948165893554688, 278.0021057128906, 41.231040954589844, 227.20846557617188, 531.9388427734375, 172.9557647705078, 84.85531616210938, -398.2484436035156, 247.30609130859375, -333.5221862792969, 294.56927490234375, -100.91138458251953, -64.260986328125, 508.35223388671875, 108.9986343383789, 117.43941497802734, 218.36572265625, 145.55572509765625, 174.11550903320312, 90.70496368408203, -6.694786071777344, -7.591209411621094, 788.1875610351562, 166.29930114746094, 226.85916137695312, 93.7767105102539, 60.687095642089844, 509.38629150390625, -84.32266235351562, 167.1721649169922, 162.91305541992188, 431.947509765625, 359.159423828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000296.npy"}
{"epoch": 0.434654919236417, "step": 297, "batch_size": 64, "mean": 159.77444458007812, "std": 213.42166137695312, "min": -282.5634765625, "p10": -48.653538131713866, "median": 115.60364532470703, "p90": 448.447900390625, "max": 674.572509765625, "pos_frac": 0.734375, "sample": [159.6690216064453, 254.17027282714844, 351.0067443847656, 565.1622924804688, 40.48392868041992, -0.16379356384277344, 308.210693359375, 204.04071044921875, -147.21090698242188, -49.92155838012695, 18.656816482543945, 136.48568725585938, 240.57247924804688, 457.69866943359375, 387.03057861328125, -15.889062881469727, -45.69482421875, 31.010467529296875, -112.6318359375, -11.708076477050781, 319.69793701171875, -175.05787658691406, 72.79237365722656, 1.0817089080810547, 561.9644775390625, 92.94027709960938, 293.5522766113281, 88.03228759765625, 216.7940673828125, 339.1690368652344, 181.1514892578125, 109.82028198242188, 451.5799560546875, 202.71043395996094, 441.1397705078125, -10.763736724853516, 63.12516784667969, -20.02739715576172, 549.0946044921875, 276.7366027832031, 3.4754180908203125, 180.72450256347656, -282.5634765625, 374.08270263671875, 86.56331634521484, -40.203224182128906, 121.38700866699219, 61.84161376953125, 674.572509765625, -35.07649230957031, 131.73043823242188, 441.106689453125, 248.09912109375, -14.2525634765625, -257.3648376464844, 358.77978515625, -16.32469940185547, 63.16505432128906, 239.54574584960938, 614.6197509765625, 97.8358154296875, 396.64093017578125, -65.40547943115234, 16.07328987121582], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000297.npy"}
{"epoch": 0.43612334801762115, "step": 298, "batch_size": 64, "mean": 211.74114990234375, "std": 320.229248046875, "min": -681.2748413085938, "p10": -108.95877304077149, "median": 165.17342376708984, "p90": 627.5915405273438, "max": 1450.043212890625, "pos_frac": 0.734375, "sample": [116.06485748291016, -152.11834716796875, 30.49911117553711, -681.2748413085938, 73.83859252929688, -112.13397216796875, 5.34996223449707, 604.5726928710938, 297.7282409667969, 19.84783935546875, 189.87457275390625, 490.58026123046875, -30.743820190429688, -160.12635803222656, -196.13760375976562, 88.80181121826172, 235.4100341796875, 90.70419311523438, 102.81888580322266, 290.6580505371094, 845.2205810546875, -8.920700073242188, 375.9326171875, -203.95285034179688, 668.9693603515625, 272.118896484375, -108.75492858886719, 586.4313354492188, 681.1785888671875, 691.0714721679688, -109.04613494873047, -83.31112670898438, -4.561912536621094, 7.7701568603515625, -39.214454650878906, 635.0018920898438, 672.8978271484375, 130.5837860107422, 407.33685302734375, 184.10443115234375, -6.910127639770508, 3.34710693359375, 1450.043212890625, 262.7469482421875, 610.3007202148438, 185.46914672851562, 491.08184814453125, 422.662109375, -20.27883529663086, 109.33816528320312, 210.70108032226562, 436.89080810546875, 329.63104248046875, 124.21109771728516, 559.5897827148438, 146.24241638183594, -45.660614013671875, 376.0252990722656, -95.18287658691406, 249.37429809570312, 186.56484985351562, 263.2008056640625, 60.847198486328125, 336.12725830078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000298.npy"}
{"epoch": 0.43759177679882527, "step": 299, "batch_size": 64, "mean": 177.01646423339844, "std": 266.0360412597656, "min": -422.37158203125, "p10": -113.37960815429686, "median": 142.68121337890625, "p90": 566.3854736328127, "max": 877.2796020507812, "pos_frac": 0.75, "sample": [-98.61756134033203, -286.45648193359375, -422.37158203125, 379.3682556152344, 57.36822509765625, -45.02530288696289, 369.73468017578125, 130.994140625, 60.546104431152344, 103.89713287353516, 597.2152099609375, 583.8833618164062, 785.6749267578125, -98.47689056396484, 175.1819610595703, 731.6331176757812, -239.40484619140625, 351.4867248535156, 144.32284545898438, 254.68275451660156, -119.7061996459961, 332.9346008300781, 141.03958129882812, 156.99981689453125, 448.6408386230469, 117.48956298828125, 877.2796020507812, -12.957836151123047, 237.00155639648438, -11.84115982055664, -40.37818145751953, 129.52247619628906, -161.13534545898438, 36.85002136230469, 192.41439819335938, 525.5570678710938, 137.49522399902344, 160.0773468017578, -120.77154541015625, 170.80160522460938, 70.76766204833984, 323.63922119140625, 305.42120361328125, -59.371158599853516, 202.95025634765625, -95.73987579345703, -234.27467346191406, 455.2471618652344, 7.4130859375, 793.388427734375, -95.29084014892578, 65.8722152709961, 264.4104309082031, 327.5933532714844, 248.7022705078125, 306.096923828125, 70.81625366210938, 618.1846923828125, 223.37548828125, 135.8903045654297, 72.545166015625, 372.5207214355469, 23.088706970214844, 192.85647583007812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000299.npy"}
{"epoch": 0.4390602055800294, "step": 300, "batch_size": 64, "mean": 220.83203125, "std": 299.98291015625, "min": -418.0345764160156, "p10": -202.52694396972655, "median": 226.97035217285156, "p90": 643.4610290527344, "max": 788.778564453125, "pos_frac": 0.765625, "sample": [331.3648681640625, 302.77911376953125, 134.53550720214844, 342.53167724609375, 648.443603515625, -407.6434326171875, -208.94677734375, 631.8350219726562, 421.79638671875, 23.165157318115234, -96.6637954711914, 350.33984375, 95.39775085449219, 651.4542236328125, 357.2851257324219, 409.2925720214844, 104.19561767578125, 524.6304321289062, 57.23176574707031, 80.68113708496094, -88.3907470703125, 190.57455444335938, 246.92391967773438, -13.781044006347656, 471.9151916503906, 373.95233154296875, -121.8720703125, 103.51612854003906, 196.9787139892578, 298.28863525390625, 72.59278869628906, 671.940185546875, 280.748046875, -418.0345764160156, 525.2275390625, -269.2140197753906, 583.3447875976562, 313.2753601074219, 524.3448486328125, 196.22955322265625, 89.31058502197266, 452.10540771484375, 667.804443359375, 375.33441162109375, -7.611537933349609, 405.1294250488281, -58.7325325012207, -187.54733276367188, 754.5455322265625, 439.5588073730469, 463.4033203125, -332.720947265625, 259.2463073730469, -265.82916259765625, 8.41867446899414, 205.52474975585938, 516.7739868164062, 79.10467529296875, 672.8201904296875, 788.778564453125, -290.21990966796875, 28.561542510986328, 207.01678466796875, -29.792617797851562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000300.npy"}
{"epoch": 0.44052863436123346, "step": 301, "batch_size": 64, "mean": 210.38327026367188, "std": 264.5196838378906, "min": -209.25933837890625, "p10": -67.87928161621093, "median": 159.7206573486328, "p90": 575.5082977294925, "max": 926.3017578125, "pos_frac": 0.796875, "sample": [168.0690155029297, 320.91156005859375, -61.471412658691406, 343.10260009765625, 431.7069396972656, 301.47406005859375, 820.745361328125, 64.34126281738281, 488.53900146484375, -34.76924133300781, 926.3017578125, 364.96661376953125, 52.14966583251953, 252.99143981933594, -139.84046936035156, 153.41677856445312, 98.70653533935547, 602.82275390625, 60.72230529785156, 344.01580810546875, 768.5917358398438, 32.55327224731445, -90.78009796142578, 66.62974548339844, 434.4326477050781, 56.934051513671875, 37.953269958496094, 43.90779113769531, 259.5697326660156, 370.6318054199219, 306.7742919921875, 321.3946533203125, -4.739871978759766, 168.14877319335938, 59.659767150878906, 77.49276733398438, 213.49534606933594, -161.11537170410156, 10.65815544128418, -13.757080078125, 93.65960693359375, 32.062461853027344, 230.936767578125, -95.17717742919922, 49.66157531738281, 916.3878784179688, 305.63751220703125, 248.13796997070312, 664.6815185546875, 26.422149658203125, 511.7745666503906, 166.0245361328125, 262.2304382324219, 489.92718505859375, 486.26800537109375, 73.53268432617188, -69.1859130859375, -207.4108123779297, 621.469482421875, -209.25933837890625, 101.91349792480469, -64.83047485351562, -20.027584075927734, 332.3538513183594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000301.npy"}
{"epoch": 0.4419970631424376, "step": 302, "batch_size": 64, "mean": 175.31198120117188, "std": 236.5364227294922, "min": -231.7837677001953, "p10": -107.33299179077147, "median": 125.2342529296875, "p90": 462.0748809814454, "max": 848.609375, "pos_frac": 0.75, "sample": [-12.943771362304688, 419.90496826171875, 738.871826171875, -7.736843109130859, 213.37942504882812, -8.13654899597168, 235.36978149414062, 12.662689208984375, 187.71624755859375, 7.612152099609375, 238.30776977539062, 19.34942626953125, 74.9332046508789, 91.51996612548828, 156.79248046875, 66.16511535644531, 436.51708984375, -6.273464202880859, 355.01849365234375, 356.0323791503906, 198.3616485595703, 191.17840576171875, 179.44552612304688, 260.15435791015625, -109.48360443115234, -101.60285186767578, 431.1251220703125, 569.942626953125, 96.6626205444336, -231.7837677001953, 244.72854614257812, -75.2021255493164, 119.5474853515625, -28.899215698242188, -102.31489562988281, 516.0572509765625, 848.609375, 471.6588134765625, 130.9210205078125, -111.24459838867188, -57.71551513671875, 424.95684814453125, -231.50860595703125, 225.45944213867188, 398.7926025390625, 41.498252868652344, 117.32414245605469, 659.6351928710938, 242.98765563964844, 349.6693115234375, 23.888254165649414, 279.2855224609375, -160.71292114257812, 320.7174072265625, 90.56388854980469, 258.6011962890625, -119.24964904785156, 439.7123718261719, 86.01182556152344, 648.6448364257812, -149.53878784179688, 50.63200759887695, 111.63297271728516, 95.75457000732422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000302.npy"}
{"epoch": 0.4434654919236417, "step": 303, "batch_size": 64, "mean": 167.9858856201172, "std": 255.2613983154297, "min": -458.0278015136719, "p10": -132.74627227783202, "median": 156.90269470214844, "p90": 560.5878906250001, "max": 683.1484985351562, "pos_frac": 0.703125, "sample": [303.10565185546875, -0.3652076721191406, 78.9000244140625, 650.2935791015625, -131.37213134765625, 373.43023681640625, 534.8480224609375, 653.5825805664062, -20.49339485168457, -87.90638732910156, 400.5132141113281, 191.88677978515625, 313.614501953125, -133.33518981933594, 64.48722076416016, 171.9658203125, 85.98179626464844, 29.10106086730957, -59.69237518310547, 381.52215576171875, -458.0278015136719, -54.86466598510742, -92.95775604248047, 373.3025817871094, 462.5373229980469, 616.9047241210938, -139.28807067871094, 293.52593994140625, -252.67068481445312, 99.21903991699219, -87.39141845703125, 344.64080810546875, 683.1484985351562, 229.582763671875, 506.51263427734375, 270.833251953125, -163.84677124023438, 184.40167236328125, 117.98108673095703, 639.23974609375, 571.6192626953125, -49.79351806640625, 180.04336547851562, 472.39910888671875, 163.0426025390625, 324.9820556640625, 240.3843994140625, 307.4632568359375, 150.76278686523438, -67.6830825805664, 41.017669677734375, -58.63782501220703, 216.93348693847656, 14.946277618408203, 100.69873046875, 615.4763793945312, -160.77813720703125, -35.98039245605469, 18.853656768798828, 42.075809478759766, 252.63243103027344, 9.485687255859375, -139.80435180664062, 168.10653686523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000303.npy"}
{"epoch": 0.44493392070484583, "step": 304, "batch_size": 64, "mean": 184.502197265625, "std": 170.46478271484375, "min": -284.1424560546875, "p10": -10.14099884033203, "median": 178.8809356689453, "p90": 415.28298950195324, "max": 580.5339965820312, "pos_frac": 0.875, "sample": [39.81819152832031, 277.25164794921875, 296.55426025390625, 93.73961639404297, -7.7712249755859375, -28.515090942382812, 7.910064697265625, 65.63423156738281, 178.45071411132812, 108.79257202148438, 293.7076110839844, 390.948974609375, 179.3111572265625, 337.493408203125, 53.62834167480469, 289.9340515136719, 298.7943420410156, 326.0852355957031, -91.03736114501953, 16.19351577758789, 157.69427490234375, 388.0185546875, 42.26564025878906, 70.59307861328125, 304.68798828125, 102.14077758789062, 300.52984619140625, 42.891815185546875, 327.816650390625, 108.41197204589844, 130.79653930664062, 95.72750854492188, 580.5339965820312, 115.5517349243164, -42.58673858642578, -284.1424560546875, 240.86953735351562, -11.1566162109375, 41.14790344238281, 275.211669921875, -60.095001220703125, 477.53350830078125, 59.24443054199219, 66.22850036621094, 468.84954833984375, 168.90431213378906, 11.85113525390625, 77.29469299316406, 225.91732788085938, 360.7188720703125, 288.137939453125, -94.16368103027344, 184.1127166748047, 433.6474304199219, 425.71185302734375, 70.90888977050781, 261.3125305175781, 223.67630004882812, 427.02130126953125, 502.45361328125, 344.123046875, 237.75399780273438, 288.5048828125, 244.56521606445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000304.npy"}
{"epoch": 0.44640234948604995, "step": 305, "batch_size": 64, "mean": 162.76901245117188, "std": 236.83709716796875, "min": -296.9483337402344, "p10": -85.91730041503905, "median": 114.39013290405273, "p90": 458.6659332275391, "max": 1068.2845458984375, "pos_frac": 0.796875, "sample": [430.0046081542969, 304.91961669921875, 83.03257751464844, 214.5762481689453, 170.81419372558594, 665.6951904296875, 271.3879699707031, 27.53577995300293, 266.13751220703125, 476.38800048828125, 136.5858612060547, 81.99614715576172, 98.7258529663086, -55.5592041015625, -67.91009521484375, 19.538169860839844, 321.4137268066406, 459.7471618652344, 69.0149917602539, 71.67253875732422, 1068.2845458984375, 322.947265625, 24.318889617919922, 280.8582763671875, 314.341796875, 12.538911819458008, 2.9226741790771484, 232.4722442626953, 247.3848419189453, -93.63467407226562, -42.836647033691406, 257.9334716796875, -119.38423156738281, 329.75518798828125, 83.630859375, 137.52310180664062, 539.9095458984375, 308.3555908203125, 15.705720901489258, 156.49114990234375, 130.05441284179688, -27.50292205810547, 89.23887634277344, 80.46160125732422, 576.3175048828125, 543.3403930664062, 254.3340301513672, -45.482269287109375, -270.27398681640625, 135.57293701171875, -281.6495056152344, 456.14306640625, 444.723388671875, 51.32848358154297, -95.64495849609375, 32.15838623046875, -116.76798248291016, 42.9842529296875, 265.3509826660156, -296.9483337402344, 63.909584045410156, 266.76690673828125, -23.399612426757812, 16.96613311767578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000305.npy"}
{"epoch": 0.447870778267254, "step": 306, "batch_size": 64, "mean": 157.5745849609375, "std": 196.25537109375, "min": -367.13897705078125, "p10": -47.228285980224605, "median": 109.64873123168945, "p90": 419.04808044433594, "max": 860.457763671875, "pos_frac": 0.796875, "sample": [13.362384796142578, -19.703990936279297, 90.56661987304688, 24.11309814453125, 34.66455078125, -58.90943908691406, 306.8067321777344, 22.77227020263672, 88.438232421875, -5.609992980957031, 202.24391174316406, 372.2161865234375, 131.07577514648438, 415.328369140625, 63.16325378417969, 74.67513275146484, 35.362918853759766, 456.4422912597656, 70.1847152709961, 45.07429504394531, 79.41063690185547, -55.88677215576172, -40.91937255859375, 429.1218566894531, 246.03271484375, 308.36285400390625, -18.236270904541016, -6.90532112121582, 420.6422424316406, 123.99308776855469, 105.10107421875, 155.65634155273438, 251.02154541015625, 414.04010009765625, 57.99920654296875, 151.2555389404297, 114.1963882446289, -82.69799041748047, 37.3828239440918, 269.819580078125, -367.13897705078125, 323.086181640625, 103.56507110595703, -69.67658996582031, 11.67119026184082, 266.6155700683594, -49.932106018066406, 128.51193237304688, 547.7578125, 497.8104248046875, 187.22412109375, 336.6318359375, 246.6328887939453, -85.85968780517578, 423.84912109375, -7.874042510986328, 94.47259521484375, 29.449913024902344, 264.90606689453125, 122.11117553710938, 860.457763671875, 205.16607666015625, 317.7061767578125, 375.9708557128906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000306.npy"}
{"epoch": 0.44933920704845814, "step": 307, "batch_size": 64, "mean": 160.48297119140625, "std": 205.5437774658203, "min": -429.8045349121094, "p10": -74.17445144653318, "median": 112.9671630859375, "p90": 440.72926940917984, "max": 763.2802124023438, "pos_frac": 0.8125, "sample": [387.2052307128906, 124.94925689697266, 157.82992553710938, 177.2010498046875, 176.474365234375, 151.14134216308594, -119.90142822265625, 55.81951141357422, -81.05409240722656, 567.437744140625, 74.5582275390625, 42.620582580566406, 84.22322082519531, -139.22415161132812, 371.5474853515625, -429.8045349121094, 516.22412109375, -160.3684844970703, 240.33889770507812, -58.12195587158203, 233.34329223632812, 281.89178466796875, 88.24730682373047, 154.88250732421875, 480.4180908203125, 282.5980529785156, 83.53639221191406, 110.36083984375, 75.08416748046875, 453.4730529785156, 108.46837615966797, 458.1215515136719, 73.4466781616211, 71.75212097167969, -54.53849792480469, 111.13076782226562, -45.976959228515625, 71.28962707519531, 393.1002197265625, 410.9937744140625, -21.899986267089844, 344.8119812011719, 39.153221130371094, 268.492919921875, 246.8872833251953, 26.738571166992188, 313.0819091796875, 248.83013916015625, 23.9985294342041, 89.49943542480469, 222.07041931152344, 14.469833374023438, 354.63653564453125, 535.0789794921875, 260.43994140625, 763.2802124023438, 102.33556365966797, -95.41168212890625, 236.70420837402344, 114.80355834960938, -34.00248718261719, 301.22552490234375, 60.73347473144531, -125.76729583740234], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000307.npy"}
{"epoch": 0.45080763582966227, "step": 308, "batch_size": 64, "mean": 181.93313598632812, "std": 187.9801483154297, "min": -204.05499267578125, "p10": -37.27806453704833, "median": 166.92604064941406, "p90": 471.67670593261727, "max": 511.4571533203125, "pos_frac": 0.828125, "sample": [167.16880798339844, 423.07733154296875, -164.54852294921875, 224.45407104492188, 31.584861755371094, 508.90972900390625, -204.05499267578125, -40.63534164428711, 44.6905517578125, 107.60394287109375, 497.97991943359375, 0.5864601135253906, 214.61572265625, 114.38381958007812, 336.5497741699219, 478.1545104980469, -115.40568542480469, 186.52671813964844, 110.47075653076172, 6.547370910644531, 166.6832733154297, 505.9543151855469, 317.90643310546875, 448.28900146484375, -83.33064270019531, 269.4281005859375, 132.59835815429688, 111.17774200439453, 129.30686950683594, -19.203413009643555, 243.7918701171875, 125.50423431396484, 196.59347534179688, 248.84226989746094, 217.7777862548828, 456.56182861328125, 511.4571533203125, 146.86529541015625, -120.50318908691406, 72.784912109375, 73.67977905273438, 344.23626708984375, 314.04119873046875, 166.00778198242188, 154.6599578857422, 497.8224182128906, 226.17489624023438, 45.70790100097656, -8.952095031738281, 361.81072998046875, 503.2022705078125, 275.8133850097656, 173.65115356445312, -14.990461349487305, -29.44441795349121, 67.680419921875, 364.8878173828125, 361.1903076171875, 280.2271728515625, 30.27014923095703, 396.5123596191406, 181.1597900390625, -134.1455078125, 5.372001647949219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000308.npy"}
{"epoch": 0.4522760646108664, "step": 309, "batch_size": 64, "mean": 187.72882080078125, "std": 224.6634979248047, "min": -584.6229248046875, "p10": -69.63152465820312, "median": 174.41378784179688, "p90": 458.5450683593751, "max": 722.2818603515625, "pos_frac": 0.8125, "sample": [171.04702758789062, 77.13032531738281, 107.02344512939453, -94.90322875976562, 282.5778503417969, 177.78054809570312, 124.69417572021484, -17.154260635375977, 542.7493896484375, 31.35284423828125, 256.8533935546875, 432.23077392578125, 94.77066040039062, 518.024169921875, 30.03321075439453, 58.751686096191406, -73.36759948730469, 306.5274963378906, 467.64434814453125, 319.88885498046875, 259.2192077636719, -584.6229248046875, 235.42324829101562, -98.59931945800781, 67.41366577148438, 164.74099731445312, 247.60482788085938, 323.32635498046875, 49.25379943847656, 37.376007080078125, -137.88796997070312, 657.9246215820312, 66.53997802734375, 21.108139038085938, 166.70236206054688, 437.1705322265625, 722.2818603515625, 429.3567810058594, 135.62704467773438, -23.864791870117188, -5.184444427490234, 81.77182006835938, 64.35832214355469, 437.31341552734375, 305.94110107421875, 302.37457275390625, 479.2789306640625, 398.49200439453125, 306.37469482421875, -2.3034095764160156, 424.1830139160156, -172.69923400878906, 229.76181030273438, 313.33465576171875, -120.96121215820312, 80.35844421386719, 509.9768981933594, 423.01123046875, -60.91401672363281, 184.77426147460938, 188.2503662109375, 32.56235122680664, 402.9874572753906, 221.8521270751953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000309.npy"}
{"epoch": 0.45374449339207046, "step": 310, "batch_size": 64, "mean": 173.33135986328125, "std": 186.2578887939453, "min": -265.1800537109375, "p10": -30.06421203613281, "median": 162.25074768066406, "p90": 432.7359130859375, "max": 575.5327758789062, "pos_frac": 0.796875, "sample": [-26.15615463256836, 171.03497314453125, 361.080810546875, -45.57451629638672, 86.62660217285156, 322.24468994140625, 34.10518264770508, -0.47893333435058594, 19.61998176574707, 214.54104614257812, -210.49859619140625, 425.865966796875, 447.8362121582031, -36.361576080322266, 46.68883514404297, 307.78692626953125, 202.7443084716797, 245.74778747558594, 92.08401489257812, 153.46652221679688, -31.739093780517578, 82.08547973632812, 288.372314453125, 84.29743957519531, 152.163818359375, 522.351806640625, 354.56475830078125, -14.82607650756836, 23.07029914855957, 230.57473754882812, -265.1800537109375, 575.5327758789062, -25.273822784423828, 152.86331176757812, 561.4688720703125, 200.79412841796875, 265.3238525390625, -21.480680465698242, -21.321876525878906, 38.58617401123047, 85.11062622070312, 178.81689453125, 332.0533447265625, 235.0816192626953, 43.22235870361328, 311.9674072265625, 435.68017578125, -92.16852569580078, 342.62518310546875, 227.63241577148438, 309.4784240722656, -69.63532257080078, 268.3162536621094, 141.15513610839844, 62.33848571777344, 59.01178741455078, 75.88650512695312, 512.6906127929688, 551.24462890625, 247.7004852294922, 87.43954467773438, 366.92974853515625, 241.0688018798828, 174.9286346435547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000310.npy"}
{"epoch": 0.4552129221732746, "step": 311, "batch_size": 64, "mean": 201.25680541992188, "std": 257.14312744140625, "min": -261.5953369140625, "p10": -110.47822875976561, "median": 170.77682495117188, "p90": 563.5325927734375, "max": 880.1209716796875, "pos_frac": 0.765625, "sample": [474.0645751953125, 393.4759826660156, 423.3760986328125, 5.405769348144531, -144.3044891357422, 558.177490234375, -132.95928955078125, 304.97650146484375, 198.47381591796875, 565.82763671875, 298.559326171875, 88.3843994140625, 351.44024658203125, 761.0247802734375, 303.63995361328125, 142.9100341796875, 515.7208862304688, 129.45928955078125, -0.8264541625976562, 627.014404296875, 188.5150909423828, 210.86074829101562, 527.8035888671875, 227.3570098876953, 43.297203063964844, 152.76988220214844, -54.99549865722656, -179.74923706054688, 198.131103515625, 120.77831268310547, -115.13870239257812, 323.8172302246094, -160.29257202148438, 153.03855895996094, 276.57952880859375, -99.60379028320312, -6.0626678466796875, 880.1209716796875, -28.40362548828125, 273.99334716796875, 20.91345977783203, 87.43970489501953, 237.47134399414062, 43.351806640625, 621.6148681640625, 396.3698425292969, 23.85995864868164, -54.143585205078125, 6.742160797119141, 94.4168701171875, -22.456722259521484, 523.6220703125, 118.76785278320312, 15.419242858886719, -261.5953369140625, 457.1051025390625, 190.14645385742188, 239.72396850585938, 14.388999938964844, 226.50575256347656, 642.4058227539062, -176.5442657470703, -44.39922332763672, 682.6516723632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000311.npy"}
{"epoch": 0.4566813509544787, "step": 312, "batch_size": 64, "mean": 180.94058227539062, "std": 254.26980590820312, "min": -352.5306701660156, "p10": -74.02250289916991, "median": 124.98529052734375, "p90": 495.41860961914074, "max": 1050.4365234375, "pos_frac": 0.828125, "sample": [129.42105102539062, 1050.4365234375, 505.15350341796875, 196.9716796875, 127.71064758300781, 79.84699249267578, -104.77667999267578, 83.52975463867188, 97.01114654541016, 351.11334228515625, -258.14349365234375, 416.6282043457031, 888.0867919921875, 342.50341796875, 238.8301544189453, 253.39730834960938, -203.75701904296875, 242.906982421875, 408.46923828125, 147.64874267578125, 540.5845336914062, 84.84504699707031, 122.25993347167969, 295.1146545410156, 73.51351928710938, 13.28550910949707, 41.214561462402344, 749.5452880859375, 342.7039794921875, 472.703857421875, 104.56863403320312, 411.1335754394531, 182.11679077148438, 106.12391662597656, -144.03152465820312, 92.64533996582031, 165.8885955810547, 520.302734375, -2.9032745361328125, 31.102123260498047, 269.865966796875, 122.24024200439453, -30.140443801879883, 235.51576232910156, 177.75067138671875, 0.6852874755859375, -33.58160400390625, 648.7410888671875, -58.83088684082031, 38.13159942626953, 213.2996826171875, 105.88078308105469, -230.49325561523438, 8.188705444335938, -80.53319549560547, 130.6779022216797, 120.76158142089844, 26.802993774414062, -352.5306701660156, 379.2291564941406, 255.98117065429688, 18.2982177734375, 48.57045364379883, 369.9797668457031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000312.npy"}
{"epoch": 0.4581497797356828, "step": 313, "batch_size": 64, "mean": 182.31651306152344, "std": 248.3165283203125, "min": -420.2560729980469, "p10": -116.57959289550777, "median": 157.30615234375, "p90": 470.58243713378914, "max": 827.203369140625, "pos_frac": 0.8125, "sample": [59.5074462890625, 320.31683349609375, 143.28591918945312, 325.57684326171875, -145.23196411132812, 9.213241577148438, 436.3840026855469, 157.5430908203125, 380.7296447753906, -77.56074523925781, 16.153060913085938, -186.8415069580078, 447.15826416015625, 151.9889678955078, 21.42620277404785, 192.44789123535156, 480.6213684082031, 199.6255645751953, 82.19417572021484, 200.96514892578125, 43.241676330566406, 337.3827209472656, -420.2560729980469, -133.3019561767578, 193.53460693359375, 561.854248046875, 279.78125, 90.26133728027344, 194.65219116210938, 827.203369140625, 343.44647216796875, 142.5164794921875, -12.340360641479492, 164.0637664794922, -250.65066528320312, 122.98579406738281, -37.39263916015625, -415.90240478515625, 414.9588317871094, 251.0156707763672, -12.866539001464844, 222.45059204101562, 57.812496185302734, 295.47235107421875, 306.8353576660156, 145.64207458496094, -10.940757751464844, 308.35137939453125, 295.20098876953125, -135.57968139648438, 445.685302734375, 94.76683044433594, 157.0692138671875, 16.284996032714844, 154.85226440429688, 584.864013671875, 650.7071533203125, 12.569015502929688, 12.850959777832031, 77.60861206054688, 296.1963195800781, 373.0699768066406, 619.7521362304688, 787.0438842773438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000313.npy"}
{"epoch": 0.45961820851688695, "step": 314, "batch_size": 64, "mean": 173.25335693359375, "std": 271.2396240234375, "min": -365.3646240234375, "p10": -141.51600952148436, "median": 130.89640045166016, "p90": 485.02927246093753, "max": 887.8380126953125, "pos_frac": 0.6875, "sample": [764.2523193359375, -300.527587890625, 119.33153533935547, 17.655784606933594, 32.471343994140625, 241.91986083984375, 54.27610778808594, 345.6970520019531, 61.58290481567383, -132.7556915283203, 322.05438232421875, -3.1286468505859375, 242.91314697265625, 98.12107849121094, 183.53692626953125, -365.3646240234375, -163.08323669433594, 29.919754028320312, 694.5687866210938, 765.0856323242188, 32.586639404296875, -33.54682159423828, -147.52902221679688, 88.56013488769531, 181.27459716796875, -175.7891387939453, -32.57218933105469, -141.9693603515625, -47.427940368652344, 422.67169189453125, 452.40191650390625, -1.4244384765625, 273.1623840332031, 489.678466796875, 297.6040954589844, 161.6211700439453, -90.07689666748047, 141.08383178710938, 47.52960205078125, 746.8968505859375, 439.5848693847656, 338.7437744140625, 887.8380126953125, 451.8348388671875, -50.16067886352539, 361.69384765625, -140.45819091796875, 474.18115234375, -23.806732177734375, 236.51290893554688, 312.521728515625, 30.46600341796875, 297.9964599609375, 328.7671203613281, 409.3424987792969, 533.0502319335938, 120.70896911621094, 209.75701904296875, 249.7381134033203, -27.333961486816406, -133.6951141357422, -52.12295150756836, 349.8794860839844, -190.08714294433594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000314.npy"}
{"epoch": 0.461086637298091, "step": 315, "batch_size": 64, "mean": 158.88729858398438, "std": 208.8935089111328, "min": -199.1038818359375, "p10": -63.13768272399902, "median": 126.416748046875, "p90": 459.37054748535166, "max": 700.4134521484375, "pos_frac": 0.75, "sample": [551.4957885742188, -34.74018096923828, 264.945068359375, 6.194427490234375, -43.314205169677734, 52.91007995605469, 57.123138427734375, 139.6239776611328, 278.9600830078125, -37.429931640625, 330.9937744140625, 20.869050979614258, -60.40092849731445, 257.3433837890625, 77.4679183959961, 533.7546997070312, 306.35333251953125, 186.59255981445312, -139.29119873046875, 354.4273376464844, 181.2244873046875, 495.622314453125, 467.6072998046875, 15.658416748046875, -116.15438842773438, 191.10556030273438, 223.6011962890625, 161.61380004882812, 391.29034423828125, 156.64517211914062, 440.1514587402344, 100.92250061035156, 100.45489501953125, 115.75634765625, 195.6613006591797, 323.50830078125, -100.10467529296875, -21.43255615234375, 246.19309997558594, 19.359405517578125, 75.67950439453125, -64.31057739257812, 700.4134521484375, 47.761390686035156, 222.40524291992188, -20.37720489501953, -168.50906372070312, 0.4236927032470703, -60.27442932128906, 610.8419189453125, 101.59097290039062, 209.84812927246094, 626.4784545898438, 137.0771484375, 175.3196258544922, -73.29293823242188, 288.5735168457031, -16.07162094116211, 397.7153625488281, 436.0617370605469, -199.1038818359375, -50.74684143066406, 61.90890884399414, 36.812095642089844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000315.npy"}
{"epoch": 0.46255506607929514, "step": 316, "batch_size": 64, "mean": 224.96531677246094, "std": 250.58079528808594, "min": -245.8822479248047, "p10": -9.84074745178222, "median": 162.79576873779297, "p90": 601.9384765625001, "max": 918.0781860351562, "pos_frac": 0.875, "sample": [328.64111328125, 124.97325134277344, 477.449462890625, 63.272552490234375, 67.2729721069336, 903.3135986328125, 145.69329833984375, 664.416259765625, 443.7734375, 643.1572265625, 374.717041015625, 334.1180419921875, 36.1317138671875, 37.67343521118164, 43.790245056152344, 219.61293029785156, 78.54954528808594, 125.65766906738281, 25.688594818115234, 416.92169189453125, 175.42208862304688, 44.58487319946289, 609.0640258789062, 420.91436767578125, 276.9035339355469, 94.6482162475586, 37.96847915649414, 651.9628295898438, 173.9756622314453, 521.5457763671875, 12.493513107299805, 94.15471649169922, 46.560760498046875, 142.9098663330078, 213.3238525390625, 319.61468505859375, 392.9947509765625, 70.34297943115234, 335.1807861328125, 21.850608825683594, 585.3121948242188, 319.6151428222656, 37.5509033203125, 362.4851379394531, 203.85089111328125, 151.61587524414062, -245.8822479248047, -4.826435089111328, 918.0781860351562, 128.55870056152344, 810.1741943359375, -239.07254028320312, 185.25289916992188, 183.9728240966797, 336.2525634765625, -11.989738464355469, -72.28023529052734, -84.16493225097656, -34.014503479003906, 111.12996673583984, 230.22479248046875, -102.83531951904297, 93.2882080078125, 324.24334716796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000316.npy"}
{"epoch": 0.46402349486049926, "step": 317, "batch_size": 64, "mean": 245.63772583007812, "std": 233.3943328857422, "min": -157.65711975097656, "p10": -42.48965911865234, "median": 231.04463958740234, "p90": 563.2071166992188, "max": 720.0057983398438, "pos_frac": 0.796875, "sample": [354.3599853515625, 54.53949737548828, 101.02799224853516, 458.7669982910156, 452.1285705566406, 77.23849487304688, 429.13677978515625, 388.81451416015625, -157.65711975097656, 569.3294677734375, 702.959228515625, 140.14393615722656, 248.65521240234375, -2.991455078125, 138.22909545898438, 383.279541015625, 206.7770233154297, -11.970390319824219, 109.01367950439453, 400.7760925292969, -41.9866943359375, 346.961181640625, 342.4244689941406, 539.1707763671875, 35.87055587768555, 509.4734191894531, -8.904476165771484, 126.6679916381836, 720.0057983398438, 412.68902587890625, 651.5906372070312, 483.71478271484375, 190.42835998535156, 548.921630859375, 410.24139404296875, -57.08628845214844, 240.54273986816406, 103.47273254394531, 254.85882568359375, 107.34039306640625, 195.435546875, 531.9823608398438, -27.917768478393555, -42.70521545410156, 622.4234008789062, 221.54653930664062, 587.150634765625, 315.0496826171875, 267.8262939453125, -76.94133758544922, 97.06974792480469, 96.56244659423828, 256.65118408203125, -6.042655944824219, -100.88875579833984, 115.32706451416016, 76.65524291992188, -153.22671508789062, 647.4622802734375, 401.52862548828125, 94.85974884033203, 395.7562561035156, -143.8397979736328, 390.1358947753906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000317.npy"}
{"epoch": 0.4654919236417034, "step": 318, "batch_size": 64, "mean": 186.2266387939453, "std": 234.27757263183594, "min": -319.85076904296875, "p10": -62.631797790527344, "median": 159.20938873291016, "p90": 526.458905029297, "max": 852.8662109375, "pos_frac": 0.796875, "sample": [64.5230484008789, -63.07200622558594, 852.8662109375, 100.85198974609375, 246.72763061523438, 121.87702941894531, -57.749122619628906, 180.40982055664062, 120.35381317138672, 667.6541748046875, 25.190149307250977, 108.30499267578125, 315.9975280761719, 538.0452880859375, 575.864501953125, 256.92572021484375, -14.137802124023438, 118.86344909667969, -124.6205062866211, -42.477943420410156, 573.5653686523438, 66.96065521240234, -123.87382507324219, 499.42401123046875, 772.1852416992188, 308.9371337890625, -11.677242279052734, -98.18461608886719, 226.4342498779297, 334.3711853027344, 61.886138916015625, 330.0455627441406, 72.72469329833984, 404.7071838378906, 231.03977966308594, 136.48468017578125, 564.1841430664062, 15.946823120117188, -61.604644775390625, 445.2053527832031, 429.7861328125, 185.97409057617188, 5.266986846923828, -74.0499038696289, 329.442138671875, 207.71444702148438, 119.67879486083984, 174.09194946289062, 166.9113006591797, -45.92962646484375, -312.56451416015625, 33.621498107910156, 226.29852294921875, 201.2122802734375, 356.2892761230469, 370.44500732421875, 236.29214477539062, 138.408447265625, 56.32865905761719, 282.1841125488281, 151.50747680664062, 239.3583984375, -319.85076904296875, 18.928062438964844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000318.npy"}
{"epoch": 0.4669603524229075, "step": 319, "batch_size": 64, "mean": 219.60240173339844, "std": 253.29730224609375, "min": -367.7530822753906, "p10": -107.34338073730468, "median": 227.53179931640625, "p90": 534.9291259765625, "max": 970.5200805664062, "pos_frac": 0.8125, "sample": [301.8963623046875, 158.89198303222656, 297.8945617675781, 265.0700378417969, 67.55793762207031, 271.9200439453125, 356.1446228027344, 468.27099609375, 245.46023559570312, 85.8905258178711, -96.06246185302734, -259.2591857910156, 323.8655090332031, 133.71067810058594, 272.4207458496094, 146.70921325683594, 62.166847229003906, 245.05165100097656, 266.4062194824219, 176.02236938476562, 182.78346252441406, 437.51800537109375, 212.08685302734375, 242.97674560546875, 306.108154296875, 22.714218139648438, 364.1629638671875, 816.449951171875, 425.1189270019531, 373.40185546875, -127.162841796875, 178.68850708007812, 176.5687713623047, 537.6466674804688, -306.40216064453125, 429.3546142578125, -367.7530822753906, -110.77044677734375, -99.34689331054688, 387.01068115234375, 562.82421875, 440.34320068359375, 596.2792358398438, -10.070371627807617, 159.4928436279297, 20.386978149414062, 559.1244506835938, 621.150390625, 269.4964294433594, 528.5881958007812, 970.5200805664062, -137.4399871826172, 151.93899536132812, 435.9188232421875, -34.47417068481445, 38.27838134765625, -211.67254638671875, -47.27056884765625, 358.22210693359375, 160.32005310058594, 145.0286865234375, 205.55996704101562, 245.69818115234375, 155.12538146972656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000319.npy"}
{"epoch": 0.4684287812041116, "step": 320, "batch_size": 64, "mean": 242.403564453125, "std": 263.6028137207031, "min": -271.4315490722656, "p10": -60.97780227661132, "median": 221.56637573242188, "p90": 567.2474914550781, "max": 1061.0784912109375, "pos_frac": 0.8125, "sample": [171.90206909179688, 189.29904174804688, 906.0963134765625, 120.59162902832031, 290.9598693847656, -119.3473129272461, 184.18325805664062, -271.4315490722656, 809.1620483398438, 164.5697479248047, 440.385986328125, 373.2234191894531, -42.43327331542969, 385.8063049316406, 231.49288940429688, 561.7720336914062, 613.0866088867188, 412.4479675292969, 370.63671875, -164.4927978515625, 325.668212890625, 364.01312255859375, -45.99998474121094, 200.17596435546875, 59.710182189941406, 14.858390808105469, 314.97637939453125, 295.1100769042969, -42.06361389160156, 569.5941162109375, -83.81095886230469, 251.62738037109375, -23.920143127441406, 137.28070068359375, 463.9329833984375, 130.40870666503906, 160.27764892578125, 216.9696044921875, 1061.0784912109375, 334.2908020019531, 394.1602783203125, 86.11438751220703, 113.031005859375, -55.207763671875, 656.4837036132812, 59.815162658691406, 376.0074157714844, 219.0248260498047, 3.216432571411133, 25.4625244140625, 354.9186706542969, 389.24725341796875, 306.10968017578125, 432.00543212890625, 497.215087890625, 611.921875, 526.0003662109375, 12.246931076049805, -118.6368408203125, -63.45067596435547, 48.879730224609375, 224.10792541503906, 255.90924072265625, -172.843994140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000320.npy"}
{"epoch": 0.4698972099853157, "step": 321, "batch_size": 64, "mean": 203.71778869628906, "std": 270.4161071777344, "min": -381.04852294921875, "p10": -96.24809265136719, "median": 180.65164184570312, "p90": 555.2553466796876, "max": 981.005615234375, "pos_frac": 0.78125, "sample": [681.4703369140625, 116.54573822021484, 276.34173583984375, 259.6280212402344, -63.068397521972656, 295.74884033203125, 347.0451354980469, 273.3326110839844, 131.84173583984375, 506.84234619140625, 430.14007568359375, 376.47052001953125, 82.02920532226562, 59.20307540893555, -236.9705352783203, 249.6709747314453, 436.85919189453125, 249.5599365234375, 215.17050170898438, 563.7841796875, 744.059814453125, 157.65802001953125, -381.04852294921875, 305.3675231933594, 219.4534912109375, 319.7798156738281, 753.695556640625, -317.3807067871094, -124.51322937011719, 150.59449768066406, 86.67762756347656, 535.354736328125, -93.74581909179688, 379.020263671875, 177.90615844726562, 426.7970886230469, 224.7941131591797, -262.0306396484375, 981.005615234375, 96.99646759033203, -55.6973876953125, 93.29500579833984, 14.422830581665039, 110.275146484375, -30.093727111816406, 142.15480041503906, -97.32049560546875, -15.859058380126953, 148.69992065429688, 253.8889923095703, 693.989501953125, 655.0325927734375, -2.5167198181152344, -311.2030334472656, 183.39712524414062, 191.08001708984375, 153.54571533203125, 167.02342224121094, 2.442201614379883, 469.65924072265625, 169.22930908203125, 284.603759765625, 228.527099609375, -42.725006103515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000321.npy"}
{"epoch": 0.4713656387665198, "step": 322, "batch_size": 64, "mean": 226.1587371826172, "std": 238.70956420898438, "min": -280.9570617675781, "p10": -60.75288429260253, "median": 261.537109375, "p90": 525.302899169922, "max": 791.6328125, "pos_frac": 0.78125, "sample": [-198.65777587890625, 791.6328125, 296.1373596191406, 760.8656005859375, 104.83414459228516, -2.4238319396972656, 511.56988525390625, 377.9096374511719, 306.675048828125, -63.383724212646484, -29.659652709960938, -18.572044372558594, 317.4583740234375, 292.3836669921875, 329.8859558105469, 382.5167541503906, -65.62328338623047, 238.05813598632812, -20.672393798828125, -108.67721557617188, 586.2215576171875, 588.379638671875, 279.03533935546875, 607.1903686523438, 16.22869873046875, 205.91513061523438, 341.85540771484375, 531.1884765625, 16.996620178222656, 469.7724609375, -76.65208435058594, 200.24777221679688, -37.58683395385742, 387.0096435546875, -54.6142578125, 7.300746917724609, 285.39739990234375, 175.20901489257812, 1.3445472717285156, 292.80694580078125, 480.23126220703125, 417.3516845703125, 407.0721435546875, -108.88899993896484, 434.5218811035156, 93.54798889160156, 289.24053955078125, -11.509471893310547, 250.40301513671875, 118.91603088378906, 102.09455108642578, 648.1663208007812, 209.09359741210938, 359.88702392578125, 418.0948791503906, 366.18414306640625, 75.53369140625, 37.51624298095703, 16.762697219848633, -280.9570617675781, 20.638275146484375, 431.16094970703125, 400.9215393066406, 272.67120361328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000322.npy"}
{"epoch": 0.47283406754772395, "step": 323, "batch_size": 64, "mean": 191.94479370117188, "std": 249.8715057373047, "min": -394.6297912597656, "p10": -115.26885681152343, "median": 174.6991195678711, "p90": 538.3981262207031, "max": 835.5398559570312, "pos_frac": 0.8125, "sample": [394.0244445800781, 245.75332641601562, 556.8271484375, -394.6297912597656, 619.4996337890625, -316.7319030761719, 604.141357421875, 835.5398559570312, 73.64131164550781, 106.84691619873047, 272.9020690917969, 35.278594970703125, 165.34329223632812, 135.72132873535156, 15.403205871582031, 287.568359375, 96.84830474853516, 307.8632507324219, -113.86280059814453, 624.9342651367188, -5.578529357910156, 80.10917663574219, 527.8211059570312, -147.5908203125, -147.13148498535156, 373.60015869140625, 128.41514587402344, 156.1738739013672, 436.744140625, 31.38037109375, 502.8029479980469, 245.7191162109375, 456.63079833984375, -41.516754150390625, 317.61138916015625, 22.5880126953125, -30.675552368164062, 176.76004028320312, 307.207275390625, 220.9041748046875, 287.69451904296875, 55.385986328125, 250.8321533203125, 530.5488891601562, -293.7981872558594, 172.63819885253906, 301.5118103027344, 607.3116455078125, 388.0820007324219, -66.85332489013672, 203.34024047851562, 541.7620849609375, -115.87145233154297, 290.6608581542969, -241.36256408691406, 210.80316162109375, 136.962158203125, 237.32025146484375, 68.8225326538086, 309.19976806640625, 55.16582489013672, 18.144054412841797, 137.29681396484375, 33.9832763671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000323.npy"}
{"epoch": 0.47430249632892807, "step": 324, "batch_size": 64, "mean": 180.31451416015625, "std": 291.45660400390625, "min": -397.0083312988281, "p10": -180.06527404785155, "median": 138.67731475830078, "p90": 506.7490875244141, "max": 1006.229736328125, "pos_frac": 0.734375, "sample": [-234.25491333007812, 663.7012939453125, 1.5701904296875, 234.478759765625, -13.415475845336914, 103.4578857421875, 242.56207275390625, 290.0104064941406, 115.13340759277344, 204.80694580078125, 810.027099609375, -116.72506713867188, 133.20849609375, 107.46775817871094, -22.289142608642578, 710.6470947265625, -223.0881805419922, -361.854736328125, 326.73651123046875, 233.97238159179688, 25.78485107421875, 122.30733489990234, -188.74591064453125, 202.1531982421875, 325.5386962890625, 896.2196655273438, 462.67645263671875, -148.5441131591797, 104.78118896484375, 492.2455139160156, 454.1695251464844, 144.14613342285156, 112.58238983154297, 77.22486114501953, 389.1972351074219, 329.3262023925781, 151.47840881347656, 320.3235168457031, 215.78948974609375, -16.70389747619629, 407.608642578125, -397.0083312988281, -205.5003662109375, 411.75360107421875, 512.9649047851562, 401.1115417480469, 128.59071350097656, -371.7575378417969, -159.81045532226562, 93.0481948852539, 274.0424499511719, 73.20714569091797, 268.49755859375, -50.29092025756836, 63.26478576660156, 83.138671875, 325.501953125, -91.98895263671875, -41.685489654541016, 1006.229736328125, 687.6212768554688, 155.9226531982422, -30.770843505859375, 318.3352355957031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000324.npy"}
{"epoch": 0.47577092511013214, "step": 325, "batch_size": 64, "mean": 156.92562866210938, "std": 218.8779296875, "min": -369.1091613769531, "p10": -76.42038269042969, "median": 134.3604278564453, "p90": 430.8413696289063, "max": 794.6691284179688, "pos_frac": 0.828125, "sample": [-353.60577392578125, -369.1091613769531, 214.39065551757812, 31.36402130126953, 348.97515869140625, 279.78265380859375, 151.79452514648438, -73.18061828613281, 193.4151611328125, 290.06103515625, 330.95745849609375, 523.0047607421875, 297.3358459472656, 132.9925079345703, 129.68592834472656, 103.19232940673828, 31.586647033691406, 83.71083068847656, 48.53022766113281, 254.55055236816406, -154.24465942382812, 261.6723327636719, 281.8750305175781, 553.7383422851562, 23.999774932861328, 12.235069274902344, 135.7283477783203, 53.98638153076172, 120.42579650878906, 13.290260314941406, 437.3938903808594, -37.73298645019531, 794.6691284179688, 26.75027847290039, 540.3200073242188, 51.37615203857422, 492.4871826171875, -77.80885314941406, 250.70635986328125, 198.46800231933594, -92.0591812133789, 122.12676239013672, 367.9510803222656, -30.778261184692383, 271.71588134765625, 312.2002258300781, 184.21414184570312, 272.1374816894531, 142.96139526367188, 103.4940185546875, 289.92022705078125, -307.1129150390625, 23.114715576171875, 663.6897583007812, 27.1160888671875, -8.076194763183594, 415.5521545410156, 23.188119888305664, -84.39433288574219, 249.000244140625, 153.0713348388672, 43.32033157348633, 59.85797882080078, 212.25894165039062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000325.npy"}
{"epoch": 0.47723935389133626, "step": 326, "batch_size": 64, "mean": 149.17068481445312, "std": 197.0985107421875, "min": -458.8954162597656, "p10": -25.407769393920894, "median": 137.20726013183594, "p90": 405.162924194336, "max": 633.7904052734375, "pos_frac": 0.828125, "sample": [17.079376220703125, -5.773921966552734, 97.62669372558594, 408.7213134765625, 192.6403045654297, -8.46282958984375, 184.43899536132812, 138.52395629882812, 320.7328796386719, -275.7059326171875, -157.12091064453125, -458.8954162597656, 539.1386108398438, 67.6706771850586, -113.8330307006836, 165.8928985595703, 199.04989624023438, 94.17515563964844, 246.756591796875, 228.09201049804688, 14.043731689453125, 62.682525634765625, 204.36366271972656, 207.65725708007812, -27.311264038085938, 114.86836242675781, 633.7904052734375, 189.08705139160156, 61.82867431640625, 45.512325286865234, 396.8600158691406, 202.58834838867188, 246.6842498779297, 135.89056396484375, 225.01834106445312, 13.40472412109375, 47.295074462890625, 393.00732421875, -20.96628189086914, 102.17312622070312, 121.74916076660156, -50.96661376953125, 69.97645568847656, 168.26693725585938, 19.997802734375, 92.78361511230469, 190.77166748046875, 373.7445983886719, 221.74034118652344, 230.3643341064453, -7.2545623779296875, 423.6743469238281, 593.2276611328125, 83.93172454833984, 302.50811767578125, 186.76669311523438, 472.81085205078125, 274.5261535644531, 11.305486679077148, -279.7701416015625, 431.94232177734375, 329.47137451171875, 48.314151763916016, 107.81584930419922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000326.npy"}
{"epoch": 0.4787077826725404, "step": 327, "batch_size": 64, "mean": 229.86044311523438, "std": 232.9683074951172, "min": -306.2003173828125, "p10": -83.32915802001949, "median": 247.24087524414062, "p90": 522.016146850586, "max": 726.2116088867188, "pos_frac": 0.8125, "sample": [206.07510375976562, 193.1074676513672, 470.66162109375, 196.77259826660156, 195.9616241455078, 345.6296081542969, 57.08557891845703, 293.0146484375, 326.27630615234375, 173.40762329101562, 341.54229736328125, 315.51190185546875, 120.22715759277344, 414.474365234375, -112.39038848876953, 458.2393493652344, -9.296199798583984, 485.9425964355469, 354.8942565917969, 326.05853271484375, -202.21514892578125, 620.583251953125, 549.4075927734375, 336.4615478515625, 144.99440002441406, 531.6759033203125, 290.89453125, -158.21397399902344, 247.2503662109375, 332.0082702636719, 385.577880859375, -158.4544677734375, -28.604110717773438, 158.7992401123047, 345.0595703125, 121.41361999511719, 253.64781188964844, 283.699462890625, 247.23138427734375, 151.16757202148438, 19.487747192382812, 68.43966674804688, -46.606048583984375, 459.5046081542969, 207.63333129882812, -97.69108581542969, -7.393047332763672, 357.8509521484375, 383.4527587890625, 119.78146362304688, 661.5736083984375, 95.90261840820312, 726.2116088867188, -306.2003173828125, 499.4767150878906, -49.8179931640625, 99.52726745605469, 593.1016845703125, 54.33872985839844, 414.3206787109375, 366.3774108886719, 645.366455078125, 106.96512603759766, -266.11669921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000327.npy"}
{"epoch": 0.4801762114537445, "step": 328, "batch_size": 64, "mean": 211.17984008789062, "std": 263.0715637207031, "min": -394.4688720703125, "p10": -81.3355026245117, "median": 187.1659698486328, "p90": 531.0218200683595, "max": 927.60205078125, "pos_frac": 0.71875, "sample": [190.1484375, -129.87118530273438, 202.15170288085938, 179.74276733398438, -32.70654296875, 380.54864501953125, 328.7908630371094, 145.6961212158203, 26.4219970703125, -394.4688720703125, 107.8040542602539, 468.3753967285156, 504.00213623046875, 375.1091003417969, 300.39080810546875, 472.53021240234375, -9.239921569824219, -123.90483093261719, -49.105560302734375, 225.1019287109375, -70.464599609375, -30.925750732421875, 354.6014404296875, 927.60205078125, 220.21115112304688, 356.46270751953125, 357.8139953613281, 212.74356079101562, 683.8719482421875, 364.5820617675781, -97.67825317382812, 184.18350219726562, 294.7550354003906, -85.99446105957031, 62.056182861328125, 113.54147338867188, 315.3499755859375, 415.8055725097656, 153.31277465820312, 141.61270141601562, -8.41246223449707, 191.45321655273438, -110.60630798339844, 238.05789184570312, -22.12889862060547, 496.9981689453125, 114.1595458984375, 260.72760009765625, 291.15582275390625, 182.26058959960938, 733.3486328125, 542.6016845703125, 19.853012084960938, -22.342193603515625, -15.193248748779297, 795.77783203125, 874.7900390625, 68.41793823242188, 634.088134765625, -31.09450912475586, -4.821586608886719, 302.7570495605469, 73.76150512695312, -131.06005859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000328.npy"}
{"epoch": 0.48164464023494863, "step": 329, "batch_size": 64, "mean": 219.3310546875, "std": 287.4814453125, "min": -695.8074951171875, "p10": -91.39138183593748, "median": 206.87662506103516, "p90": 591.0613769531251, "max": 916.3632202148438, "pos_frac": 0.796875, "sample": [131.31161499023438, 414.84466552734375, 565.8111572265625, 339.9556884765625, 330.5800476074219, -116.3260726928711, 514.926025390625, 31.616004943847656, 520.6757202148438, 214.77801513671875, 14.41278076171875, 415.3094787597656, 81.1987075805664, -70.96134948730469, 84.38255310058594, 131.01547241210938, 244.79383850097656, 166.13491821289062, 55.078155517578125, 817.0081787109375, 107.7937240600586, 101.9073486328125, 181.73301696777344, 155.53692626953125, -695.8074951171875, 304.3763732910156, 63.637474060058594, 916.3632202148438, 640.1202392578125, -99.84727478027344, 241.49124145507812, -49.08907699584961, 392.1007385253906, 90.86542510986328, 236.88555908203125, 403.1288757324219, 582.5524291992188, 395.77886962890625, 764.1805419921875, 241.32884216308594, 3.4068946838378906, -71.66096496582031, -179.84068298339844, 622.3096313476562, 504.2807922363281, -156.65966796875, 291.342529296875, -134.03392028808594, 344.61346435546875, 198.97523498535156, 222.9208984375, -51.47279357910156, 237.52207946777344, 255.6669464111328, 73.70978546142578, 48.355201721191406, 270.84417724609375, 467.74761962890625, 797.0767211914062, -5.84185791015625, -334.6088562011719, 176.82650756835938, 594.7080688476562, -0.5819625854492188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000329.npy"}
{"epoch": 0.4831130690161527, "step": 330, "batch_size": 64, "mean": 231.99075317382812, "std": 272.832275390625, "min": -301.3773193359375, "p10": -95.2630905151367, "median": 221.10419464111328, "p90": 507.24488525390626, "max": 925.998779296875, "pos_frac": 0.796875, "sample": [507.5482177734375, -40.46746826171875, -150.52178955078125, -106.11582946777344, 352.7876281738281, 504.49578857421875, 175.67356872558594, 199.08245849609375, -64.0770492553711, 83.35022735595703, -155.12754821777344, 122.12287902832031, 171.34652709960938, 458.50396728515625, 548.2911376953125, 31.83734893798828, 287.8984069824219, 281.6029968261719, 252.84487915039062, 443.8815612792969, -51.95533752441406, 102.30059814453125, 276.9589538574219, 58.34428024291992, 144.56011962890625, 327.9150695800781, -301.3773193359375, 325.9052734375, 232.4055633544922, 191.29031372070312, 421.9158020019531, 1.3176841735839844, -270.4754333496094, 273.4162292480469, 466.2709655761719, 208.5211181640625, 48.42936706542969, 925.998779296875, 36.693763732910156, 130.3629913330078, -18.25214385986328, 474.23541259765625, 137.28887939453125, 919.2200927734375, -231.0321502685547, 863.44970703125, 747.1354370117188, 341.83587646484375, 374.8955078125, 78.11360931396484, 318.91094970703125, 461.64044189453125, 209.80282592773438, 755.2180786132812, -7.651771545410156, 318.74835205078125, 250.895263671875, 506.537109375, -163.37814331054688, 454.9283447265625, 86.39678955078125, 284.278564453125, 300.3741149902344, -69.94003295898438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000330.npy"}
{"epoch": 0.4845814977973568, "step": 331, "batch_size": 64, "mean": 227.93499755859375, "std": 194.73362731933594, "min": -175.59027099609375, "p10": 2.8280784606933906, "median": 213.33824157714844, "p90": 427.2332061767579, "max": 850.4680786132812, "pos_frac": 0.890625, "sample": [166.1044158935547, 756.7725219726562, 283.8758544921875, -12.49945068359375, 271.8310241699219, 397.66156005859375, 114.05242919921875, 42.08775329589844, 239.44703674316406, 188.80926513671875, 730.6722412109375, 264.18231201171875, 850.4680786132812, -10.374015808105469, 136.11451721191406, 344.5065002441406, 592.77392578125, 115.01347351074219, 123.87603759765625, 230.63137817382812, 439.9067687988281, 395.2611083984375, 495.41082763671875, -58.4723014831543, 105.64405822753906, 186.96351623535156, 208.6100616455078, 240.78512573242188, 252.32122802734375, -35.79449462890625, 263.9349060058594, -64.05891418457031, 261.47625732421875, 176.18209838867188, 36.328880310058594, 183.25119018554688, -146.97891235351562, 199.86590576171875, 306.6778259277344, 554.1128540039062, 277.8379211425781, 218.06642150878906, 136.08473205566406, 360.68896484375, 87.05962371826172, 74.63311767578125, 292.0384216308594, 152.26626586914062, 295.4801940917969, 119.81547546386719, -175.59027099609375, 279.0188903808594, 129.57118225097656, 98.28178405761719, 369.7711181640625, 319.94305419921875, 304.4711608886719, 61.86731719970703, 197.2277069091797, 342.3577575683594, 148.8490447998047, 285.40301513671875, 351.6298828125, 33.632965087890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000331.npy"}
{"epoch": 0.48604992657856094, "step": 332, "batch_size": 64, "mean": 165.63372802734375, "std": 239.31832885742188, "min": -197.90390014648438, "p10": -103.47993469238281, "median": 128.06750106811523, "p90": 449.2050994873047, "max": 1059.9190673828125, "pos_frac": 0.75, "sample": [698.9796752929688, 157.3255615234375, 152.86387634277344, 289.8193664550781, 23.57830810546875, -12.027847290039062, 77.29618835449219, 439.30694580078125, 386.6502685546875, 192.79551696777344, -80.40071105957031, 53.26869201660156, -104.611572265625, 1059.9190673828125, 192.26158142089844, 73.34468841552734, 151.452392578125, 146.2694549560547, 142.8328857421875, 2.9025402069091797, 102.68621063232422, 96.16014099121094, 443.2127380371094, -117.37452697753906, 360.84197998046875, 37.07246780395508, -197.90390014648438, 120.37483978271484, 623.9464721679688, 214.07981872558594, 170.6220703125, 180.93838500976562, 270.89068603515625, -59.420082092285156, -66.06629180908203, 78.06128692626953, 508.36712646484375, 565.1856689453125, -100.83944702148438, 237.93190002441406, 135.76016235351562, -159.81387329101562, 384.88519287109375, 71.20881652832031, 306.76336669921875, 678.9718627929688, 105.37730407714844, 451.77325439453125, -78.32608032226562, 76.06315612792969, -173.8966064453125, 275.111328125, 369.5798645019531, -143.23876953125, 337.5663146972656, 68.45997619628906, -94.02558898925781, -9.526531219482422, -113.76713562011719, 161.11959838867188, 282.92645263671875, 114.71541595458984, 110.49925231933594, -70.2227783203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000332.npy"}
{"epoch": 0.48751835535976507, "step": 333, "batch_size": 64, "mean": 214.22947692871094, "std": 300.14874267578125, "min": -339.0847473144531, "p10": -177.1277099609375, "median": 197.18736267089844, "p90": 609.0979919433595, "max": 914.2716064453125, "pos_frac": 0.703125, "sample": [-44.93244934082031, 305.4845275878906, 914.2716064453125, -19.870681762695312, 78.4979019165039, 49.44798278808594, 331.384521484375, 207.67945861816406, 62.2470703125, 51.63523483276367, 817.2557983398438, -169.92425537109375, 184.57785034179688, -274.7633972167969, 528.9158325195312, -224.02835083007812, 639.7682495117188, -93.25762939453125, 639.21240234375, 385.3088684082031, -3.1029701232910156, 310.3483581542969, -25.342178344726562, 348.0110778808594, 441.92974853515625, 554.8391723632812, -180.21490478515625, 108.3663330078125, 7.25579833984375, 439.2688293457031, 146.5115203857422, -2.6907520294189453, -79.1759033203125, 477.4477233886719, 619.2545776367188, 410.5421447753906, -241.62478637695312, 9.754852294921875, 548.0527954101562, 438.1987609863281, 446.80419921875, -293.73822021484375, -15.785125732421875, 507.0599060058594, 476.95611572265625, -13.697242736816406, 234.6239013671875, 155.9098358154297, 776.2474975585938, 275.3443603515625, 485.41375732421875, 269.0826416015625, 585.3992919921875, 188.51708984375, 205.85763549804688, -149.2342987060547, 409.54736328125, -0.4069786071777344, 315.8621826171875, -302.8375244140625, 633.0739135742188, -339.0847473144531, 22.604385375976562, 140.6254425048828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000333.npy"}
{"epoch": 0.4889867841409692, "step": 334, "batch_size": 64, "mean": 190.96356201171875, "std": 305.06640625, "min": -543.4904174804688, "p10": -143.72344665527342, "median": 144.93572235107422, "p90": 655.6058715820313, "max": 891.072998046875, "pos_frac": 0.796875, "sample": [626.0465087890625, 48.75689697265625, -147.03085327148438, 490.10980224609375, 246.43377685546875, 41.209312438964844, -122.18657684326172, 45.155303955078125, 234.87161254882812, 30.09844970703125, 230.06753540039062, 43.26549530029297, 360.979736328125, 248.02362060546875, -159.6116485595703, 200.64988708496094, 66.05459594726562, 435.24969482421875, -543.4904174804688, 675.38134765625, 51.9267463684082, 222.08425903320312, -52.80732727050781, -288.9051208496094, 370.84075927734375, 262.648681640625, 495.26202392578125, -403.70635986328125, -227.92311096191406, -17.500350952148438, 35.223785400390625, -119.808349609375, 126.65925598144531, 57.501220703125, 791.8237915039062, 891.072998046875, 291.0646057128906, 15.683496475219727, 50.76321029663086, 518.6268310546875, 89.76195526123047, 242.19158935546875, 546.5099487304688, 410.97686767578125, -92.85420227050781, 81.9720458984375, 809.4134521484375, 24.187530517578125, 767.9150390625, 466.49072265625, -136.00616455078125, -355.01739501953125, 26.90643310546875, 248.67654418945312, 199.3628692626953, 668.274169921875, 487.2865295410156, 101.11601257324219, 51.49449157714844, 346.1928405761719, 68.48582458496094, 691.4879150390625, 193.0653533935547, 163.21218872070312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000334.npy"}
{"epoch": 0.49045521292217326, "step": 335, "batch_size": 64, "mean": 150.09857177734375, "std": 276.1180725097656, "min": -312.0198669433594, "p10": -207.45296630859372, "median": 129.47682189941406, "p90": 447.41028442382816, "max": 1013.5608520507812, "pos_frac": 0.703125, "sample": [166.80340576171875, 387.6154479980469, 432.8692626953125, 6.4346923828125, 182.56993103027344, -74.92044830322266, 31.638254165649414, 426.1767272949219, 72.78575134277344, 35.709434509277344, -240.88099670410156, 19.827072143554688, 278.9903564453125, 248.03004455566406, -54.96013641357422, 717.3401489257812, -22.91236114501953, 51.342201232910156, -247.09234619140625, 230.14227294921875, -132.79949951171875, 32.90351867675781, 223.89923095703125, 152.84951782226562, 692.9884033203125, 581.1749877929688, 388.9554138183594, 293.1260986328125, 11.697572708129883, 657.1699829101562, 1013.5608520507812, -284.0589904785156, 136.74368286132812, -312.0198669433594, 392.7287902832031, -29.619516372680664, 395.7568359375, 52.41932678222656, 68.91195678710938, 568.47119140625, -113.2655029296875, 350.6194152832031, -188.08346557617188, 122.2099609375, 282.4351501464844, -104.0693359375, 240.92291259765625, -23.803314208984375, -266.99835205078125, -130.87078857421875, 137.37295532226562, -140.9062042236328, 390.65203857421875, -256.10400390625, -123.14398956298828, 378.2601013183594, 273.741943359375, 453.64215087890625, 5.9597320556640625, -215.75418090820312, 308.07928466796875, 365.3263854980469, 214.58685302734375, 93.13143920898438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000335.npy"}
{"epoch": 0.4919236417033774, "step": 336, "batch_size": 64, "mean": 241.56228637695312, "std": 346.3768005371094, "min": -635.6212768554688, "p10": -99.02403411865234, "median": 219.5210952758789, "p90": 754.526531982422, "max": 1147.7218017578125, "pos_frac": 0.828125, "sample": [289.3422546386719, 64.83197021484375, 953.742919921875, -45.45978546142578, 404.1767883300781, 232.45074462890625, 277.3003845214844, -127.17391204833984, -79.70770263671875, 873.2025146484375, 302.24700927734375, 332.183837890625, 144.77496337890625, 453.3960266113281, 1147.7218017578125, -52.34521484375, 750.3787841796875, 374.4394836425781, 213.29029846191406, 397.65533447265625, -100.84288024902344, 75.22166442871094, 121.08210754394531, 284.1637878417969, 160.98492431640625, 394.8376159667969, 129.12405395507812, 29.970279693603516, 769.0692138671875, 225.75189208984375, 200.28248596191406, 62.01611328125, 39.09761047363281, 235.66622924804688, 109.14671325683594, 8.6673583984375, -461.7095947265625, 742.5093383789062, -94.78005981445312, -221.09356689453125, 756.3041381835938, 1.9606399536132812, 818.1976318359375, 65.70915222167969, 456.869873046875, 74.86431884765625, -635.6212768554688, 94.0455551147461, 343.52996826171875, 329.4278564453125, 119.37858581542969, 126.26187896728516, 182.15042114257812, 56.668495178222656, 256.6804504394531, 509.148193359375, 506.5269775390625, 1037.4013671875, -346.94610595703125, 338.24658203125, -459.16876220703125, 378.8330078125, 294.87274169921875, 539.0301513671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000336.npy"}
{"epoch": 0.4933920704845815, "step": 337, "batch_size": 64, "mean": 239.69003295898438, "std": 273.3504638671875, "min": -267.20697021484375, "p10": -126.67315139770507, "median": 199.48836517333984, "p90": 606.6661804199221, "max": 1031.8460693359375, "pos_frac": 0.8125, "sample": [928.962646484375, 285.0145568847656, -164.7924346923828, 103.5544662475586, 563.6907348632812, 520.18310546875, -58.524940490722656, 279.30126953125, 23.827281951904297, 429.9686279296875, 12.037605285644531, 248.30062866210938, 306.80548095703125, 415.07965087890625, 392.2155456542969, -122.6221694946289, 230.80442810058594, 177.94003295898438, -128.40928649902344, 519.8432006835938, 177.8056182861328, 195.48626708984375, 11.029012680053711, 632.8392333984375, 173.11439514160156, -139.594970703125, 1031.8460693359375, 75.59765625, -167.76760864257812, -163.99652099609375, 217.016357421875, 196.66639709472656, 279.86822509765625, -185.8261260986328, 186.9737091064453, 140.31040954589844, 503.7095642089844, -32.44804382324219, 293.20281982421875, 367.53387451171875, -22.470020294189453, 707.2698364257812, 85.7393798828125, 182.05908203125, -267.20697021484375, 142.72555541992188, 837.6836547851562, 523.51513671875, 354.10528564453125, 60.506690979003906, 202.31033325195312, 120.64295196533203, 625.084228515625, 360.521728515625, 145.9740753173828, 670.430419921875, 369.88055419921875, 193.47247314453125, 110.3349838256836, 370.27178955078125, 287.7416687011719, 268.3658142089844, -76.29008483886719, 330.9473571777344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000337.npy"}
{"epoch": 0.4948604992657856, "step": 338, "batch_size": 64, "mean": 232.6099853515625, "std": 230.09146118164062, "min": -226.08120727539062, "p10": -15.663192749023429, "median": 202.8077392578125, "p90": 531.3968566894531, "max": 804.096435546875, "pos_frac": 0.875, "sample": [84.23390197753906, 533.0377197265625, 102.65052795410156, -213.77838134765625, 210.17929077148438, 128.38308715820312, -109.48028564453125, 244.7578887939453, 667.40625, 244.33432006835938, 108.45771789550781, 76.64982604980469, -108.46295166015625, 515.5517578125, 114.43194580078125, 320.179931640625, 51.457489013671875, 506.5989685058594, 435.4817199707031, 549.32958984375, 678.1541748046875, 13.411460876464844, 248.5391845703125, 213.0561065673828, 283.45343017578125, 14.500473022460938, 439.1395263671875, -19.46672821044922, -31.20825958251953, -6.788276672363281, 233.01467895507812, 512.5732421875, 468.7191467285156, 103.40057373046875, 233.2337646484375, 399.6445617675781, 215.03427124023438, 56.62137222290039, 375.0640563964844, 135.2493133544922, 397.7383117675781, 45.89704895019531, 70.62063598632812, 102.65303039550781, 506.60009765625, 34.175933837890625, 195.43618774414062, 136.05987548828125, 353.60711669921875, 158.08995056152344, 17.98394775390625, 36.82919692993164, 566.284423828125, 804.096435546875, -55.67394256591797, 245.767578125, 508.6734619140625, 182.56622314453125, 360.31341552734375, 157.6590576171875, 662.1586303710938, -226.08120727539062, 71.26921081542969, 527.5681762695312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000338.npy"}
{"epoch": 0.49632892804698975, "step": 339, "batch_size": 64, "mean": 229.14202880859375, "std": 247.76565551757812, "min": -250.45767211914062, "p10": -70.49854431152343, "median": 211.84536743164062, "p90": 607.7530883789062, "max": 829.6860961914062, "pos_frac": 0.828125, "sample": [42.671600341796875, -120.46021270751953, -73.9652099609375, -42.55543518066406, 636.1575927734375, 35.84742736816406, 55.12837219238281, 181.1865997314453, 351.845947265625, 28.53443145751953, 829.6860961914062, 59.51002502441406, 89.80020141601562, 159.61087036132812, 358.99951171875, 353.7117919921875, 15.131139755249023, 17.80931854248047, 78.08118438720703, 609.4442138671875, 249.95115661621094, -12.571525573730469, 266.5998229980469, -67.6163101196289, 237.72561645507812, 121.75869750976562, 685.2430419921875, 27.855209350585938, 361.39306640625, 655.20263671875, -151.75592041015625, 261.28643798828125, -250.45767211914062, 159.57464599609375, 476.2535705566406, 333.1395263671875, 177.27932739257812, -25.775049209594727, 271.6036376953125, 486.5636901855469, 152.19337463378906, 257.77825927734375, 454.7303466796875, 529.2811279296875, 127.5705337524414, 637.6309204101562, -164.41729736328125, 693.5616455078125, 365.2469787597656, -198.62672424316406, 223.5390625, 200.15167236328125, 54.23838806152344, 243.78213500976562, 60.880733489990234, 197.52525329589844, 282.50579833984375, -71.7337875366211, 514.8443603515625, 343.79840087890625, 531.1136474609375, 603.80712890625, 415.87744140625, 280.3814392089844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000339.npy"}
{"epoch": 0.4977973568281938, "step": 340, "batch_size": 64, "mean": 195.61285400390625, "std": 276.29559326171875, "min": -455.92327880859375, "p10": -132.54623336791988, "median": 164.7219467163086, "p90": 565.9695007324219, "max": 975.591552734375, "pos_frac": 0.78125, "sample": [168.87042236328125, 252.19985961914062, 94.85028839111328, 204.8125, 210.58140563964844, 492.2174377441406, 652.5108642578125, -455.92327880859375, 275.80035400390625, -62.788963317871094, 354.9290771484375, 975.591552734375, 135.20913696289062, -257.2806701660156, 571.599609375, -77.02721405029297, 304.78143310546875, 126.88903045654297, -44.26618194580078, 62.031185150146484, 182.71824645996094, -195.23873901367188, -355.8260803222656, 552.8325805664062, -152.1031036376953, 137.42477416992188, 38.705955505371094, 419.6033935546875, -34.268463134765625, 314.2471618652344, 521.2552490234375, 83.91285705566406, 859.2498168945312, 662.6228637695312, 5.284421920776367, 160.57347106933594, 118.82989501953125, 250.62774658203125, 133.3632354736328, 198.86605834960938, 224.53079223632812, 126.35409545898438, 211.21261596679688, -14.458560943603516, -158.27186584472656, 308.7403259277344, 253.8404541015625, -86.91353607177734, 524.1083374023438, 7.0615081787109375, 139.06924438476562, 436.50750732421875, 607.20458984375, 602.2591552734375, 368.56207275390625, 207.73226928710938, 130.64002990722656, 45.481449127197266, 387.4795227050781, 154.83920288085938, -275.072265625, 406.6091613769531, -55.544830322265625, 78.98235321044922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000340.npy"}
{"epoch": 0.49926578560939794, "step": 341, "batch_size": 64, "mean": 225.42550659179688, "std": 265.3753967285156, "min": -302.9471435546875, "p10": -72.36225662231445, "median": 204.67798614501953, "p90": 541.249090576172, "max": 1200.242919921875, "pos_frac": 0.796875, "sample": [1200.242919921875, -94.2799072265625, -77.35621643066406, -211.06199645996094, 406.3830871582031, 553.3726806640625, 697.4271240234375, 488.5329895019531, 206.3834686279297, 202.97250366210938, -5.543245315551758, 173.47918701171875, 132.75894165039062, 266.7592468261719, 28.693464279174805, -56.96946716308594, -55.82838439941406, 339.12860107421875, 69.7442855834961, 294.32257080078125, -145.28515625, 386.8619384765625, 426.71826171875, 257.49774169921875, 71.04598236083984, 584.6060791015625, 159.43093872070312, 197.68991088867188, -21.336166381835938, -77.18901062011719, 155.00607299804688, 365.75933837890625, 616.345458984375, 297.9347229003906, 279.1236572265625, 439.0452575683594, 92.71473693847656, 24.471277236938477, -61.099830627441406, 446.06634521484375, 489.99102783203125, 274.85052490234375, 64.64720153808594, 109.09821319580078, -302.9471435546875, 551.3076782226562, 68.97065734863281, 234.83470153808594, -265.3128662109375, 361.88507080078125, 216.21600341796875, 267.34521484375, 415.0831298828125, -59.239173889160156, 769.5956420898438, 17.820236206054688, 345.15911865234375, 450.3446044921875, 433.9737548828125, 46.2474479675293, 138.18356323242188, 110.0634536743164, 517.779052734375, 116.76537322998047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000341.npy"}
{"epoch": 0.5007342143906021, "step": 342, "batch_size": 64, "mean": 199.92034912109375, "std": 274.50823974609375, "min": -589.0936889648438, "p10": -89.56423645019531, "median": 162.89828491210938, "p90": 507.3732727050783, "max": 998.4908447265625, "pos_frac": 0.75, "sample": [715.7659301757812, -589.0936889648438, -200.48300170898438, 544.6407470703125, 62.662479400634766, 36.086639404296875, 156.4041748046875, 378.08258056640625, 346.6519775390625, 324.3025817871094, 147.3892059326172, 103.94651794433594, 201.83953857421875, 193.1957244873047, 527.299560546875, 304.2987060546875, 88.81779479980469, 449.6796569824219, -185.49240112304688, -35.82151794433594, 409.78094482421875, -78.5289535522461, 79.31924438476562, -86.1429672241211, -91.0304946899414, 317.9162292480469, 990.0318603515625, 364.65777587890625, 115.51573181152344, -243.5421600341797, -63.93940353393555, 998.4908447265625, 212.06915283203125, 198.52420043945312, -2.6533660888671875, -65.8255844116211, 560.0706176757812, 440.9184875488281, 577.7898559570312, 136.24012756347656, -19.764097213745117, 166.70269775390625, 351.5362854003906, -39.887298583984375, 442.1194152832031, 381.326904296875, 159.0938720703125, 386.4883728027344, 107.15997314453125, 416.42938232421875, 457.1092224121094, 101.8538589477539, -143.74761962890625, 109.87956237792969, 367.1031494140625, -97.95024871826172, 168.5762481689453, -81.13703155517578, 189.94427490234375, 123.21839904785156, 226.62551879882812, 460.87860107421875, 137.69491577148438, 83.8119125366211], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000342.npy"}
{"epoch": 0.5022026431718062, "step": 343, "batch_size": 64, "mean": 242.99090576171875, "std": 215.8848114013672, "min": -346.01031494140625, "p10": 15.957244110107437, "median": 225.85455322265625, "p90": 509.2142028808595, "max": 833.1605834960938, "pos_frac": 0.921875, "sample": [380.3824157714844, 197.71722412109375, 189.8005828857422, 139.31512451171875, 238.91470336914062, 9.619110107421875, 313.8232421875, 345.66839599609375, 197.92572021484375, 703.9063110351562, 137.08753967285156, -148.32000732421875, 267.24786376953125, 82.41574096679688, 195.02590942382812, 90.19927978515625, 70.9078369140625, 243.4319305419922, 240.81613159179688, 588.8370361328125, 183.35365295410156, 284.5170593261719, 65.18232727050781, 301.2117614746094, 399.60528564453125, 335.2684631347656, 136.95314025878906, 110.82150268554688, 833.1605834960938, 203.4805145263672, 30.74622344970703, 92.52915954589844, 342.9205017089844, 59.398193359375, 268.83062744140625, 334.9310302734375, 244.40298461914062, 92.32200622558594, 6.311031341552734, 443.8487243652344, -346.01031494140625, 321.8801574707031, 163.87979125976562, 425.27716064453125, 317.7696228027344, 768.3525390625, 422.180908203125, 313.61846923828125, 266.6272277832031, 325.8477783203125, 187.17730712890625, 383.5408630371094, 50.75115966796875, -99.94927978515625, 487.57867431640625, 518.486572265625, 179.28965759277344, 164.0310821533203, 185.72299194335938, 212.79440307617188, -38.43262481689453, 643.7760009765625, 621.0908203125, -178.37994384765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000343.npy"}
{"epoch": 0.5036710719530103, "step": 344, "batch_size": 64, "mean": 206.59829711914062, "std": 286.664794921875, "min": -272.1086120605469, "p10": -46.819253540039064, "median": 167.5109100341797, "p90": 556.1293334960938, "max": 1542.0396728515625, "pos_frac": 0.796875, "sample": [37.47114562988281, -74.33667755126953, -44.37937927246094, -12.806312561035156, 52.9300422668457, 74.44680786132812, 160.1852264404297, 110.4963150024414, 78.11216735839844, 256.71661376953125, 4.944368362426758, 858.64794921875, 102.0177230834961, 229.69749450683594, 85.97660827636719, 163.31414794921875, 215.3296661376953, -253.72935485839844, -272.1086120605469, 275.374755859375, 265.13177490234375, 53.91810607910156, 187.1180877685547, 4.909778594970703, 214.39190673828125, 552.8023681640625, 622.9296264648438, 411.4470520019531, 95.12002563476562, -20.899169921875, 392.8197326660156, 235.98779296875, 303.6985778808594, 72.33761596679688, 189.73541259765625, -46.969757080078125, 344.24847412109375, -46.46807861328125, 260.01556396484375, -4.01317024230957, 561.0806884765625, 278.3226013183594, 175.8265838623047, 92.79912567138672, 77.13333892822266, 1542.0396728515625, 412.1560363769531, 297.3023376464844, 6.540666580200195, 638.4414672851562, -23.853172302246094, 498.6276550292969, 171.70767211914062, 563.0457763671875, 93.07844543457031, -195.10060119628906, -156.23394775390625, 474.63482666015625, 557.55517578125, -216.16305541992188, 261.971923828125, 381.1175231933594, 88.72541809082031, 504.9723205566406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000344.npy"}
{"epoch": 0.5051395007342144, "step": 345, "batch_size": 64, "mean": 234.78372192382812, "std": 278.74237060546875, "min": -406.66937255859375, "p10": -57.9231948852539, "median": 188.32535552978516, "p90": 611.0756835937501, "max": 950.9000244140625, "pos_frac": 0.796875, "sample": [-29.96521759033203, 628.5743408203125, 143.65631103515625, 421.6838684082031, 141.88832092285156, -208.68695068359375, 735.4859619140625, -217.9757843017578, 178.32281494140625, 472.23834228515625, -26.628957748413086, 308.40606689453125, 138.8992919921875, 423.0138244628906, 570.2454833984375, 80.81796264648438, -355.8051452636719, 346.4570617675781, 60.64081954956055, 88.87391662597656, 415.1438903808594, 417.7447509765625, 82.71650695800781, 367.64019775390625, 234.0072021484375, 552.0321044921875, 105.00313568115234, 321.40667724609375, 117.76712036132812, 195.0669708251953, 12.45489501953125, 866.3322143554688, 420.4962158203125, 23.483688354492188, 99.802734375, 387.0102233886719, 444.43109130859375, 476.8306884765625, 382.14349365234375, 251.5159454345703, 407.48065185546875, 950.9000244140625, 53.36947250366211, 124.56256103515625, 384.32244873046875, 255.90982055664062, -23.922252655029297, 752.5687255859375, 154.50656127929688, 450.1622314453125, -61.213645935058594, -96.91666412353516, 235.78799438476562, 660.0609741210938, -12.508939743041992, -153.75082397460938, -50.24547576904297, 129.42550659179688, 686.3099365234375, 181.583740234375, 218.04351806640625, 135.66473388671875, -406.66937255859375, -22.446487426757812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000345.npy"}
{"epoch": 0.5066079295154186, "step": 346, "batch_size": 64, "mean": 223.27017211914062, "std": 262.3050537109375, "min": -581.8436279296875, "p10": -101.6506507873535, "median": 237.97847747802734, "p90": 582.5382690429689, "max": 774.1378173828125, "pos_frac": 0.796875, "sample": [312.67779541015625, 168.93336486816406, 61.95824432373047, 291.80853271484375, 670.8939208984375, 528.1800537109375, 668.3870849609375, 228.55621337890625, 168.7998504638672, 760.7730102539062, -581.8436279296875, 93.21733093261719, 405.96044921875, 9.67642593383789, 360.2664489746094, 664.0234375, -20.824462890625, 326.0106201171875, 774.1378173828125, 13.694215774536133, 382.29168701171875, 180.74850463867188, -139.99578857421875, 392.0516357421875, 333.7622375488281, 294.78076171875, 325.10003662109375, 243.69866943359375, 404.8995361328125, 144.69003295898438, 8.423643112182617, -106.32662200927734, 212.95201110839844, 84.02534484863281, 613.33251953125, 232.25828552246094, 263.586669921875, 130.7667236328125, 291.73468017578125, -160.44996643066406, -31.171669006347656, 595.9351806640625, 308.5205993652344, 538.5280151367188, 160.7188262939453, 377.86224365234375, 418.238037109375, 359.506103515625, -18.638935089111328, -67.04766845703125, 257.5100402832031, -65.33622741699219, 66.10690307617188, -199.37196350097656, 149.4930877685547, 551.27880859375, 116.09115600585938, 138.91583251953125, 472.0673522949219, -200.45138549804688, -90.74005126953125, -169.24041748046875, 289.54254150390625, 293.3576965332031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000346.npy"}
{"epoch": 0.5080763582966226, "step": 347, "batch_size": 64, "mean": 222.094482421875, "std": 288.4271240234375, "min": -362.2097473144531, "p10": -104.44055175781249, "median": 180.3140640258789, "p90": 607.3411437988283, "max": 906.79248046875, "pos_frac": 0.765625, "sample": [-362.2097473144531, 273.6938171386719, 452.1251220703125, 370.8460998535156, 75.13335418701172, -34.343971252441406, 191.42355346679688, 304.13018798828125, 708.7015380859375, 9.980400085449219, 468.96942138671875, 32.14906311035156, 198.8563995361328, -109.92556762695312, 418.9510498046875, -261.75799560546875, -277.954833984375, -91.64218139648438, 798.8404541015625, 105.8744125366211, 496.8995056152344, 56.106204986572266, 172.68853759765625, 318.56646728515625, -199.11273193359375, 535.0902709960938, 569.693115234375, 343.8525390625, -7.850700378417969, 709.7235107421875, 338.37225341796875, 692.3095092773438, 328.2703857421875, -57.497474670410156, 223.283447265625, -70.54680633544922, 501.550048828125, 153.14013671875, 118.81256103515625, -82.04911804199219, 145.60110473632812, 290.9647521972656, 86.68651580810547, 334.556396484375, 159.02865600585938, 66.63943481445312, 120.51959228515625, -28.32508087158203, 906.79248046875, 89.46414947509766, 365.87353515625, 623.4760131835938, 187.93959045410156, 844.5555419921875, -86.6343765258789, 36.855201721191406, 370.4533386230469, -209.0651397705078, 462.77093505859375, 2.96466064453125, 466.3639831542969, -134.80821228027344, 151.1586151123047, 547.0722045898438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000347.npy"}
{"epoch": 0.5095447870778267, "step": 348, "batch_size": 64, "mean": 187.81101989746094, "std": 270.87396240234375, "min": -363.18511962890625, "p10": -127.00883560180662, "median": 188.74897003173828, "p90": 458.00883483886724, "max": 1167.1175537109375, "pos_frac": 0.765625, "sample": [300.0367126464844, 79.13262176513672, 157.44174194335938, 428.04278564453125, 326.1592102050781, -115.8089370727539, 60.11890411376953, 364.3653869628906, 373.88958740234375, -363.18511962890625, 85.53770446777344, 68.18896484375, 633.8875732421875, 251.04122924804688, 126.69551086425781, 322.2403564453125, 9.74940299987793, 353.5805358886719, 123.62872314453125, 142.59201049804688, 334.1405944824219, 329.27947998046875, -100.47856903076172, 1167.1175537109375, 463.8838195800781, 257.09906005859375, -41.241600036621094, 255.54653930664062, -45.42974853515625, 61.78374481201172, -65.44172668457031, -185.90757751464844, -165.60916137695312, 849.7902221679688, 626.219482421875, 334.08087158203125, 340.1655578613281, 280.357666015625, 248.71119689941406, 1.5337047576904297, -273.1419677734375, 438.28594970703125, -39.21833038330078, 327.6294860839844, 267.3740234375, 248.01449584960938, -189.6295166015625, 28.311128616333008, 574.129150390625, 244.6401824951172, 141.15585327148438, -81.71806335449219, 444.300537109375, -272.66741943359375, 361.6112060546875, 220.0561981201172, 73.97748565673828, 15.19351577758789, 29.60760498046875, 559.4339599609375, 87.71505737304688, 329.8070983886719, -131.8087921142578, -56.08981704711914], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000348.npy"}
{"epoch": 0.5110132158590308, "step": 349, "batch_size": 64, "mean": 217.50335693359375, "std": 306.0675048828125, "min": -475.1156311035156, "p10": -101.57076568603513, "median": 139.35480117797852, "p90": 572.3873413085938, "max": 1244.89794921875, "pos_frac": 0.8125, "sample": [115.1472396850586, -347.546630859375, 481.9090270996094, 117.79792022705078, 105.83285522460938, 118.08486938476562, 464.1806640625, -113.33138275146484, 301.5773010253906, 451.5367736816406, 157.56085205078125, 510.467529296875, 124.91922760009766, -39.132102966308594, 28.788557052612305, 88.30290222167969, -33.52393341064453, 10.448997497558594, 267.22113037109375, -170.16900634765625, 533.7699584960938, 367.2481384277344, 353.5882263183594, 17.546850204467773, 347.1381530761719, 2.361858367919922, 432.79345703125, -74.12932586669922, 120.6245346069336, 195.31881713867188, 798.7991333007812, 11.050407409667969, -51.56449508666992, 521.2607421875, 196.1505126953125, -475.1156311035156, -169.02255249023438, 105.30204010009766, 604.271728515625, 59.830265045166016, 743.2655029296875, 10.107879638671875, -247.45477294921875, -44.55085754394531, 555.8693237304688, 610.2361450195312, 949.9808349609375, 191.28456115722656, 579.4664916992188, 153.79037475585938, 410.99627685546875, 428.15936279296875, 8.837760925292969, 336.0804443359375, 283.98284912109375, 260.5033264160156, -163.83132934570312, 76.61701965332031, 8.304454803466797, 482.6034240722656, 392.1660461425781, 37.7109375, 73.89422607421875, 1244.89794921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000349.npy"}
{"epoch": 0.5124816446402349, "step": 350, "batch_size": 64, "mean": 210.67300415039062, "std": 290.224609375, "min": -559.404052734375, "p10": -168.65575714111327, "median": 202.6007537841797, "p90": 552.2191833496096, "max": 919.5656127929688, "pos_frac": 0.8125, "sample": [781.247802734375, -393.1530456542969, 127.42456817626953, 406.3968200683594, 318.8140563964844, 196.0748291015625, 469.2659912109375, 90.33535766601562, -129.0419464111328, 122.57844543457031, 100.57023620605469, 410.20953369140625, -466.1943359375, 133.98313903808594, 200.71319580078125, -1.860015869140625, 658.7420043945312, 502.51837158203125, 919.5656127929688, 269.7825927734375, 189.19668579101562, 216.2998809814453, -31.001150131225586, 473.0497131347656, 252.51397705078125, 728.1618041992188, 163.83480834960938, -189.01556396484375, 153.504638671875, 45.95392608642578, 573.51953125, 87.78376770019531, 178.46487426757812, 170.22305297851562, 365.5515441894531, 306.52642822265625, 179.88482666015625, 302.66912841796875, -269.94793701171875, 699.080078125, 184.51353454589844, 501.85003662109375, 307.5508728027344, -168.52682495117188, 164.96868896484375, 215.6588134765625, 651.4196166992188, 425.8121032714844, 213.76577758789062, 300.64398193359375, -559.404052734375, 460.55084228515625, 206.2333526611328, 260.2356872558594, 97.00862884521484, 204.48831176757812, -345.2802734375, -94.80532836914062, 48.695457458496094, 186.12918090820312, 402.96148681640625, 209.27572631835938, -168.7110137939453, 463.8109130859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000350.npy"}
{"epoch": 0.5139500734214391, "step": 351, "batch_size": 64, "mean": 249.109375, "std": 315.7483825683594, "min": -362.5836181640625, "p10": -121.31057586669915, "median": 225.5657958984375, "p90": 686.1263916015625, "max": 1165.0716552734375, "pos_frac": 0.765625, "sample": [108.01669311523438, 289.76031494140625, 493.60028076171875, 991.5946044921875, 1165.0716552734375, 111.67298889160156, 419.71063232421875, 110.33424377441406, 267.525146484375, 312.4178466796875, 34.46826171875, 224.31637573242188, 715.7949829101562, 801.679443359375, 366.822998046875, 383.0326232910156, 508.03863525390625, 578.3657836914062, 689.7197265625, -30.2067928314209, -254.69711303710938, 407.2801208496094, 132.02468872070312, 242.154296875, 177.92330932617188, 374.4780578613281, 540.7890625, 224.2983856201172, -362.5836181640625, -51.97123718261719, -278.0638732910156, 192.3145751953125, -174.0430908203125, 106.6115951538086, 14.45025634765625, 863.6439208984375, 27.48735809326172, 155.52020263671875, 226.81521606445312, 9.159385681152344, -151.02743530273438, 406.4799499511719, -22.332988739013672, 677.741943359375, 39.197906494140625, -42.96129608154297, 187.9703826904297, -20.56268310546875, -14.43341064453125, -227.7377166748047, -39.306129455566406, 658.2700805664062, 240.03147888183594, 328.8087158203125, 727.476318359375, -181.70091247558594, 298.33038330078125, 660.6409912109375, 330.5956726074219, 281.9280090332031, 167.6889190673828, 320.8551025390625, 248.31837463378906, -46.59984588623047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000351.npy"}
{"epoch": 0.5154185022026432, "step": 352, "batch_size": 64, "mean": 163.22300720214844, "std": 270.824951171875, "min": -548.5924682617188, "p10": -124.13136901855466, "median": 135.60675048828125, "p90": 505.34832153320315, "max": 988.5267944335938, "pos_frac": 0.6875, "sample": [387.8777160644531, 370.7383117675781, 162.42044067382812, 468.0722961425781, -328.4892883300781, 124.62850189208984, 114.91226959228516, 426.7314453125, 97.49577331542969, 219.7432403564453, 625.7510986328125, 495.72430419921875, -38.3562126159668, 22.89806365966797, 215.28355407714844, -32.11872863769531, -86.2177734375, 988.5267944335938, 288.7672424316406, -135.63987731933594, 204.45223999023438, 736.7357177734375, 135.73899841308594, 431.5301513671875, 71.87773132324219, -38.711708068847656, -93.47634887695312, -220.89886474609375, 45.086463928222656, 113.2637939453125, -89.86337280273438, 509.472900390625, 17.240291595458984, 35.287620544433594, 559.0362548828125, 138.08282470703125, -548.5924682617188, 89.48006439208984, 238.93267822265625, 372.64453125, 239.7925567626953, 196.69125366210938, 187.64393615722656, -202.01034545898438, -62.47053909301758, -57.430938720703125, -185.72189331054688, 544.9572143554688, 463.031494140625, -185.4149627685547, 270.81829833984375, -59.962852478027344, -77.29170989990234, 468.72906494140625, 258.5804138183594, 201.53091430664062, 408.75152587890625, -4.577568054199219, 557.3574829101562, 135.47450256347656, -2.393890380859375, -97.27818298339844, 226.74270629882812, 124.65505981445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000352.npy"}
{"epoch": 0.5168869309838473, "step": 353, "batch_size": 64, "mean": 205.90402221679688, "std": 277.3384704589844, "min": -422.21875, "p10": -82.2024528503418, "median": 165.5747299194336, "p90": 631.4282897949219, "max": 838.9122314453125, "pos_frac": 0.71875, "sample": [384.8399658203125, 360.5799865722656, 27.24181365966797, 454.84344482421875, 158.81219482421875, 402.8148498535156, 57.30754089355469, -101.98181915283203, 569.3756713867188, 203.1766357421875, 221.84579467773438, -80.83997344970703, 267.84991455078125, 630.7586669921875, 69.61056518554688, 423.23370361328125, -136.3164520263672, 52.823455810546875, 579.0679931640625, 641.4387817382812, 159.4966278076172, 294.9997863769531, -33.06424331665039, 90.53616333007812, -422.21875, 363.29150390625, -237.62640380859375, -79.36359405517578, -82.2647476196289, 270.6630554199219, -28.69227409362793, 171.65283203125, 327.900146484375, 331.2127685546875, 35.483551025390625, -30.100154876708984, 46.984405517578125, -156.73519897460938, 810.0831298828125, 1.605499267578125, -53.43011474609375, 641.6067504882812, -24.250526428222656, 60.67230987548828, 118.44405364990234, 340.28216552734375, 337.18280029296875, 202.38670349121094, -22.548309326171875, 645.677001953125, 266.5663146972656, -49.47261047363281, 593.7626342773438, 372.9651794433594, 838.9122314453125, 770.0934448242188, -161.40098571777344, 357.7911376953125, 631.7152709960938, -82.05709838867188, 91.60472869873047, 83.05088806152344, -4.27911376953125, 202.25631713867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000353.npy"}
{"epoch": 0.5183553597650514, "step": 354, "batch_size": 64, "mean": 225.83035278320312, "std": 217.39413452148438, "min": -169.05807495117188, "p10": -45.262419128417946, "median": 189.5939483642578, "p90": 465.3043975830078, "max": 857.239990234375, "pos_frac": 0.84375, "sample": [303.31182861328125, 438.90057373046875, 236.46250915527344, 58.29266357421875, 463.8636169433594, 186.97348022460938, 623.3359375, -84.85835266113281, 225.27944946289062, -99.35398864746094, -19.801307678222656, -169.05807495117188, 249.4545135498047, 857.239990234375, 432.9577331542969, 180.69216918945312, -15.814933776855469, 379.925537109375, 474.9245910644531, 284.3936767578125, 30.30628204345703, 375.01678466796875, 175.38699340820312, 87.44744873046875, -73.37310791015625, 128.60459899902344, 378.5347595214844, 214.92433166503906, 112.73128509521484, 362.8458251953125, 492.3572998046875, -109.75169372558594, 289.4952697753906, 267.2205505371094, 164.2452392578125, 145.46339416503906, -56.17432403564453, 457.1093444824219, 192.21441650390625, 100.05641174316406, 437.8799743652344, 7.404121398925781, 367.18463134765625, 181.46591186523438, 42.08618927001953, 316.1876220703125, 96.7138900756836, 664.2189331054688, 393.4412841796875, -16.519485473632812, 14.693960189819336, 388.2969970703125, 366.73638916015625, 111.76849365234375, -80.87805938720703, 741.507080078125, 71.80931854248047, 370.8953857421875, 398.09100341796875, 72.09089660644531, 465.921875, 142.42816162109375, 152.73135375976562, 5.203243255615234], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000354.npy"}
{"epoch": 0.5198237885462555, "step": 355, "batch_size": 64, "mean": 217.17283630371094, "std": 231.02503967285156, "min": -327.46044921875, "p10": -49.661719131469724, "median": 205.7917251586914, "p90": 510.7481903076172, "max": 887.240234375, "pos_frac": 0.796875, "sample": [116.16668701171875, 533.6483154296875, 278.3053894042969, 505.6816711425781, 40.656883239746094, 252.47349548339844, 245.6171875, 887.240234375, -293.6103515625, 277.38037109375, 209.0682373046875, 603.9563598632812, -13.537178039550781, -17.543163299560547, 447.95989990234375, 503.3978271484375, -99.651123046875, 558.8836059570312, 160.5736541748047, 140.1217041015625, 425.26165771484375, 260.8700866699219, -64.52674865722656, -8.357181549072266, 373.9976806640625, 512.9195556640625, 465.44366455078125, 257.1672058105469, 151.218994140625, -2.527587890625, -52.315181732177734, 194.07745361328125, 29.709671020507812, -327.46044921875, 211.65443420410156, -142.09410095214844, 334.6954345703125, 613.1546630859375, 312.1220703125, 699.6394653320312, 99.28009796142578, 202.5152130126953, 117.64109802246094, 169.88711547851562, 139.87185668945312, 269.8138732910156, 106.79373931884766, 409.9886779785156, 88.4473876953125, 194.48794555664062, 476.268798828125, 314.7740173339844, 311.76983642578125, 223.64111328125, 111.10945129394531, -43.470306396484375, 120.04212951660156, -112.44973754882812, 221.80325317382812, -22.79788589477539, 71.4034423828125, 324.9000244140625, 370.2427978515625, 151.65655517578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000355.npy"}
{"epoch": 0.5212922173274597, "step": 356, "batch_size": 64, "mean": 123.26990509033203, "std": 301.80596923828125, "min": -621.2181396484375, "p10": -238.04916839599608, "median": 97.69329833984375, "p90": 515.8371734619142, "max": 956.5797119140625, "pos_frac": 0.65625, "sample": [219.978515625, 8.569938659667969, 349.00836181640625, 76.84358978271484, -42.15108108520508, -90.00392150878906, -98.74748229980469, 220.48291015625, -103.7808837890625, 217.7760009765625, 319.6617431640625, 0.7723579406738281, 723.2360229492188, 478.6294860839844, -101.9127197265625, 559.1945190429688, -151.67897033691406, -212.13209533691406, -621.2181396484375, -243.90524291992188, 39.43730926513672, 766.0503540039062, -103.25559997558594, 121.19192504882812, 346.813720703125, 531.7833251953125, -59.290855407714844, 372.8605041503906, -297.1271667480469, 54.36608123779297, -270.709228515625, 673.0693969726562, 98.75187683105469, -282.7626953125, 411.4334716796875, 96.63471984863281, 268.80462646484375, 352.129638671875, -220.1803436279297, 77.13975524902344, -78.54130554199219, -106.9095230102539, 165.183837890625, 261.60943603515625, 8.365638732910156, 239.79788208007812, 285.79559326171875, -224.38499450683594, 696.7535400390625, 251.27810668945312, 181.09735107421875, 27.696990966796875, -326.26910400390625, -4.186836242675781, 230.06100463867188, 107.56620025634766, 154.21067810058594, 281.9537048339844, -448.1015930175781, 956.5797119140625, 371.2412109375, 308.2940673828125, -3.8376846313476562, 68.25614166259766], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000356.npy"}
{"epoch": 0.5227606461086637, "step": 357, "batch_size": 64, "mean": 204.9359588623047, "std": 254.5968017578125, "min": -643.0918579101562, "p10": -50.497539520263665, "median": 208.58535766601562, "p90": 569.1196350097656, "max": 813.190673828125, "pos_frac": 0.796875, "sample": [375.7826232910156, 568.7337036132812, -22.680679321289062, 627.0646362304688, 511.2090148925781, 12.186019897460938, 72.77601623535156, 427.3900146484375, 148.82284545898438, 569.2850341796875, 278.397705078125, 72.59542083740234, -111.18607330322266, 552.9832153320312, -54.62481689453125, 126.67960357666016, 221.98178100585938, 223.89639282226562, 296.52337646484375, -40.27961730957031, 813.190673828125, -40.867225646972656, 328.30377197265625, -2.9262847900390625, -643.0918579101562, 52.93991470336914, 266.23272705078125, 161.96212768554688, -59.361289978027344, 584.8221435546875, 257.7695617675781, -157.07855224609375, 479.7547302246094, -149.72006225585938, 339.48651123046875, 12.974834442138672, 415.766357421875, 794.63427734375, 301.7293701171875, 287.8081970214844, 2.760223388671875, -5.278436660766602, 292.34490966796875, 587.9090576171875, 87.05841827392578, 102.72848510742188, 68.36790466308594, 235.87826538085938, 9.660152435302734, 202.7564697265625, -21.130613327026367, 304.31890869140625, 214.41424560546875, 10.398513793945312, 289.08062744140625, 301.94940185546875, 115.3683853149414, 304.242919921875, -67.96731567382812, 238.79725646972656, 125.15278625488281, 662.8575439453125, 14.98876953125, 137.37802124023438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000357.npy"}
{"epoch": 0.5242290748898678, "step": 358, "batch_size": 64, "mean": 193.22451782226562, "std": 253.00112915039062, "min": -405.0308837890625, "p10": -54.989661026000974, "median": 164.70167541503906, "p90": 514.1653564453127, "max": 873.9393310546875, "pos_frac": 0.765625, "sample": [71.14309692382812, 615.77294921875, 74.85267639160156, 295.2507019042969, 234.7978973388672, 364.5135192871094, 44.11436462402344, 290.56640625, 120.34248352050781, 84.10945129394531, -14.104438781738281, -60.88946533203125, -129.16256713867188, -27.704421997070312, 11.228322982788086, 705.430908203125, -35.446189880371094, 529.69921875, 51.865814208984375, 95.33175659179688, 36.12548065185547, 6.3000335693359375, 17.977737426757812, 57.321266174316406, 165.96713256835938, -405.0308837890625, 339.14691162109375, -157.0172119140625, 873.9393310546875, 410.62322998046875, 450.95831298828125, 174.9915771484375, 302.56512451171875, -19.362037658691406, 570.6104125976562, 204.03424072265625, -17.21746063232422, 82.72723388671875, -48.72325897216797, 355.1905822753906, 330.75018310546875, 432.1198425292969, -147.69256591796875, 316.203369140625, 410.5723571777344, -56.256866455078125, 351.2312927246094, 821.76513671875, -52.0328483581543, 100.95378112792969, 388.90216064453125, 165.18508911132812, 99.35104370117188, -18.56678009033203, 477.919677734375, 866.8441162109375, 88.11620330810547, 171.79202270507812, 228.55259704589844, -71.69400024414062, 186.8194122314453, 164.21826171875, 219.1600341796875, 169.315673828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000358.npy"}
{"epoch": 0.5256975036710719, "step": 359, "batch_size": 64, "mean": 226.85186767578125, "std": 291.6161804199219, "min": -424.29290771484375, "p10": -117.23266601562499, "median": 179.84461975097656, "p90": 629.8946411132814, "max": 1042.3411865234375, "pos_frac": 0.78125, "sample": [-424.29290771484375, 239.85284423828125, 251.2032470703125, 493.4065856933594, -175.52719116210938, 709.6019897460938, -171.73947143554688, 929.85791015625, 139.5081787109375, -78.84074401855469, -120.56642150878906, -53.795509338378906, 368.181396484375, 316.9270935058594, -205.48883056640625, 41.20790100097656, 387.42059326171875, -37.710601806640625, 437.789794921875, -145.48025512695312, 297.2698059082031, 216.912841796875, 489.3533935546875, 90.60033416748047, 24.499835968017578, 153.6354522705078, 31.32541847229004, 514.13720703125, 103.1246337890625, 25.145469665527344, 465.154541015625, 329.64666748046875, 1042.3411865234375, 450.53936767578125, -109.45390319824219, 45.71497344970703, 187.1065673828125, 101.26506805419922, -190.062744140625, 700.5738525390625, 165.01089477539062, 473.9675598144531, 253.05210876464844, 354.07037353515625, -44.950714111328125, 164.79542541503906, 481.84197998046875, 236.57257080078125, 126.19458770751953, 692.501220703125, 53.036170959472656, -37.25907897949219, -52.90000915527344, 351.9571838378906, 191.2736358642578, 641.3302001953125, 3.171661376953125, 235.92039489746094, 773.8922119140625, 517.6588134765625, 139.77407836914062, 603.211669921875, 151.4687042236328, 172.58267211914062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000359.npy"}
{"epoch": 0.527165932452276, "step": 360, "batch_size": 64, "mean": 171.06539916992188, "std": 208.84901428222656, "min": -154.85736083984375, "p10": -77.15728683471679, "median": 123.34841918945312, "p90": 467.5110412597657, "max": 663.939697265625, "pos_frac": 0.75, "sample": [54.676605224609375, 475.98602294921875, -0.34433746337890625, 123.3043212890625, -67.32466888427734, 365.46478271484375, -21.562583923339844, 183.53062438964844, 340.7674865722656, 184.21730041503906, 113.92777252197266, 495.3102722167969, -117.86405944824219, 307.23486328125, 663.939697265625, 121.28193664550781, 36.54471969604492, 21.953262329101562, 315.05462646484375, 64.14236450195312, 408.5226745605469, 213.43136596679688, -55.29540252685547, 249.2801971435547, 48.85042953491211, 329.68475341796875, 447.736083984375, 21.312301635742188, 7.163043975830078, 496.9223937988281, 541.263671875, 360.1871032714844, 532.26953125, -60.55027770996094, 52.101173400878906, 142.99183654785156, 185.3875732421875, 111.12945556640625, 398.91839599609375, -80.55560302734375, 93.25907897949219, 628.3193359375, -145.58526611328125, -108.10052490234375, -19.439422607421875, 160.73541259765625, -154.85736083984375, 163.42884826660156, -26.2668514251709, -128.95216369628906, -4.0553131103515625, 354.4296875, 23.860998153686523, 340.9753723144531, 352.7489318847656, 258.8656005859375, 283.77569580078125, -94.2626724243164, 123.39251708984375, 408.0360412597656, 101.03120422363281, -69.2278823852539, 24.997215270996094, 370.0855712890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000360.npy"}
{"epoch": 0.5286343612334802, "step": 361, "batch_size": 64, "mean": 172.15640258789062, "std": 256.9750061035156, "min": -437.1697082519531, "p10": -114.23766632080077, "median": 177.1668243408203, "p90": 538.2269409179688, "max": 762.0772705078125, "pos_frac": 0.71875, "sample": [21.334890365600586, 194.56243896484375, 2.519390106201172, 390.6424560546875, -66.4419937133789, -264.37554931640625, -35.97937774658203, 202.8860626220703, 548.3711547851562, 139.8985137939453, 178.58126831054688, 243.32406616210938, -437.1697082519531, 410.0882568359375, -64.66041564941406, -78.55204010009766, 212.23895263671875, 157.58876037597656, -278.33953857421875, 7.546201705932617, 228.18649291992188, -142.30096435546875, 39.36444091796875, -8.49197006225586, 211.92872619628906, 762.0772705078125, 307.3194274902344, 476.481201171875, 143.94677734375, 424.42816162109375, -54.39741516113281, -57.63291549682617, 186.7921905517578, 159.98867797851562, 443.76153564453125, 542.934326171875, -99.63929748535156, 746.73046875, 677.2399291992188, 486.3592529296875, 195.1734161376953, 159.97398376464844, 543.3037109375, 400.6988220214844, 175.75238037109375, 188.73419189453125, 76.02825164794922, 217.02523803710938, -56.0482177734375, -89.93538665771484, 44.138824462890625, -120.49411010742188, 527.2430419921875, -21.511520385742188, 333.75970458984375, 282.1224365234375, 143.75146484375, -143.8756866455078, 276.8079528808594, 5.6028289794921875, 202.35552978515625, -257.2837219238281, 546.5445556640625, 429.0020751953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000361.npy"}
{"epoch": 0.5301027900146843, "step": 362, "batch_size": 64, "mean": 270.275146484375, "std": 266.3769836425781, "min": -165.82254028320312, "p10": -38.70624675750731, "median": 226.66297149658203, "p90": 607.7518737792968, "max": 1299.4837646484375, "pos_frac": 0.828125, "sample": [100.33413696289062, 124.97161865234375, 497.37646484375, 598.0282592773438, 307.1829528808594, -16.28545379638672, 1299.4837646484375, 250.25473022460938, 145.36947631835938, 660.837646484375, 511.7833557128906, 110.68950653076172, 33.252235412597656, 141.21060180664062, 244.20729064941406, 512.4608154296875, 225.56761169433594, -45.86341094970703, 99.66448974609375, 367.09344482421875, 459.39306640625, 117.28994750976562, 32.813819885253906, -70.230224609375, -11.457992553710938, -22.006196975708008, 153.96905517578125, 189.0505828857422, -88.97039031982422, 620.1868286132812, 608.8117065429688, 128.4949188232422, -88.913818359375, 280.6134948730469, 278.7261962890625, 179.4718017578125, 544.5028076171875, -47.453773498535156, 204.98216247558594, 605.2789306640625, 237.5467071533203, 74.95970153808594, 724.771484375, 463.4432067871094, 135.72543334960938, 135.10699462890625, 393.81036376953125, 183.15325927734375, 40.12037658691406, 771.099365234375, 241.55502319335938, 689.792236328125, 528.4293212890625, 379.07977294921875, 227.75833129882812, 466.9929504394531, 527.7120361328125, -93.18991088867188, 137.68983459472656, 346.9886779785156, 287.7827453613281, 323.9691467285156, -3.036773681640625, -165.82254028320312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000362.npy"}
{"epoch": 0.5315712187958884, "step": 363, "batch_size": 64, "mean": 278.87579345703125, "std": 276.615966796875, "min": -274.6435546875, "p10": -25.984047698974603, "median": 258.3417205810547, "p90": 635.4634216308594, "max": 1295.371337890625, "pos_frac": 0.84375, "sample": [-274.6435546875, 357.59722900390625, 615.2916259765625, 251.7391815185547, 108.55593872070312, 258.0469970703125, 289.0909423828125, 256.4764709472656, 236.69898986816406, 130.6510772705078, 358.65234375, 250.5505828857422, 338.423583984375, 18.59156036376953, 366.93280029296875, 875.128173828125, -48.20855712890625, 641.292724609375, 61.31919860839844, 724.2388305664062, 467.2854919433594, 69.9936752319336, 326.021240234375, 387.03326416015625, 295.1756896972656, 412.8154296875, 1295.371337890625, 579.2277221679688, -28.796142578125, 606.6651611328125, 183.89630126953125, 205.31375122070312, 29.494049072265625, 292.9000244140625, 457.0859680175781, 75.06824493408203, 245.1021728515625, -60.441986083984375, 640.2781982421875, 458.0518798828125, 409.1338806152344, -162.23812866210938, 40.918540954589844, 403.9211120605469, -35.99065399169922, 372.5360412597656, 624.2289428710938, 3.0277099609375, 261.79669189453125, 451.8146667480469, -19.42249298095703, 138.2668914794922, -120.39131927490234, 420.17156982421875, 258.6364440917969, 25.81037139892578, -18.167213439941406, 229.40545654296875, 49.09579086303711, 38.69483184814453, 699.6109008789062, 720.3073120117188, -6.158454895019531, 309.0723876953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000363.npy"}
{"epoch": 0.5330396475770925, "step": 364, "batch_size": 64, "mean": 206.98001098632812, "std": 284.074462890625, "min": -503.4626770019531, "p10": -75.51435012817382, "median": 197.69229125976562, "p90": 539.1287536621095, "max": 929.097412109375, "pos_frac": 0.765625, "sample": [455.67767333984375, 264.6579284667969, -201.2825164794922, 48.01299285888672, -503.4626770019531, 579.0126342773438, -278.07452392578125, 321.4465637207031, 237.22000122070312, 895.8645629882812, 183.46885681152344, 659.273681640625, 429.9105224609375, 347.839599609375, -67.28389739990234, 71.25350952148438, 54.5244140625, -262.9456481933594, 314.5033874511719, -27.165130615234375, -79.04168701171875, 441.8751525878906, 220.88922119140625, 428.6942138671875, 351.4113464355469, 18.056001663208008, -10.70938491821289, 508.51519775390625, 211.9157257080078, 306.22564697265625, -18.245407104492188, 685.1993408203125, 134.782958984375, -27.715362548828125, 89.14212799072266, 100.77423095703125, 431.357421875, 327.9013977050781, 284.1512756347656, -153.63455200195312, 102.0810775756836, 551.8283081054688, 693.09228515625, 96.96604919433594, 331.53594970703125, -20.01886749267578, -9.337259292602539, 386.50189208984375, 509.4964599609375, 371.9898986816406, 43.77534103393555, 172.11636352539062, 1.9919414520263672, 156.8759765625, 929.097412109375, 73.16552734375, 379.943603515625, 316.966796875, -60.17420196533203, 240.87164306640625, 59.2572021484375, 180.313720703125, 436.0001220703125, -471.6141662597656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000364.npy"}
{"epoch": 0.5345080763582967, "step": 365, "batch_size": 64, "mean": 220.86419677734375, "std": 265.739501953125, "min": -223.26499938964844, "p10": -73.94388465881346, "median": 171.18619537353516, "p90": 531.5027709960939, "max": 1258.5836181640625, "pos_frac": 0.84375, "sample": [187.06076049804688, 169.68460083007812, 86.23272705078125, 139.1869354248047, 138.885498046875, 140.01654052734375, 357.2535705566406, 101.04480743408203, 215.63714599609375, 104.47134399414062, 268.20379638671875, 492.94305419921875, -91.28053283691406, 221.4537353515625, 85.05195617675781, -40.789093017578125, 105.49674987792969, 739.561767578125, 577.9818725585938, 291.4723815917969, 146.86874389648438, 58.08362579345703, 65.5234603881836, 17.781005859375, 258.14410400390625, -123.00201416015625, 93.56991577148438, 35.50967788696289, 172.6877899169922, 190.96490478515625, -81.89768981933594, -55.385005950927734, 111.54312896728516, 37.35123062133789, 415.28662109375, 646.5156860351562, 435.568359375, 78.47817993164062, -152.69744873046875, 511.2640380859375, 270.8817138671875, -172.76690673828125, 226.06277465820312, 41.480491638183594, -11.607904434204102, 362.9383544921875, 258.74896240234375, 333.1632080078125, 337.0144348144531, -174.20184326171875, 52.086524963378906, 402.28076171875, 7.857919692993164, 278.1658630371094, 258.638916015625, 318.7627258300781, 1258.5836181640625, 540.176513671875, 454.7574462890625, -223.26499938964844, 836.149658203125, 809.84423828125, 127.795166015625, 390.03265380859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000365.npy"}
{"epoch": 0.5359765051395007, "step": 366, "batch_size": 64, "mean": 224.57408142089844, "std": 290.8706359863281, "min": -331.88525390625, "p10": -118.52190933227537, "median": 195.93498992919922, "p90": 603.5544738769531, "max": 1046.95556640625, "pos_frac": 0.78125, "sample": [-87.52205657958984, 203.11351013183594, 322.6123046875, 114.14015197753906, -273.9696044921875, 34.331138610839844, -93.45601654052734, 68.57464599609375, -84.23583984375, 465.42767333984375, 77.91476440429688, -226.40512084960938, 524.1589965820312, 712.299072265625, 188.7564697265625, -77.11372375488281, 331.1839294433594, 32.409847259521484, 141.08518981933594, -88.94038391113281, 327.1669006347656, 603.7803344726562, 360.21636962890625, 19.695053100585938, 575.6102294921875, 1046.95556640625, 352.88665771484375, 107.00676727294922, 619.2191162109375, 213.8502197265625, 180.15081787109375, -54.9912109375, -218.08255004882812, 593.9624633789062, 374.7530517578125, 360.50933837890625, 842.1207275390625, 352.7027893066406, 229.80657958984375, 124.15855407714844, 227.8479461669922, 144.67929077148438, 58.873199462890625, -175.45086669921875, 548.2000122070312, -158.68234252929688, 308.34442138671875, 41.4218635559082, 519.8411254882812, 603.0274658203125, 401.329345703125, 306.087158203125, 720.1510009765625, 377.0036926269531, 730.1119384765625, 252.14126586914062, -66.15644836425781, 40.94044494628906, -129.26443481445312, 125.99099731445312, 131.8443603515625, 102.2654800415039, 298.236572265625, -331.88525390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000366.npy"}
{"epoch": 0.5374449339207048, "step": 367, "batch_size": 64, "mean": 185.85562133789062, "std": 235.98663330078125, "min": -499.4107360839844, "p10": -67.85486907958985, "median": 130.7096405029297, "p90": 495.7415222167969, "max": 776.8563232421875, "pos_frac": 0.78125, "sample": [99.56817626953125, 146.11888122558594, 500.16741943359375, 82.30305480957031, 639.087646484375, -0.7113056182861328, 377.310791015625, 112.107666015625, 510.67596435546875, 80.51422882080078, 27.862564086914062, 1.2100849151611328, -65.00469970703125, 511.2389831542969, 467.7801513671875, 285.27606201171875, 450.292236328125, 128.13706970214844, 219.00379943847656, -69.07637023925781, 272.2195129394531, 402.89599609375, -75.44501495361328, -19.127479553222656, 377.2574768066406, 282.1978454589844, 776.8563232421875, -18.00483512878418, 532.77734375, 116.69415283203125, 466.83184814453125, 187.5366973876953, -25.639156341552734, 324.75396728515625, 74.63276672363281, 226.62844848632812, 330.40264892578125, 276.59307861328125, 400.6221008300781, 74.11085510253906, 198.27113342285156, -243.291015625, -193.0425262451172, -38.78594970703125, -82.4718017578125, 46.469688415527344, 99.4920654296875, 423.5298767089844, 96.74757385253906, 67.50054931640625, -214.32553100585938, -499.4107360839844, 115.22721099853516, 121.95927429199219, 248.08413696289062, 364.2083740234375, 433.5205383300781, 307.05780029296875, 530.1464233398438, 2.2109222412109375, 133.28221130371094, -6.711326599121094, 485.4144287109375, 11.019134521484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000367.npy"}
{"epoch": 0.5389133627019089, "step": 368, "batch_size": 64, "mean": 291.7165222167969, "std": 224.1376953125, "min": -209.4979248046875, "p10": 5.745568275451675, "median": 309.0730438232422, "p90": 556.2475524902344, "max": 1037.515625, "pos_frac": 0.890625, "sample": [298.2129821777344, 318.2940673828125, 201.20054626464844, 536.4482421875, 388.046630859375, 597.556396484375, 148.89752197265625, 63.19331359863281, 571.6888427734375, 206.74081420898438, -18.93008041381836, 381.1854248046875, 472.70166015625, 57.384395599365234, 306.87738037109375, 375.28759765625, 311.2687072753906, 209.8287811279297, -31.84082794189453, 414.7477111816406, 219.26882934570312, 277.1521911621094, 562.42822265625, 347.048583984375, 376.6642761230469, 131.02642822265625, 403.22308349609375, -0.7414493560791016, 46.13924789428711, 35.11100769042969, 192.7979278564453, 417.41607666015625, 435.5819396972656, 397.58514404296875, 427.4036865234375, 541.8259887695312, 392.51422119140625, 46.4296875, 183.2986297607422, -23.837669372558594, 326.78765869140625, 509.4749755859375, 412.7480773925781, 643.420166015625, 162.05784606933594, 335.3507385253906, 677.7621459960938, 20.881942749023438, -209.4979248046875, 116.45211791992188, 106.41707611083984, 514.0016479492188, 462.79345703125, 500.7160339355469, 360.89019775390625, 32.489044189453125, 248.48480224609375, 1037.515625, 148.3541259765625, 245.2274169921875, 296.2356262207031, -41.27093505859375, -93.90888977050781, 639.347900390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000368.npy"}
{"epoch": 0.540381791483113, "step": 369, "batch_size": 64, "mean": 150.88702392578125, "std": 281.6910705566406, "min": -675.739501953125, "p10": -158.70631256103516, "median": 127.66310501098633, "p90": 486.90813598632826, "max": 837.679931640625, "pos_frac": 0.765625, "sample": [-229.9666748046875, -123.71820831298828, 837.679931640625, -34.066932678222656, -251.5995635986328, 19.22450065612793, -54.88530731201172, 280.1546936035156, 29.267024993896484, 692.3514404296875, 213.1963653564453, -138.067138671875, 127.60208892822266, 337.1683654785156, 318.84600830078125, 812.7274780273438, 519.3330078125, 158.93118286132812, 42.562442779541016, 405.5589904785156, 295.1152648925781, 112.99641418457031, 110.55197143554688, 275.1623229980469, 218.93478393554688, 216.249755859375, -470.86279296875, 271.17047119140625, 126.47036743164062, 182.50836181640625, 262.32061767578125, 343.662109375, 76.37419891357422, 126.33662414550781, 174.94851684570312, 1.0056381225585938, 341.0634765625, 91.68511199951172, -150.85719299316406, 292.55902099609375, 499.1853942871094, -205.2745819091797, 722.2637939453125, 31.132308959960938, 458.2611999511719, 170.60906982421875, 382.307861328125, -479.3484802246094, 0.4967994689941406, 600.6104736328125, 157.7859649658203, 117.82749938964844, -675.739501953125, 86.87833404541016, 220.573486328125, -162.07022094726562, -35.21937561035156, 94.86280059814453, 121.41999053955078, 420.93511962890625, 184.88980102539062, -6.150032043457031, 127.72412109375, -36.887718200683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000369.npy"}
{"epoch": 0.5418502202643172, "step": 370, "batch_size": 64, "mean": 148.7425537109375, "std": 294.4658508300781, "min": -408.1298522949219, "p10": -231.6700729370117, "median": 153.73001098632812, "p90": 524.8428405761719, "max": 907.7745361328125, "pos_frac": 0.734375, "sample": [104.18811798095703, 307.9581604003906, 239.52476501464844, -396.65850830078125, 692.9439697265625, 422.63873291015625, 132.2694091796875, 308.8455810546875, 10.618772506713867, -154.65029907226562, 443.3802490234375, 518.4241943359375, 44.2825927734375, 101.222412109375, -103.44175720214844, 527.5936889648438, 224.96023559570312, 199.27587890625, 239.556396484375, 907.7745361328125, 881.2099609375, 227.91928100585938, 320.76910400390625, -212.33956909179688, 296.4705810546875, -124.3050308227539, 2.398681640625, 294.63421630859375, -203.38653564453125, 173.51055908203125, 532.7125854492188, 141.04150390625, 6.4398956298828125, 283.13482666015625, -23.73111343383789, 408.5389404296875, 245.47280883789062, -193.95797729492188, -350.17816162109375, 41.07817077636719, 176.29855346679688, 225.9080810546875, -239.95457458496094, 70.87689208984375, 451.6136474609375, 86.77606201171875, 166.41851806640625, 303.8426513671875, 231.03465270996094, -408.1298522949219, -112.80195617675781, -49.663055419921875, 119.622314453125, 116.61962890625, -300.8988952636719, -337.66656494140625, 9.671279907226562, -303.9982604980469, -155.79119873046875, 508.068115234375, 613.645751953125, 236.57150268554688, 3.3883056640625, 589.9326171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000370.npy"}
{"epoch": 0.5433186490455213, "step": 371, "batch_size": 64, "mean": 213.51039123535156, "std": 302.1003112792969, "min": -525.7710571289062, "p10": -112.03485946655273, "median": 173.6386947631836, "p90": 620.3423461914065, "max": 948.9454956054688, "pos_frac": 0.734375, "sample": [414.0886535644531, 265.64654541015625, 502.0444030761719, 93.98372650146484, -525.7710571289062, 155.08657836914062, 525.01220703125, 227.8270263671875, 825.3206176757812, 291.887939453125, 444.7469482421875, -37.159889221191406, 371.08685302734375, 217.65530395507812, -321.7696838378906, 114.44819641113281, -12.044921875, 181.85076904296875, 523.041748046875, -13.57330322265625, -51.34605407714844, -243.63380432128906, 640.903076171875, -109.34025573730469, -201.35968017578125, 178.13975524902344, 943.1183471679688, 686.9771728515625, -234.5986785888672, 21.632749557495117, 652.395263671875, -21.318649291992188, 380.1371154785156, 254.70362854003906, -113.18968963623047, -49.62841796875, 572.3673095703125, -134.54010009765625, 272.77215576171875, 427.1435546875, -18.272005081176758, 117.46236419677734, 768.3887939453125, 86.8384780883789, 21.171733856201172, 272.3102722167969, 328.83721923828125, 384.75, 509.225830078125, -30.429153442382812, 428.5413818359375, 113.26897430419922, 78.66261291503906, 337.8795166015625, 153.08116149902344, 169.13763427734375, 118.18379211425781, 73.32341003417969, 4.9844818115234375, 489.18597412109375, 288.5776062011719, 948.9454956054688, -107.40946960449219, 13.276016235351562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000371.npy"}
{"epoch": 0.5447870778267254, "step": 372, "batch_size": 64, "mean": 255.05174255371094, "std": 344.44561767578125, "min": -474.307861328125, "p10": -139.32240753173826, "median": 187.04007720947266, "p90": 744.1325866699219, "max": 1469.330322265625, "pos_frac": 0.765625, "sample": [311.4921875, 291.44970703125, 745.8792114257812, -278.40203857421875, 34.75242614746094, 782.6671142578125, 508.3762512207031, 238.0758056640625, 333.16448974609375, -20.601058959960938, 1469.330322265625, 278.7440185546875, -49.402587890625, 146.48556518554688, -65.87991333007812, 126.32140350341797, 93.24845886230469, 75.05284881591797, 809.7259521484375, -201.34881591796875, 110.99024963378906, 214.84478759765625, 728.9163818359375, -153.88314819335938, 493.8915710449219, 165.03131103515625, 120.5334701538086, 199.51031494140625, 149.7359619140625, 147.77688598632812, 172.13369750976562, 370.0181579589844, 378.10888671875, -103.9732894897461, 161.76339721679688, -168.84315490722656, 372.0992126464844, 740.05712890625, 110.19171905517578, 1018.5391845703125, 613.5473022460938, 357.79278564453125, 187.93019104003906, -113.29501342773438, 483.85546875, 485.5118103027344, 721.0983276367188, 269.45172119140625, -10.843063354492188, 348.49481201171875, 93.44662475585938, 272.87542724609375, 107.00480651855469, -76.2760238647461, 788.00732421875, 151.3739471435547, -57.196197509765625, 186.14996337890625, -150.4770050048828, 855.977783203125, -236.94876098632812, -474.307861328125, 472.37969970703125, 191.18392944335938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000372.npy"}
{"epoch": 0.5462555066079295, "step": 373, "batch_size": 64, "mean": 244.83599853515625, "std": 295.3245849609375, "min": -566.85693359375, "p10": -54.396836853027324, "median": 212.18781280517578, "p90": 631.2705932617189, "max": 926.7122192382812, "pos_frac": 0.796875, "sample": [135.54183959960938, -127.73286437988281, -221.37823486328125, 320.443603515625, -0.6485271453857422, 926.7122192382812, 479.0552978515625, 12.487106323242188, 258.0085144042969, 146.17257690429688, 642.808837890625, 200.31712341308594, -61.271636962890625, 239.81253051757812, 413.82861328125, 18.381662368774414, 26.55819320678711, 769.4205322265625, 352.4217224121094, -12.105537414550781, -391.9483642578125, -18.131683349609375, 227.82217407226562, -566.85693359375, 128.66552734375, 56.665565490722656, 157.01156616210938, 135.87686157226562, 604.3480224609375, 416.3261413574219, 218.76792907714844, 467.48675537109375, 326.5714111328125, 572.4375, 508.8157043457031, -89.72320556640625, -164.65716552734375, 584.9679565429688, 280.44427490234375, -37.54582214355469, 845.10302734375, 107.25569915771484, 460.9310302734375, 205.60769653320312, 364.5878601074219, 539.2095336914062, 337.5547180175781, 203.48448181152344, 527.9654541015625, 804.5194091796875, 158.0399169921875, -11.74560546875, 127.212646484375, 237.02255249023438, 11.485580444335938, 273.5474853515625, 114.95503234863281, 120.34396362304688, 785.50341796875, 405.641845703125, 443.7727355957031, 662.4678955078125, -38.35563659667969, 47.21544647216797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000373.npy"}
{"epoch": 0.5477239353891337, "step": 374, "batch_size": 64, "mean": 220.60926818847656, "std": 359.8564453125, "min": -701.3601684570312, "p10": -99.51512145996092, "median": 142.03038024902344, "p90": 584.6535156250001, "max": 1279.7706298828125, "pos_frac": 0.734375, "sample": [-185.86489868164062, 143.352783203125, -28.645950317382812, 1279.7706298828125, 329.87347412109375, 1129.66015625, 296.7781677246094, 614.7344970703125, 459.7200927734375, -29.40021514892578, 140.70797729492188, 333.6917419433594, 27.9381103515625, 198.88427734375, 63.478912353515625, -48.69518280029297, 538.8333740234375, 69.76943969726562, 561.6452026367188, 327.60321044921875, 16.147918701171875, -19.915409088134766, 364.598876953125, 34.94462585449219, -104.68318176269531, 922.3011474609375, -19.583364486694336, 137.3236846923828, -253.1661834716797, 348.4098205566406, 260.0206604003906, 290.361083984375, -35.608062744140625, 1039.818603515625, -458.56903076171875, 110.71903991699219, -47.46672058105469, 580.0202026367188, 106.8442153930664, -38.0467529296875, 159.54576110839844, 37.775917053222656, 55.605934143066406, 85.06096649169922, 283.9967041015625, 159.46633911132812, -159.01136779785156, 491.5002746582031, 379.50421142578125, 461.16412353515625, 104.02037048339844, 586.6392211914062, -87.45631408691406, 452.963134765625, 291.4819641113281, 5.850196838378906, -159.9609375, 231.81939697265625, -5.0610809326171875, 74.06306457519531, 1188.05419921875, -701.3601684570312, 176.049560546875, 548.9745483398438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000374.npy"}
{"epoch": 0.5491923641703378, "step": 375, "batch_size": 64, "mean": 216.62973022460938, "std": 308.5451354980469, "min": -486.10284423828125, "p10": -154.70993194580072, "median": 175.19449615478516, "p90": 597.7040100097657, "max": 1060.9251708984375, "pos_frac": 0.765625, "sample": [687.035888671875, 353.8768310546875, -7.108642578125, 280.14373779296875, -29.459976196289062, -181.27365112304688, 511.296875, 217.90216064453125, 289.91925048828125, 28.561840057373047, -10.871957778930664, -92.72792053222656, 25.269859313964844, -474.855224609375, 138.54043579101562, 928.787353515625, 347.0297546386719, 48.176177978515625, 350.8160095214844, 115.55187225341797, 25.864837646484375, 255.4886474609375, -486.10284423828125, 56.618690490722656, 438.22698974609375, -189.90650939941406, 616.2451171875, 164.1764373779297, 143.6954345703125, 89.1269302368164, -369.9333190917969, 92.84553527832031, 571.3619384765625, 634.5550537109375, 61.7407112121582, 245.36904907226562, 351.4046325683594, 485.4212646484375, 447.9803466796875, 672.693115234375, 372.7077331542969, 186.21255493164062, 111.1461410522461, -314.67864990234375, 453.47186279296875, 487.8197937011719, 304.3350524902344, -294.6617431640625, 1060.9251708984375, 608.9934692382812, 362.6094055175781, -27.197879791259766, -2.407135009765625, 408.5357360839844, 127.63162231445312, 457.3031921386719, 138.29115295410156, 561.8912353515625, -65.1269760131836, 157.16513061523438, 426.91259765625, 456.6193542480469, 104.89755249023438, -52.57648849487305], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000375.npy"}
{"epoch": 0.5506607929515418, "step": 376, "batch_size": 64, "mean": 262.1507568359375, "std": 314.5519104003906, "min": -536.0286865234375, "p10": -154.91670532226559, "median": 214.7726593017578, "p90": 643.559130859375, "max": 888.317138671875, "pos_frac": 0.8125, "sample": [-22.681190490722656, 737.1805419921875, 215.09320068359375, 47.46085739135742, 874.64306640625, 102.65034484863281, 513.808349609375, -241.2737579345703, 50.30027770996094, -2.7172393798828125, 430.94879150390625, 0.4531822204589844, 572.6465454101562, 103.08163452148438, -201.17202758789062, 448.40972900390625, 571.5623168945312, -172.12930297851562, -105.8251953125, 156.3988494873047, 86.59403991699219, 14.148542404174805, -171.24188232421875, 888.317138671875, 790.4453735351562, 122.23052215576172, 422.5576171875, 305.0206298828125, 522.9710083007812, -122.82618713378906, 633.9749755859375, 541.9898071289062, 124.90272521972656, 632.9192504882812, 139.29977416992188, 530.8106689453125, 587.1143188476562, 355.7453308105469, 419.59393310546875, 238.50875854492188, 272.211181640625, 528.9632568359375, -168.66978454589844, 303.859619140625, 598.205078125, 125.17027282714844, 793.2578125, 20.817970275878906, -186.23350524902344, 58.72199249267578, 18.17291259765625, 151.3908233642578, 647.6666259765625, 673.3058471679688, 322.02984619140625, 617.4163818359375, 155.3678436279297, 256.52825927734375, 214.45211791992188, 170.17098999023438, -89.85567474365234, 77.67922973632812, 611.13232421875, -536.0286865234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000376.npy"}
{"epoch": 0.5521292217327459, "step": 377, "batch_size": 64, "mean": 246.15280151367188, "std": 295.74945068359375, "min": -385.4408874511719, "p10": -79.43132934570312, "median": 237.93524932861328, "p90": 649.6059692382813, "max": 881.0942993164062, "pos_frac": 0.734375, "sample": [311.2369689941406, 872.828369140625, 186.4796142578125, -47.424407958984375, 328.9174499511719, -27.638837814331055, 245.76239013671875, -151.2611083984375, 281.37255859375, -8.462028503417969, 810.1443481445312, 797.4212646484375, -32.29283905029297, 853.063720703125, -0.21677589416503906, 259.0255432128906, 232.5105743408203, 465.4148864746094, 198.604736328125, -33.476898193359375, 257.59808349609375, 603.1565551757812, 390.2910461425781, -22.90249252319336, 657.842529296875, 881.0942993164062, 444.3866882324219, -185.9044647216797, 582.3738403320312, 156.85427856445312, 276.6759033203125, 229.75477600097656, 82.6273422241211, 179.46127319335938, 27.734142303466797, 580.3329467773438, 50.43995666503906, 314.987060546875, 378.88134765625, 630.3873291015625, 265.05914306640625, -28.705810546875, -385.4408874511719, 58.90544891357422, -54.492095947265625, 92.66234588623047, 411.10284423828125, 404.6922607421875, -303.2078857421875, 395.2315979003906, 108.28587341308594, 628.1953735351562, -79.24261474609375, -79.51220703125, -96.43463897705078, 476.1083068847656, 130.37002563476562, 734.666259765625, 372.3704833984375, 21.85405731201172, 312.48291015625, 187.34036254882812, 243.35992431640625, -119.95345306396484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000377.npy"}
{"epoch": 0.55359765051395, "step": 378, "batch_size": 64, "mean": 262.8395690917969, "std": 281.70458984375, "min": -508.97723388671875, "p10": -46.29496688842773, "median": 232.6874771118164, "p90": 599.6037841796875, "max": 1064.7374267578125, "pos_frac": 0.84375, "sample": [654.7562255859375, 257.76202392578125, 660.6715087890625, 564.816650390625, 520.8392944335938, 110.10331726074219, 204.09060668945312, 447.45281982421875, 172.97418212890625, 202.86041259765625, 230.43075561523438, 358.74810791015625, 572.4722900390625, 200.16883850097656, 98.01873779296875, -32.93522644042969, 242.35955810546875, 723.9457397460938, 213.30715942382812, 304.91546630859375, 246.08551025390625, -120.97147369384766, 41.83096694946289, 102.78833770751953, 258.2195739746094, 180.88711547851562, -86.94136047363281, 520.013427734375, 49.04778289794922, 478.16058349609375, 581.8699951171875, 530.1529541015625, 200.40447998046875, 148.542236328125, -39.555870056152344, 473.9087219238281, 193.1319580078125, 1064.7374267578125, 49.32081604003906, -342.09515380859375, 244.2654266357422, -7.158866882324219, 234.94419860839844, 607.2039794921875, 174.82278442382812, -49.18315124511719, 458.9003601074219, 302.1605529785156, 493.2434387207031, 341.58880615234375, 99.66102600097656, 343.6400146484375, 431.81732177734375, 241.3399658203125, -508.97723388671875, 177.3655242919922, 227.99420166015625, 548.31298828125, -63.27428436279297, 803.9755859375, 125.11387634277344, -433.30438232421875, 152.9224395751953, 637.0614013671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000378.npy"}
{"epoch": 0.5550660792951542, "step": 379, "batch_size": 64, "mean": 253.78219604492188, "std": 310.1694641113281, "min": -461.40936279296875, "p10": -112.65117111206054, "median": 231.35073852539062, "p90": 663.2630432128909, "max": 1254.758544921875, "pos_frac": 0.828125, "sample": [252.0626220703125, 373.802978515625, 4.394378662109375, 221.6612548828125, -104.31688690185547, 102.21045684814453, 186.32501220703125, 694.2451171875, 211.25106811523438, 144.6619873046875, 416.9549865722656, 799.3198852539062, 58.74504089355469, 481.5195617675781, 364.567626953125, 208.19290161132812, -461.40936279296875, 941.0845336914062, 53.94056701660156, 325.4956359863281, 495.84283447265625, 409.1741638183594, 261.77069091796875, 49.57252883911133, 118.87979125976562, -89.35281372070312, -311.9066162109375, 146.39105224609375, 36.89435577392578, 354.848876953125, 743.7505493164062, 357.3909912109375, -298.6210632324219, 47.930538177490234, 471.6313171386719, 716.783203125, 323.0, 397.4417724609375, 590.117919921875, 114.92964172363281, -74.9445571899414, 598.6441650390625, 241.04022216796875, -46.58506393432617, 383.94598388671875, -149.03897094726562, 176.24685668945312, 471.0113220214844, 298.8565979003906, 286.3369140625, -116.22300720214844, 517.322021484375, 690.9568481445312, 168.77940368652344, 1254.758544921875, 243.3206787109375, 152.1177978515625, 503.2501220703125, 128.0689697265625, -156.04693603515625, 19.223159790039062, 570.944580078125, -154.42422485351562, 23.320755004882812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000379.npy"}
{"epoch": 0.5565345080763583, "step": 380, "batch_size": 64, "mean": 280.27264404296875, "std": 312.65576171875, "min": -686.41748046875, "p10": -96.67137145996092, "median": 297.5885009765625, "p90": 677.7396240234376, "max": 1137.5849609375, "pos_frac": 0.859375, "sample": [139.24658203125, 120.32478332519531, 38.42173767089844, 1137.5849609375, 76.70503997802734, 105.74938201904297, 145.15235900878906, 378.8880615234375, 376.4533996582031, 339.57073974609375, 167.37734985351562, 377.25091552734375, -52.85358428955078, 602.5140991210938, 836.8309326171875, 484.78265380859375, 391.6104736328125, 405.36456298828125, 502.14056396484375, 177.0609130859375, 410.57672119140625, -244.54833984375, 264.2349853515625, 200.98208618164062, 67.18023681640625, -87.49535369873047, 466.431396484375, 335.8568420410156, 148.77064514160156, 644.2542114257812, 373.7677001953125, 692.0905151367188, 611.3147583007812, 537.8848876953125, 621.3834228515625, 412.8905029296875, 150.52162170410156, -105.39227294921875, 260.9700622558594, 57.83533477783203, 811.8511962890625, 126.98957824707031, -255.71878051757812, 210.118408203125, -203.5263671875, 216.09683227539062, 330.9420166015625, 723.6165771484375, 65.19125366210938, -339.66424560546875, 440.13604736328125, 44.888336181640625, 758.3102416992188, 358.43951416015625, 248.3216552734375, -100.60395050048828, 469.81707763671875, 371.1114807128906, 156.49362182617188, 77.06829833984375, -686.41748046875, 365.97552490234375, 808.3385009765625, 369.98748779296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000380.npy"}
{"epoch": 0.5580029368575624, "step": 381, "batch_size": 64, "mean": 200.75160217285156, "std": 332.6195373535156, "min": -744.9247436523438, "p10": -175.33491058349608, "median": 185.88229370117188, "p90": 606.414959716797, "max": 1028.01220703125, "pos_frac": 0.734375, "sample": [220.40945434570312, 30.821319580078125, 97.25350952148438, 623.8214721679688, 608.0420532226562, 651.5474853515625, 492.59991455078125, -26.466415405273438, 141.75686645507812, 538.8775024414062, 469.4956359863281, 97.02586364746094, 421.7507019042969, 261.7611389160156, 1028.01220703125, -173.61801147460938, 443.997802734375, 410.26922607421875, 678.7286987304688, 190.89761352539062, 906.9341430664062, 602.618408203125, -211.04574584960938, -95.61268615722656, -86.15990447998047, 118.46041870117188, 577.5403442382812, 17.561809539794922, -302.3958435058594, -67.76044464111328, 140.92942810058594, 219.23577880859375, -12.079116821289062, -35.32082748413086, 258.59576416015625, -64.60841369628906, 245.9375, 130.9599609375, 240.27127075195312, -78.57730102539062, 70.66326141357422, 116.80453491210938, 990.5873413085938, 208.84884643554688, 399.7637023925781, 401.4113464355469, 46.99981689453125, -220.0167236328125, 510.3079833984375, -176.0707244873047, 26.69038963317871, 397.85833740234375, -75.93563842773438, 242.7812957763672, 134.43875122070312, -236.03256225585938, 83.60671997070312, 316.5794982910156, 431.9345703125, 180.86697387695312, 233.3417205810547, -744.9247436523438, -604.7066650390625, 399.8363342285156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000381.npy"}
{"epoch": 0.5594713656387665, "step": 382, "batch_size": 64, "mean": 245.24298095703125, "std": 263.3995056152344, "min": -287.45330810546875, "p10": -100.90397720336914, "median": 200.48407745361328, "p90": 638.0592712402346, "max": 941.4036865234375, "pos_frac": 0.8125, "sample": [941.4036865234375, 657.7510375976562, 156.53286743164062, 136.6756591796875, 176.94212341308594, 766.913818359375, -115.6504898071289, 108.8477554321289, 307.307861328125, -144.57260131835938, 147.8003692626953, 254.87554931640625, 376.11376953125, 461.40252685546875, 448.3697509765625, 556.602783203125, 211.9407501220703, 105.44032287597656, 398.7601318359375, 109.93177795410156, 420.79998779296875, -158.1702117919922, 9.736099243164062, 301.2727966308594, 728.9320068359375, -98.9006576538086, 331.95855712890625, -101.76254272460938, 520.4026489257812, 287.51898193359375, 747.95751953125, -131.60594177246094, 23.122684478759766, 306.58612060546875, 400.0605773925781, -287.45330810546875, -153.54135131835938, 459.9126281738281, -15.131698608398438, 280.2368469238281, 178.07073974609375, 206.82962036132812, 355.298583984375, 24.0259952545166, 295.29815673828125, 135.86473083496094, 183.5832061767578, 249.06973266601562, 194.13853454589844, 156.69190979003906, 693.0623779296875, 542.54345703125, 285.26763916015625, -33.850624084472656, 154.23626708984375, 698.8020629882812, 72.27894592285156, -43.676029205322266, 465.5357666015625, 592.11181640625, 135.1707000732422, 99.5052719116211, -3.8585357666015625, 124.2288818359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000382.npy"}
{"epoch": 0.5609397944199707, "step": 383, "batch_size": 64, "mean": 244.95123291015625, "std": 339.1126403808594, "min": -880.06689453125, "p10": -104.36193389892577, "median": 242.6487274169922, "p90": 729.7895812988281, "max": 965.9595336914062, "pos_frac": 0.75, "sample": [-7.995933532714844, -70.6734619140625, 108.91067504882812, -281.94134521484375, 557.4998168945312, -234.25181579589844, -145.74822998046875, 550.0882568359375, 336.2239685058594, -23.800548553466797, 390.9324951171875, 265.31488037109375, 74.7860107421875, -23.940505981445312, -285.03955078125, 245.62594604492188, 952.58544921875, 30.187110900878906, 244.81744384765625, 288.38323974609375, 754.68017578125, -165.12069702148438, 3.5233192443847656, 692.3221435546875, 240.48001098632812, -34.196205139160156, 766.7581787109375, 25.495927810668945, -51.99151611328125, 313.48876953125, 965.9595336914062, 219.10008239746094, 699.0050048828125, 212.20578002929688, 386.60791015625, 385.8321533203125, 455.1837158203125, 32.8612060546875, 245.3697052001953, 296.90045166015625, -78.28277587890625, -20.4591064453125, 234.84408569335938, 77.83998107910156, 395.8525085449219, 722.3719482421875, -106.27703857421875, 575.2769165039062, 46.32655334472656, 270.430419921875, 765.4385375976562, 168.38919067382812, -99.89335632324219, 313.4129638671875, 425.86602783203125, 33.059242248535156, 776.9539794921875, 571.113525390625, 214.07574462890625, 732.9685668945312, -880.06689453125, 111.5821533203125, 453.05682373046875, 556.5699462890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000383.npy"}
{"epoch": 0.5624082232011748, "step": 384, "batch_size": 64, "mean": 191.53524780273438, "std": 223.45127868652344, "min": -335.9676208496094, "p10": -94.43865661621092, "median": 188.42733001708984, "p90": 432.9469818115235, "max": 817.507568359375, "pos_frac": 0.8125, "sample": [182.73651123046875, 43.596343994140625, 817.507568359375, -127.44453430175781, 4.614410400390625, 81.69328308105469, -209.10569763183594, -1.3067474365234375, 719.6851196289062, 200.2911376953125, 434.17242431640625, 425.75958251953125, 203.1385955810547, -227.76773071289062, 371.0234069824219, 107.65702819824219, -68.89631652832031, 61.57302474975586, 353.74127197265625, 362.54339599609375, 305.136962890625, 233.672119140625, 100.43446350097656, 327.28057861328125, 285.1468200683594, 159.1761932373047, 156.14224243164062, -214.43173217773438, -335.9676208496094, 91.35375213623047, 335.6018981933594, 160.69435119628906, 50.920433044433594, 184.7760009765625, 190.12718200683594, -78.3500747680664, 393.8825378417969, 304.3860168457031, 352.00531005859375, 66.67822265625, 10.524883270263672, 236.94398498535156, 321.2498474121094, 498.82354736328125, 501.8600158691406, 333.43603515625, 315.49407958984375, 230.73336791992188, 259.0028381347656, -107.82825469970703, 186.72747802734375, -38.338645935058594, 88.3181381225586, 498.03448486328125, 76.689697265625, -101.3337631225586, -31.395198822021484, 412.0466003417969, 430.0876159667969, 106.57231140136719, 556.6820678710938, 318.7453308105469, 53.933258056640625, 297.3674011230469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000384.npy"}
{"epoch": 0.5638766519823789, "step": 385, "batch_size": 64, "mean": 233.17413330078125, "std": 233.24720764160156, "min": -330.227294921875, "p10": -37.175697326660135, "median": 209.49551391601562, "p90": 569.2040771484376, "max": 743.5350952148438, "pos_frac": 0.875, "sample": [-44.8641357421875, 21.32489776611328, 43.43572235107422, 233.9720458984375, 426.3164367675781, 290.3789367675781, 488.88983154296875, 165.65188598632812, -131.73431396484375, 491.1612548828125, 165.2749481201172, 50.93736267089844, 146.07369995117188, 190.8653106689453, 479.0191650390625, 144.456787109375, 591.2591552734375, 337.54681396484375, 108.98279571533203, 76.15885162353516, 354.1195068359375, 172.3637237548828, -161.03074645996094, 308.08349609375, -232.80560302734375, 274.4310302734375, 208.71022033691406, 640.1564331054688, 92.94941711425781, 32.140625, 341.5028076171875, 240.93016052246094, 134.99734497070312, 743.5350952148438, 722.421630859375, 174.4851837158203, 192.65997314453125, -19.236007690429688, 551.832763671875, 259.4229736328125, 46.73133850097656, 100.83174133300781, 478.6014709472656, 210.2808074951172, 228.36770629882812, 576.64892578125, 424.078369140625, -134.11727905273438, 616.35546875, 341.87091064453125, -330.227294921875, 71.46040344238281, 199.76675415039062, -247.1044921875, 247.2025604248047, 280.68353271484375, 435.3704528808594, 146.33474731445312, 278.4188232421875, 660.621337890625, 134.6044464111328, 343.44744873046875, 192.74066162109375, 313.3978271484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000385.npy"}
{"epoch": 0.5653450807635829, "step": 386, "batch_size": 64, "mean": 257.5692138671875, "std": 305.5406494140625, "min": -430.52032470703125, "p10": -72.94065856933592, "median": 193.34922790527344, "p90": 720.4236755371096, "max": 976.7825317382812, "pos_frac": 0.828125, "sample": [-59.1878547668457, 394.0437316894531, -64.72667694091797, 376.6476745605469, 751.48193359375, 191.7993927001953, 125.33271026611328, 463.5494689941406, 147.50125122070312, 502.7044677734375, 416.7706604003906, 976.7825317382812, -279.19537353515625, -11.102266311645508, 656.0173950195312, 410.890869140625, 320.713623046875, 414.7560119628906, 62.93965148925781, 96.56623077392578, 141.06866455078125, 286.65716552734375, 182.47186279296875, 211.64422607421875, 289.1172790527344, 276.8058166503906, 128.49099731445312, -76.45806884765625, 919.5923461914062, 620.3550415039062, -64.73336791992188, 130.79522705078125, 105.50979614257812, -188.78369140625, 182.06346130371094, 748.0263671875, 897.9326782226562, 404.184814453125, 763.6741943359375, 145.91407775878906, 238.38046264648438, 543.4921875, -237.4397735595703, 171.0349884033203, 46.392547607421875, 141.76046752929688, 134.44789123535156, 194.89906311035156, -184.4878692626953, 389.05029296875, -430.52032470703125, 853.9800415039062, 106.76432037353516, 19.949546813964844, 93.48992919921875, 147.38783264160156, 499.3123779296875, 357.941162109375, -356.74578857421875, 457.9717102050781, 524.3004760742188, 278.9769592285156, 388.3121032714844, 107.16303253173828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000386.npy"}
{"epoch": 0.566813509544787, "step": 387, "batch_size": 64, "mean": 207.89810180664062, "std": 244.35458374023438, "min": -301.1578063964844, "p10": -58.21962394714355, "median": 192.77425384521484, "p90": 512.9523437500001, "max": 841.9522094726562, "pos_frac": 0.796875, "sample": [297.15093994140625, -22.05376434326172, 100.07420349121094, 113.83384704589844, 207.3494873046875, 841.9522094726562, 144.44931030273438, 102.25294494628906, 326.7216796875, 445.9842224121094, -301.1578063964844, 306.4539489746094, 113.14761352539062, 51.3200798034668, -293.60296630859375, 230.58840942382812, 433.9619140625, 593.1119995117188, -106.6071548461914, 179.15585327148438, 748.2015991210938, 62.28803634643555, 203.42605590820312, 774.7796630859375, 387.2255554199219, 193.34848022460938, 149.32931518554688, 231.95260620117188, 57.13397979736328, 139.84375, 28.67804718017578, 192.2000274658203, 522.3807373046875, 96.11994934082031, 543.593017578125, 174.67349243164062, 383.60321044921875, 115.28825378417969, 288.091796875, 311.7821350097656, 37.583396911621094, -8.407572746276855, 631.3526611328125, 440.8039855957031, 339.0694885253906, 128.17413330078125, 374.9512634277344, -54.7116584777832, 490.9527587890625, -59.72303771972656, 373.1767578125, -119.54464721679688, 56.673866271972656, 335.3220520019531, -206.01242065429688, 233.96580505371094, -8.7042236328125, 399.77532958984375, -36.51177978515625, -29.5194091796875, 267.54541015625, -273.24261474609375, 279.47900390625, 345.00311279296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000387.npy"}
{"epoch": 0.5682819383259912, "step": 388, "batch_size": 64, "mean": 283.08306884765625, "std": 327.8969421386719, "min": -527.787109375, "p10": -53.946214294433574, "median": 234.99268341064453, "p90": 689.0857177734379, "max": 1412.7535400390625, "pos_frac": 0.859375, "sample": [1412.7535400390625, 550.676513671875, -84.68357849121094, 517.9152221679688, 528.744873046875, 823.3345336914062, 169.91517639160156, -277.3818054199219, 136.35238647460938, 313.20355224609375, 192.02786254882812, 899.1614990234375, 558.813232421875, 251.42062377929688, -527.787109375, 53.78278350830078, 356.22894287109375, 227.72604370117188, 432.48382568359375, 4.862281799316406, 168.27725219726562, 731.4254150390625, 165.41647338867188, -82.64679718017578, 256.2864685058594, 242.89089965820312, 67.5688705444336, -14.837936401367188, 156.95645141601562, 307.5880432128906, 101.48046112060547, 148.3186492919922, 291.83514404296875, 80.16366577148438, 452.6744384765625, 171.5743408203125, 123.47459411621094, 51.060890197753906, 553.5985717773438, 128.00546264648438, 443.2400207519531, 87.75108337402344, 412.75360107421875, 165.73480224609375, 195.4813232421875, 33.64875030517578, 429.233154296875, 301.7689208984375, -227.21353149414062, 379.0183410644531, 334.36041259765625, 1237.806884765625, -184.08985900878906, 380.878662109375, 242.2593231201172, 29.204559326171875, -62.379241943359375, 504.48089599609375, 755.6453857421875, 778.0792846679688, 590.2930908203125, 463.8860168457031, 219.08352661132812, -34.26914978027344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000388.npy"}
{"epoch": 0.5697503671071953, "step": 389, "batch_size": 64, "mean": 238.16555786132812, "std": 310.4461669921875, "min": -401.635498046875, "p10": -111.81875991821289, "median": 257.6193161010742, "p90": 595.6424255371094, "max": 959.0499267578125, "pos_frac": 0.734375, "sample": [96.79144287109375, 529.0107421875, 358.4903259277344, -376.4403991699219, 70.70294189453125, 345.26922607421875, 177.0730438232422, 585.0016479492188, 441.56829833984375, 278.89422607421875, 234.22055053710938, -112.27408599853516, 346.3475646972656, -206.68460083007812, 174.2251739501953, 492.2109069824219, 158.99273681640625, 472.98126220703125, 419.4048156738281, -87.07821655273438, -283.3432312011719, -20.431255340576172, 600.2027587890625, -18.130619049072266, 288.18719482421875, 647.2435913085938, -401.635498046875, 2.8518829345703125, 124.78728485107422, 142.18896484375, 327.6916198730469, -51.087982177734375, 142.3773956298828, 350.3365783691406, -283.7105407714844, 303.92041015625, 231.3688201904297, 546.2273559570312, 386.70391845703125, 957.2830810546875, 476.93402099609375, -94.68318176269531, 450.9640808105469, 368.5130615234375, 308.5692138671875, 396.14739990234375, -7.065492630004883, 103.18327331542969, 494.0650939941406, 150.6658935546875, 75.0701675415039, 959.0499267578125, -81.09400939941406, -69.91960906982422, 244.5537567138672, 784.7785034179688, -40.938232421875, 900.5466918945312, 322.5286865234375, -110.75633239746094, 341.1141052246094, -209.79312133789062, 270.68487548828125, 817.7382202148438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000389.npy"}
{"epoch": 0.5712187958883994, "step": 390, "batch_size": 64, "mean": 244.15020751953125, "std": 378.6503601074219, "min": -558.407470703125, "p10": -132.84322586059568, "median": 251.31453704833984, "p90": 570.7348266601563, "max": 2224.67236328125, "pos_frac": 0.765625, "sample": [209.01832580566406, 299.86627197265625, 618.7081298828125, 525.67724609375, -558.407470703125, -90.27715301513672, -269.1306457519531, 147.33924865722656, 366.9576416015625, 250.31253051757812, 493.7460021972656, 57.11506652832031, 311.43994140625, 401.6467590332031, 265.04205322265625, 46.208709716796875, -124.41478729248047, 2224.67236328125, -136.45541381835938, 390.11944580078125, 137.14697265625, 557.278076171875, 589.0841064453125, 164.36138916015625, 179.5398406982422, -41.478084564208984, 109.30986785888672, -34.4630126953125, 117.34989929199219, 314.4013671875, 174.61026000976562, -40.729736328125, -204.2188720703125, -102.2171401977539, 168.01129150390625, 542.55322265625, 821.163818359375, 574.2951049804688, -377.55230712890625, 358.6127624511719, 564.28076171875, 650.6452026367188, 413.0697021484375, 135.66015625, 573.5008544921875, 213.9282989501953, 177.4287109375, 71.06044006347656, 514.6548461914062, 416.11273193359375, 487.0969543457031, 282.8057861328125, 332.3093566894531, 356.63531494140625, 410.8187561035156, -30.26784324645996, -18.679840087890625, -264.2267150878906, -523.9175415039062, 252.31654357910156, 449.06646728515625, 87.86083984375, 278.9353942871094, 358.2741394042969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000390.npy"}
{"epoch": 0.5726872246696035, "step": 391, "batch_size": 64, "mean": 143.4707794189453, "std": 291.51275634765625, "min": -695.4203491210938, "p10": -144.86351928710937, "median": 101.611572265625, "p90": 489.07761840820314, "max": 1088.2696533203125, "pos_frac": 0.734375, "sample": [21.704875946044922, 10.465744018554688, 89.6686019897461, -73.65597534179688, 180.61618041992188, 343.0132141113281, 13.927835464477539, 123.19617462158203, 18.813251495361328, 388.5227355957031, -96.38729095458984, 153.8935089111328, -227.11767578125, -70.66167449951172, 58.49772644042969, 35.87355041503906, 380.9698791503906, 128.72784423828125, 22.246973037719727, -116.14398193359375, 237.80503845214844, 486.2498474121094, 558.4786376953125, 490.2895202636719, -107.64881134033203, 69.71342468261719, 1088.2696533203125, -205.45225524902344, -114.14645385742188, 310.14129638671875, 332.369873046875, -141.86929321289062, 83.98114013671875, 41.16495132446289, -80.73754119873047, 234.4356231689453, 108.92855072021484, -44.582481384277344, 517.0911254882812, 149.649169921875, 94.29459381103516, -184.64010620117188, 161.69146728515625, 241.50045776367188, 708.759765625, -252.17221069335938, 623.500244140625, -30.016530990600586, -534.3244018554688, 20.033042907714844, 54.63731384277344, 281.6734313964844, -146.14675903320312, 396.84503173828125, 419.4859619140625, 68.88972473144531, 789.6134033203125, 382.045166015625, 342.6475830078125, -695.4203491210938, 257.021240234375, 239.87286376953125, 297.4112548828125, 244.6248321533203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000391.npy"}
{"epoch": 0.5741556534508077, "step": 392, "batch_size": 64, "mean": 280.9255065917969, "std": 280.8028259277344, "min": -174.76478576660156, "p10": -44.60391311645504, "median": 230.9010467529297, "p90": 652.6270568847656, "max": 1155.8701171875, "pos_frac": 0.859375, "sample": [-3.9802474975585938, 181.3011474609375, -150.41835021972656, 10.66402816772461, 210.90867614746094, -88.79447174072266, 651.0838012695312, 331.358154296875, 369.4180908203125, 381.98565673828125, 653.2884521484375, -174.76478576660156, 273.677734375, 149.26388549804688, 174.69761657714844, 34.3961181640625, 730.3582153320312, 750.6483154296875, 467.70684814453125, 200.8350372314453, -90.80706024169922, 390.34844970703125, 483.2127380371094, 46.76820373535156, 373.46136474609375, 420.04302978515625, 220.44166564941406, 1155.8701171875, -61.200164794921875, 244.2349853515625, 182.42857360839844, 299.92041015625, 438.35040283203125, 179.53189086914062, 558.056884765625, 241.3604278564453, 333.103515625, 172.7046661376953, 211.31434631347656, 153.91314697265625, 353.4683837890625, 437.622314453125, 68.62335205078125, -5.879325866699219, 545.0020141601562, 914.374267578125, 422.298583984375, 483.2298889160156, 394.24090576171875, -167.708740234375, 124.81707000732422, 86.05961608886719, 81.09140014648438, 363.16729736328125, 168.958740234375, 134.96702575683594, 298.18218994140625, 28.945629119873047, 1053.11572265625, 42.82368469238281, -105.23360443115234, 77.8739013671875, 320.0539855957031, 752.44677734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000392.npy"}
{"epoch": 0.5756240822320118, "step": 393, "batch_size": 64, "mean": 219.51449584960938, "std": 346.0307312011719, "min": -339.4408874511719, "p10": -196.30450592041012, "median": 236.0550765991211, "p90": 594.124890136719, "max": 1602.287353515625, "pos_frac": 0.71875, "sample": [279.100830078125, 715.1239013671875, -24.39523696899414, -93.93289947509766, 441.8927001953125, 516.2933959960938, -161.06869506835938, 357.94659423828125, 319.77862548828125, 240.0919189453125, 83.84674072265625, 271.7864990234375, 241.513427734375, 344.072021484375, 333.793212890625, 63.888587951660156, 1104.3759765625, 918.1622314453125, 665.8209228515625, -6.969390869140625, 232.0182342529297, 487.0169677734375, -236.06460571289062, -287.406005859375, -122.65971374511719, 8.505786895751953, -212.58815002441406, 227.79470825195312, 303.79583740234375, 281.64031982421875, 183.36720275878906, 444.39141845703125, 449.45904541015625, 240.62960815429688, 529.1883544921875, 336.2661437988281, 261.6465148925781, 111.80081939697266, 402.7685546875, -111.78639221191406, 17.419143676757812, 165.55813598632812, 656.0034790039062, -204.52096557617188, -233.07662963867188, 525.3353271484375, 293.6343994140625, -207.70114135742188, 10.420677185058594, 165.8975830078125, 116.48542022705078, 225.05006408691406, -177.1327667236328, -134.11688232421875, -153.93838500976562, 385.6432189941406, 421.33038330078125, -339.4408874511719, 309.35809326171875, 51.185646057128906, -112.91295623779297, 1602.287353515625, 621.954833984375, -96.70022583007812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000393.npy"}
{"epoch": 0.5770925110132159, "step": 394, "batch_size": 64, "mean": 234.3231964111328, "std": 306.7842102050781, "min": -351.5914001464844, "p10": -113.06614456176757, "median": 166.17357635498047, "p90": 640.5941345214845, "max": 1128.50048828125, "pos_frac": 0.796875, "sample": [104.43675231933594, 331.6502685546875, -201.2057647705078, 427.1675720214844, 184.15682983398438, -151.41510009765625, 47.7872200012207, 92.28495788574219, 530.6717529296875, 78.61874389648438, 549.2027587890625, -17.29131317138672, -351.5914001464844, 86.95597839355469, -51.70713806152344, -180.1328125, -115.56409454345703, 781.3744506835938, 677.1370849609375, -170.04637145996094, 557.9364013671875, -123.73126220703125, 299.53076171875, 276.9918518066406, 172.5709686279297, 371.70916748046875, 142.11598205566406, 91.18904113769531, -52.75493621826172, 605.9017944335938, 578.9160766601562, 751.53173828125, -107.23759460449219, 362.9803161621094, 110.93893432617188, 46.0357666015625, 147.57318115234375, 49.78789520263672, 133.5328369140625, 655.4622802734375, 23.83771514892578, 538.510009765625, 2.337512969970703, 209.4259490966797, 101.29026794433594, 78.20452880859375, 505.8414611816406, 877.6790771484375, 250.8811798095703, 175.94500732421875, 452.6968994140625, 283.01214599609375, 1128.50048828125, 1071.2552490234375, 352.1962890625, 42.995933532714844, 76.31669616699219, -70.0396728515625, 202.11026000976562, 411.5657043457031, 193.99691772460938, -73.49305725097656, 159.77618408203125, 278.3706359863281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000394.npy"}
{"epoch": 0.57856093979442, "step": 395, "batch_size": 64, "mean": 250.46954345703125, "std": 322.7596740722656, "min": -464.3038635253906, "p10": -118.84705352783202, "median": 223.5328369140625, "p90": 738.2884338378907, "max": 999.7794799804688, "pos_frac": 0.765625, "sample": [468.6614074707031, -124.2667236328125, 221.20916748046875, 58.664154052734375, 263.36480712890625, 319.1596374511719, 348.7772521972656, 223.74008178710938, -269.1724548339844, 257.3231506347656, -132.0080108642578, 423.14288330078125, 372.4194030761719, -21.13477325439453, 157.61666870117188, -9.032093048095703, 717.1422729492188, 312.6981506347656, 932.37646484375, -57.83570098876953, 640.302734375, 26.282325744628906, 537.1184692382812, -135.1108856201172, 223.32559204101562, 9.433868408203125, 164.30093383789062, 398.876708984375, -57.63488006591797, 61.224021911621094, 805.94580078125, 999.7794799804688, 478.015869140625, 139.38580322265625, 358.3771057128906, 51.35577392578125, 158.35195922851562, 327.8277587890625, 905.8201904296875, 157.878173828125, 509.20318603515625, 505.4344177246094, -464.3038635253906, -38.44990539550781, -266.0074768066406, 349.98779296875, -81.14793395996094, 810.2059936523438, 35.353919982910156, -106.20115661621094, 103.66998291015625, -185.69100952148438, 71.86946868896484, 977.056884765625, 747.35107421875, 247.18821716308594, 279.513916015625, 527.5911865234375, 235.7212371826172, 468.1365661621094, 475.0685119628906, 66.05370330810547, 106.3882064819336, -57.64393997192383], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000395.npy"}
{"epoch": 0.580029368575624, "step": 396, "batch_size": 64, "mean": 222.18008422851562, "std": 262.3597717285156, "min": -473.63922119140625, "p10": -21.007541275024412, "median": 160.66656494140625, "p90": 552.2312316894532, "max": 1005.7598876953125, "pos_frac": 0.859375, "sample": [165.8890838623047, 634.4820556640625, 104.06417083740234, 186.31341552734375, 98.82327270507812, 568.0836791992188, 141.148681640625, 170.30587768554688, 155.5865478515625, 285.6465759277344, 384.0951232910156, 301.8455810546875, 223.1446533203125, 430.0435485839844, -135.19302368164062, 154.1389617919922, 191.50711059570312, 3.4055328369140625, 90.98614501953125, 104.97716522216797, 82.02600860595703, 377.85748291015625, 874.0798950195312, 290.17022705078125, 46.103851318359375, 460.2439270019531, 283.52423095703125, 111.00347900390625, 195.4559326171875, 656.2540283203125, -18.650806427001953, 365.80462646484375, 165.74658203125, 274.2720031738281, 63.99092483520508, 153.7733917236328, 515.2421875, -202.10018920898438, 484.9268798828125, 695.6156005859375, 306.978271484375, 5.3548583984375, -75.38623046875, 506.5171203613281, -3.88726806640625, -64.19052124023438, 506.96856689453125, -22.01757049560547, 20.402677536010742, 249.63784790039062, 892.4174194335938, 99.42127227783203, 16.16131591796875, -80.23699951171875, 70.02763366699219, 260.85906982421875, 306.126708984375, 131.17404174804688, 135.27908325195312, -473.63922119140625, 102.65275573730469, 1005.7598876953125, 35.46923828125, 153.04229736328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000396.npy"}
{"epoch": 0.5814977973568282, "step": 397, "batch_size": 64, "mean": 291.03643798828125, "std": 300.57586669921875, "min": -300.70391845703125, "p10": -41.13445091247558, "median": 254.6646270751953, "p90": 698.1957153320313, "max": 1055.1917724609375, "pos_frac": 0.875, "sample": [20.70761489868164, 568.5009765625, 9.005157470703125, 369.4015197753906, 169.694091796875, 95.17887878417969, 261.9015197753906, 322.2669982910156, 493.08349609375, 178.4636688232422, 688.91162109375, -43.41328430175781, 641.8585815429688, -86.25518798828125, 145.2301025390625, 355.9087219238281, 210.43450927734375, -229.40029907226562, 461.9578857421875, 689.0458984375, 266.02313232421875, 753.6655883789062, 333.08087158203125, 447.62481689453125, 1055.1917724609375, 4.7892303466796875, 722.4332275390625, 454.5137939453125, -123.50595092773438, 132.16954040527344, 561.937744140625, 287.95355224609375, 116.58163452148438, 883.5802001953125, 448.7090148925781, 177.9840087890625, 67.79679870605469, 339.42877197265625, 205.2813720703125, 172.09747314453125, -300.70391845703125, -164.07833862304688, 43.36736297607422, -228.72933959960938, 531.029052734375, 163.2978057861328, 18.83899688720703, 133.97451782226562, -35.81717300415039, 533.3922119140625, 37.50400924682617, 702.1170654296875, 184.43411254882812, 1030.738037109375, 149.23983764648438, 879.4898681640625, 61.81785583496094, 247.427734375, 521.65478515625, 151.3343505859375, 294.9918212890625, 322.060302734375, 415.501220703125, 303.630615234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000397.npy"}
{"epoch": 0.5829662261380323, "step": 398, "batch_size": 64, "mean": 261.36553955078125, "std": 347.5559387207031, "min": -360.8876953125, "p10": -103.1951965332031, "median": 158.23501586914062, "p90": 638.6141357421875, "max": 1333.5487060546875, "pos_frac": 0.765625, "sample": [151.625732421875, 90.33967590332031, 433.4425354003906, 696.8462524414062, -123.14781188964844, 57.213294982910156, 297.96502685546875, -334.62713623046875, 91.83793640136719, -110.45330047607422, 570.2716674804688, -360.8876953125, 632.2337646484375, 336.16180419921875, 463.0249328613281, 1039.208984375, 164.9073028564453, 28.28185272216797, 626.106201171875, 312.80010986328125, 567.2985229492188, 146.40682983398438, 529.760498046875, 475.7884826660156, 1183.4969482421875, -12.94268798828125, 175.799560546875, 144.36680603027344, 635.456787109375, 417.32330322265625, 69.71112060546875, 193.57504272460938, -113.25201416015625, -67.80192565917969, 331.8273010253906, 102.86152648925781, -5.637367248535156, 415.969970703125, 25.155128479003906, 447.86572265625, 141.71934509277344, 448.9021911621094, 138.4049072265625, 69.51154327392578, 135.97988891601562, -51.49585723876953, 408.46240234375, 10.169647216796875, -64.64497375488281, 58.065399169921875, 164.84429931640625, 1333.5487060546875, 639.96728515625, 794.1593017578125, -332.0350036621094, 544.953125, 446.12701416015625, 508.4321594238281, 842.2601318359375, 149.9373779296875, -52.43804931640625, -28.633056640625, -86.2596206665039, -218.72412109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000398.npy"}
{"epoch": 0.5844346549192364, "step": 399, "batch_size": 64, "mean": 234.83462524414062, "std": 333.4617614746094, "min": -433.8165283203125, "p10": -203.25854492187494, "median": 199.69983673095703, "p90": 643.2350463867189, "max": 1081.234130859375, "pos_frac": 0.75, "sample": [130.82443237304688, 291.251708984375, 93.09716033935547, -276.347900390625, 623.6239013671875, -73.59261322021484, 650.877685546875, 59.04716110229492, 147.52706909179688, -41.8973388671875, 281.3778991699219, -339.3339538574219, 708.2813110351562, -54.93609619140625, 625.4022216796875, 186.22096252441406, 333.0123291015625, -137.38153076171875, -21.1601619720459, 67.25961303710938, 703.59814453125, 140.58932495117188, 382.6221923828125, 228.01869201660156, 573.9860229492188, 798.446533203125, 198.60708618164062, 414.79559326171875, -301.9312744140625, 253.10702514648438, 23.383132934570312, 485.0942077636719, 187.29916381835938, 161.7119598388672, 423.0662536621094, -228.0008544921875, 1081.234130859375, 530.8145751953125, 581.1339721679688, -282.09881591796875, 321.6894226074219, 480.0764465332031, 190.25184631347656, -433.8165283203125, -54.342708587646484, 426.791015625, 36.004608154296875, 930.1058349609375, 55.32744598388672, -145.5264892578125, 446.4872741699219, 938.0762329101562, 488.10455322265625, 200.79258728027344, 559.3641967773438, 201.4669647216797, 186.36898803710938, 236.17617797851562, -360.4336853027344, 281.4293518066406, 469.2156982421875, -80.19093322753906, 162.62521362304688, -115.25817108154297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000399.npy"}
{"epoch": 0.5859030837004405, "step": 400, "batch_size": 64, "mean": 333.2471008300781, "std": 307.8584899902344, "min": -553.0962524414062, "p10": -45.20373001098633, "median": 318.1609344482422, "p90": 691.7175659179687, "max": 1122.758544921875, "pos_frac": 0.84375, "sample": [264.34478759765625, 469.9415283203125, 413.78955078125, 248.77761840820312, 573.9605712890625, -113.49929809570312, 510.51568603515625, 224.58428955078125, -42.74071502685547, 577.1532592773438, 200.40121459960938, 94.83600616455078, 506.7387390136719, 279.841552734375, -553.0962524414062, 718.5036010742188, 658.492919921875, 184.08750915527344, 406.1177978515625, 411.39520263671875, 824.3449096679688, 519.4637451171875, 3.067535400390625, 368.5710144042969, 72.91735076904297, 320.09539794921875, 113.63444519042969, 556.90087890625, 310.742919921875, 397.5587158203125, 377.4891662597656, 219.04962158203125, -93.66471099853516, 416.9796142578125, 224.95242309570312, -259.307373046875, -8.154273986816406, 764.4022216796875, 316.2264709472656, 550.2304077148438, -42.14277648925781, 214.2261962890625, 303.8043212890625, 975.8126220703125, 31.033388137817383, 637.2333374023438, 294.9900207519531, 197.2807159423828, 19.947101593017578, 1122.758544921875, 674.80615234375, -46.259307861328125, 222.28448486328125, 467.6524658203125, 322.634765625, 654.4340209960938, -211.3732452392578, 694.2008056640625, 655.3745727539062, 425.232421875, 295.91754150390625, 685.92333984375, 750.159912109375, -47.76280212402344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000400.npy"}
{"epoch": 0.5873715124816447, "step": 401, "batch_size": 64, "mean": 190.42982482910156, "std": 398.49053955078125, "min": -878.6638793945312, "p10": -247.5817749023437, "median": 154.94528198242188, "p90": 769.1032775878908, "max": 1126.476806640625, "pos_frac": 0.6875, "sample": [156.1666717529297, 514.6387939453125, -74.05412292480469, 224.6944580078125, 531.4614868164062, 76.50618743896484, -335.8830871582031, -379.71600341796875, -878.6638793945312, -67.98155212402344, 99.28887176513672, 215.22250366210938, -207.42901611328125, 73.5391616821289, -28.265533447265625, -39.74341583251953, 334.516357421875, 286.3454895019531, 163.37832641601562, 13.881729125976562, -105.57803344726562, -190.0723114013672, -77.54206085205078, 1026.965576171875, 28.06122398376465, 690.9221801757812, 1002.4436645507812, 467.47235107421875, 795.3634033203125, 153.72389221191406, 84.07015991210938, 280.6824951171875, -29.66353988647461, 53.40717315673828, -117.4070053100586, -512.0767822265625, 976.920166015625, -63.35493087768555, 220.33425903320312, 567.6861572265625, 192.0840301513672, 528.9927978515625, 368.78326416015625, -65.5208740234375, 299.472900390625, 191.67010498046875, 187.07630920410156, -264.79010009765625, 1126.476806640625, -522.0569458007812, 648.72998046875, 479.70703125, 344.0799255371094, 78.42211151123047, 867.797607421875, 871.4588623046875, 45.178627014160156, 375.7969055175781, 4.84820556640625, 64.732421875, 207.25433349609375, 707.8296508789062, -279.8056945800781, -200.9703826904297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000401.npy"}
{"epoch": 0.5888399412628488, "step": 402, "batch_size": 64, "mean": 350.07647705078125, "std": 310.92864990234375, "min": -286.915283203125, "p10": 13.901937103271486, "median": 291.2509307861328, "p90": 781.9913330078126, "max": 1378.5826416015625, "pos_frac": 0.90625, "sample": [413.14605712890625, 27.525007247924805, 13.303916931152344, 470.03997802734375, 225.17123413085938, 437.950439453125, -6.7924346923828125, 305.5844421386719, -51.553428649902344, 655.320556640625, 384.76153564453125, 581.5332641601562, 658.25927734375, 200.1415557861328, -58.79718017578125, 596.655517578125, 35.22581481933594, 295.3526916503906, 240.78086853027344, 432.3703308105469, 560.3001098632812, 208.37490844726562, 573.3370971679688, -286.915283203125, 377.6435546875, 545.9636840820312, 71.2808837890625, 942.214599609375, 15.297317504882812, 924.0960693359375, 222.62371826171875, 344.044921875, 175.1098175048828, 506.38629150390625, 89.98310089111328, 275.2429504394531, 905.4481201171875, 133.24142456054688, 155.83226013183594, 692.8585205078125, 27.974075317382812, -68.90318298339844, 791.330810546875, 922.21923828125, 259.7587585449219, 473.6138000488281, 760.19921875, 150.45790100097656, 286.7706604003906, 1378.5826416015625, 427.15234375, 589.39501953125, 25.348356246948242, 881.1107177734375, 250.7064208984375, 302.748046875, 336.8082275390625, 283.2319030761719, 124.42941284179688, -181.44979858398438, 383.03765869140625, 194.38970947265625, 230.489501953125, 287.149169921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000402.npy"}
{"epoch": 0.5903083700440529, "step": 403, "batch_size": 64, "mean": 271.0248107910156, "std": 309.0369873046875, "min": -350.03155517578125, "p10": -115.38826980590818, "median": 244.04527282714844, "p90": 674.981463623047, "max": 1226.6429443359375, "pos_frac": 0.84375, "sample": [245.92385864257812, 74.65848541259766, 88.77985382080078, 192.49560546875, 310.5081481933594, 36.3416633605957, 242.16668701171875, 373.30987548828125, 2.8540706634521484, -244.2008514404297, 182.40057373046875, -9.092010498046875, -350.03155517578125, 582.267578125, 153.89048767089844, 890.7431640625, 257.12127685546875, 39.833961486816406, 408.826416015625, 225.04310607910156, 421.45623779296875, 81.55484008789062, 719.623779296875, -88.21558380126953, -127.68233489990234, 565.2658081054688, 476.40545654296875, 697.0963745117188, 283.3726501464844, 902.6312255859375, -127.03370666503906, 487.8942565917969, 623.3800048828125, 61.573665618896484, 11.272209167480469, 495.2685852050781, -150.05941772460938, 262.9589538574219, 619.5907592773438, 501.68011474609375, -171.98643493652344, 289.7355041503906, 1226.6429443359375, 16.382619857788086, 564.5010375976562, 136.45823669433594, 387.0745544433594, 758.6612548828125, 403.00799560546875, 910.02978515625, 319.9859313964844, 444.52081298828125, 362.1622009277344, 129.92576599121094, -34.92950439453125, 36.50019836425781, 35.142234802246094, 350.0599365234375, 46.47087097167969, 94.31657409667969, 350.5151062011719, 227.17886352539062, -127.81564331054688, 169.17184448242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000403.npy"}
{"epoch": 0.591776798825257, "step": 404, "batch_size": 64, "mean": 267.9746398925781, "std": 372.95635986328125, "min": -550.493408203125, "p10": -222.3148452758789, "median": 274.17657470703125, "p90": 732.0711730957031, "max": 1328.7203369140625, "pos_frac": 0.765625, "sample": [-287.34698486328125, 294.48681640625, 65.50267791748047, 622.6806640625, 554.3267211914062, 187.54476928710938, 216.1732940673828, 1024.1942138671875, 175.903564453125, 819.36181640625, 1328.7203369140625, 242.77342224121094, 314.09051513671875, 860.840087890625, -108.04926300048828, -378.179443359375, 902.1763305664062, 110.93157196044922, -298.07550048828125, 146.1975555419922, -54.43320083618164, -28.084678649902344, -85.93818664550781, 430.1328125, 265.9083251953125, 316.8669128417969, -198.13677978515625, 457.03741455078125, 282.44482421875, 365.4678039550781, 528.4534912109375, -274.9346618652344, 411.4967041015625, 112.44364166259766, 615.917724609375, 109.4330062866211, -187.7212371826172, 175.40977478027344, 333.3555908203125, 508.37646484375, -550.493408203125, 436.04351806640625, 93.88374328613281, 664.8273315429688, 242.14974975585938, -300.383056640625, -217.2865753173828, 520.8378295898438, 611.8355712890625, -122.27412414550781, 412.9123229980469, 3.4461288452148438, 409.2147216796875, 410.0732421875, 525.3634033203125, 734.0941772460938, 313.8700866699219, 83.15986633300781, 23.897924423217773, -224.46981811523438, 932.0772094726562, 158.31884765625, 727.350830078125, 384.179443359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000404.npy"}
{"epoch": 0.593245227606461, "step": 405, "batch_size": 64, "mean": 220.505615234375, "std": 387.6534118652344, "min": -513.6572265625, "p10": -265.24851226806635, "median": 176.62430572509766, "p90": 810.3806945800783, "max": 1164.2232666015625, "pos_frac": 0.71875, "sample": [840.9589233398438, 829.7310791015625, 232.60385131835938, -218.9597625732422, -464.9722900390625, -43.727142333984375, -431.380126953125, 95.70773315429688, 398.96142578125, -208.326416015625, 51.88860321044922, 1126.85302734375, 546.44091796875, 125.12472534179688, 87.11522674560547, 1164.2232666015625, -46.5892333984375, -50.491920471191406, 43.12493896484375, -156.020751953125, 116.4321060180664, 236.12220764160156, 114.91325378417969, 542.5643920898438, 176.24563598632812, 180.49017333984375, -154.07789611816406, 580.89599609375, -334.350341796875, -44.704856872558594, 419.1943664550781, 5.791387557983398, 448.69744873046875, 53.95911407470703, 415.00885009765625, 138.614501953125, 276.7876281738281, -320.38629150390625, 38.055641174316406, 42.20230484008789, 561.0447387695312, 193.51077270507812, 686.506103515625, 350.4693298339844, -55.04452896118164, 182.83840942382812, 616.031494140625, -209.64447021484375, 247.28115844726562, 302.674072265625, 390.43914794921875, -285.0865478515625, 177.0029754638672, 915.714111328125, 75.51932525634766, -513.6572265625, 499.833740234375, 329.9866638183594, -57.22320556640625, 969.337890625, 880.8406982421875, -297.18829345703125, 531.2208251953125, 765.2297973632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000405.npy"}
{"epoch": 0.5947136563876652, "step": 406, "batch_size": 64, "mean": 287.32806396484375, "std": 351.7142333984375, "min": -378.40216064453125, "p10": -73.98970794677734, "median": 215.78164672851562, "p90": 736.6491516113282, "max": 1270.327880859375, "pos_frac": 0.78125, "sample": [486.55230712890625, 146.3798828125, 275.2457580566406, 76.61575317382812, 146.98974609375, 277.02593994140625, 168.74105834960938, -371.1181640625, 1192.0693359375, 153.8802490234375, 527.738037109375, 204.4186248779297, 1270.327880859375, 563.4049072265625, 284.60699462890625, -51.5375862121582, 551.3724365234375, 309.3091735839844, 566.2672119140625, -136.8605194091797, 900.49169921875, 360.1121520996094, 343.27557373046875, -107.60279846191406, 109.9540786743164, 174.0177764892578, 10.80902099609375, -74.13177490234375, 515.6406860351562, 10.087799072265625, -73.65821838378906, 222.77781677246094, 512.9388427734375, -294.73681640625, -176.32171630859375, 469.17535400390625, 888.841064453125, 215.8262939453125, 150.7374725341797, -12.18482780456543, 161.19281005859375, 205.61642456054688, 476.1393127441406, -35.162960052490234, -67.75942993164062, 17.742591857910156, 710.0953369140625, 351.3290100097656, 34.62937927246094, 215.73699951171875, 636.6580200195312, 693.2188720703125, 480.64776611328125, 63.34452819824219, -378.40216064453125, -0.15331649780273438, 804.2437133789062, 520.8425903320312, 1067.3475341796875, 420.722412109375, 353.39495849609375, 748.0293579101562, 134.84397888183594, -12.781944274902344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000406.npy"}
{"epoch": 0.5961820851688693, "step": 407, "batch_size": 64, "mean": 177.00746154785156, "std": 330.871337890625, "min": -879.1386108398438, "p10": -249.5128616333008, "median": 155.22250366210938, "p90": 581.2775146484375, "max": 840.6063842773438, "pos_frac": 0.71875, "sample": [-309.63287353515625, 537.818603515625, 572.6851806640625, -338.6717834472656, -268.3294677734375, -93.31368255615234, 240.8921356201172, -252.98104858398438, -261.5535583496094, 260.859375, 402.9456787109375, 106.70256042480469, 380.90582275390625, 776.760986328125, 626.0592041015625, 38.05281066894531, -97.37612915039062, 474.11639404296875, 103.37818145751953, -106.53843688964844, 429.2570495605469, 131.27587890625, 71.35865783691406, 116.29739379882812, 86.15879821777344, 491.9908447265625, 251.59567260742188, 240.5286865234375, 100.17298889160156, 141.1291046142578, -225.86660766601562, 203.50607299804688, 13.099578857421875, -98.31168365478516, 47.484886169433594, 138.40480041503906, 488.5469665527344, 805.5657958984375, 95.46064758300781, -879.1386108398438, 736.3140258789062, 368.30364990234375, 392.50140380859375, -33.280296325683594, 415.033935546875, 188.5054168701172, 183.8087158203125, 840.6063842773438, 328.2004699707031, 353.7787170410156, 574.6029052734375, -187.8848876953125, -93.74609375, 584.1380615234375, -28.54230499267578, 123.1031723022461, 169.31590270996094, 489.1383361816406, -89.42976379394531, -410.392333984375, 677.1944580078125, 239.6044464111328, -241.42042541503906, 307.7260437011719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000407.npy"}
{"epoch": 0.5976505139500734, "step": 408, "batch_size": 64, "mean": 205.55117797851562, "std": 269.80572509765625, "min": -282.23492431640625, "p10": -147.2392837524414, "median": 221.0392837524414, "p90": 597.2308349609377, "max": 801.7899169921875, "pos_frac": 0.75, "sample": [36.02021789550781, 694.9895629882812, 801.7899169921875, -0.291015625, 406.8426513671875, 361.32525634765625, -63.26708984375, 180.75978088378906, -93.464599609375, -160.1418914794922, -24.22230339050293, -273.6051025390625, 110.7087173461914, -67.754150390625, 16.873512268066406, -282.23492431640625, 126.41294860839844, 278.9280090332031, 241.51077270507812, 78.14811706542969, 179.6875762939453, 354.87908935546875, -112.311279296875, 419.3454895019531, -138.8547821044922, 235.37063598632812, -9.607950210571289, 249.57901000976562, 78.91890716552734, 733.1842651367188, 403.054931640625, 235.27188110351562, 521.7633056640625, -5.5869293212890625, -259.92425537109375, -234.48208618164062, 261.5597229003906, 276.6142578125, 59.59111022949219, 219.63735961914062, 545.9425048828125, 438.2306213378906, 369.1278076171875, 475.62469482421875, 165.5209503173828, -223.63723754882812, 682.845947265625, 34.798179626464844, 91.73912048339844, 82.99349975585938, 107.6988525390625, 661.9921264648438, 271.1365661621094, -150.8326416015625, 320.20257568359375, 329.9158630371094, 45.651405334472656, 461.6690673828125, 222.4412078857422, 282.80352783203125, 691.2996826171875, 619.2115478515625, 471.9222412109375, 319.95819091796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000408.npy"}
{"epoch": 0.5991189427312775, "step": 409, "batch_size": 64, "mean": 201.48634338378906, "std": 295.236572265625, "min": -427.4471130371094, "p10": -97.96447067260742, "median": 152.90426635742188, "p90": 549.7907165527344, "max": 1020.2586669921875, "pos_frac": 0.75, "sample": [-39.152496337890625, -368.25567626953125, 314.3778381347656, 224.45436096191406, 198.84703063964844, 350.6834716796875, -98.02057647705078, -97.83355712890625, -57.957122802734375, 23.146163940429688, 517.9061889648438, 547.9171142578125, -335.5121154785156, 159.90609741210938, 454.00238037109375, 133.47085571289062, 206.00369262695312, 267.1385803222656, 145.90243530273438, 812.572265625, 312.665283203125, -167.66421508789062, 283.64208984375, 26.698162078857422, 694.2183227539062, -112.35692596435547, -7.673377990722656, 51.96290588378906, -83.56295776367188, 142.1759490966797, 426.80157470703125, 771.1817626953125, 517.1396484375, -427.4471130371094, 58.60254669189453, 1020.2586669921875, -13.502647399902344, 97.41604614257812, -348.1221923828125, 141.3125, 536.6618041992188, -46.903038024902344, 550.5936889648438, 97.54823303222656, 259.2742919921875, 230.00830078125, 80.2065200805664, 133.6182403564453, 757.0858154296875, 511.19635009765625, -36.54840850830078, 189.3552703857422, -56.10084533691406, 176.6197967529297, 211.2155303955078, 452.6468505859375, 362.8457336425781, 319.75634765625, 52.405303955078125, 76.17301940917969, 754.0859375, 276.5916748046875, 130.059326171875, 133.3875732421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000409.npy"}
{"epoch": 0.6005873715124816, "step": 410, "batch_size": 64, "mean": 247.55059814453125, "std": 306.2297058105469, "min": -579.2340087890625, "p10": -110.66102981567381, "median": 272.3592071533203, "p90": 635.5991455078126, "max": 896.58251953125, "pos_frac": 0.8125, "sample": [188.92633056640625, 318.46051025390625, -114.07847595214844, 79.92716217041016, 552.3150634765625, 345.8554992675781, 528.7701416015625, 475.8417053222656, 499.0319519042969, 8.080389022827148, 124.25013732910156, 281.77850341796875, 12.060836791992188, 454.10931396484375, -102.6869888305664, -191.794677734375, 647.1859130859375, -451.31475830078125, -94.02908325195312, 349.25628662109375, 486.642333984375, 769.2391357421875, -116.78721618652344, 41.60957336425781, 600.1861572265625, 697.1895751953125, 512.2190551757812, 33.718196868896484, 304.02337646484375, 896.58251953125, 309.2027587890625, 31.38143539428711, 346.3968505859375, 37.047821044921875, 402.7340087890625, -84.26773071289062, -51.15515899658203, -13.207174301147461, 134.453369140625, -135.03497314453125, 216.3352508544922, 549.547607421875, 163.70318603515625, 379.51446533203125, 201.27725219726562, -579.2340087890625, 379.3644104003906, 245.79554748535156, 825.5021362304688, 341.7490234375, 107.18600463867188, 31.754430770874023, 477.64434814453125, 678.8931884765625, 353.27752685546875, 756.384765625, 150.35060119628906, 364.0002746582031, -374.6868896484375, 164.02879333496094, 262.9399108886719, 39.170188903808594, 608.5633544921875, 386.0577087402344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000410.npy"}
{"epoch": 0.6020558002936858, "step": 411, "batch_size": 64, "mean": 234.196533203125, "std": 351.9522399902344, "min": -575.6903686523438, "p10": -120.88887786865232, "median": 162.95513916015625, "p90": 715.8546752929689, "max": 1392.0137939453125, "pos_frac": 0.75, "sample": [245.20208740234375, -7.225227355957031, -135.12802124023438, 119.82576751708984, 678.6229248046875, 106.48882293701172, 94.16891479492188, 322.01373291015625, 355.77069091796875, 402.127685546875, 134.36581420898438, -22.241342544555664, 154.1796875, -8.973751068115234, 19.790319442749023, -2.436830520629883, 538.310546875, 269.50787353515625, 85.44305419921875, 418.7723693847656, 877.7608032226562, 364.31976318359375, -244.551025390625, -22.558584213256836, 171.89022827148438, 157.41075134277344, 55.19789123535156, 186.25804138183594, 112.68966674804688, 1074.2210693359375, 144.68093872070312, 446.7810363769531, 861.0170288085938, -73.91454315185547, 516.4921264648438, 39.70771026611328, 312.72003173828125, 691.3140869140625, 765.3696899414062, 179.91839599609375, 10.827901840209961, 366.0824890136719, 114.70478057861328, 273.25177001953125, 152.0394287109375, 314.4179992675781, -57.10053634643555, -575.6903686523438, 1392.0137939453125, 270.4627380371094, 168.49952697753906, 361.6409606933594, 632.9580078125, 726.3720703125, 661.5037231445312, -261.67431640625, -29.52649688720703, -508.55535888671875, 182.64755249023438, -99.10140991210938, 780.8028564453125, -143.36895751953125, 0.2870941162109375, -130.2263641357422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000411.npy"}
{"epoch": 0.6035242290748899, "step": 412, "batch_size": 64, "mean": 250.42259216308594, "std": 323.71832275390625, "min": -524.0042114257812, "p10": -120.01763534545896, "median": 212.79204559326172, "p90": 638.8585632324221, "max": 1237.79638671875, "pos_frac": 0.796875, "sample": [-148.03524780273438, 183.47174072265625, 459.7098388671875, 463.30987548828125, 403.32513427734375, -94.60655975341797, 692.1907348632812, -34.88086700439453, 157.46890258789062, 539.1175537109375, 122.8829345703125, 356.0776672363281, 47.0369873046875, 371.90667724609375, 13.767011642456055, 546.7988891601562, 381.59954833984375, 84.9145278930664, -524.0042114257812, 202.89886474609375, 53.88434600830078, 14.915321350097656, 580.755615234375, 203.09767150878906, 82.01344299316406, 381.9548034667969, 10.984771728515625, 360.2105407714844, -177.77835083007812, 222.48641967773438, -81.52227020263672, 81.10474395751953, 243.89620971679688, 416.9413146972656, 119.55393981933594, -79.60392761230469, -329.3773193359375, -130.90809631347656, 136.35745239257812, 949.0408935546875, 13.42034912109375, -407.402099609375, 367.02069091796875, 606.0840454101562, 705.527587890625, -136.14613342285156, 521.8436889648438, 820.3369750976562, 136.8914794921875, 521.4332275390625, 355.9642639160156, -69.88088989257812, 115.46769714355469, 652.90478515625, 560.8133544921875, 1237.79638671875, 16.214488983154297, 353.57635498046875, 417.880126953125, -29.390260696411133, 459.4886474609375, 375.57098388671875, 654.6566162109375, 494.0160217285156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000412.npy"}
{"epoch": 0.604992657856094, "step": 413, "batch_size": 64, "mean": 239.5261688232422, "std": 316.60430908203125, "min": -765.2942504882812, "p10": -149.26843414306632, "median": 249.4036102294922, "p90": 598.4300598144531, "max": 934.4389038085938, "pos_frac": 0.828125, "sample": [48.68756103515625, -18.058300018310547, 224.143798828125, 387.62664794921875, 192.5804443359375, 119.41722106933594, 244.1177978515625, 247.318603515625, 80.24063873291016, 186.97396850585938, 585.7133178710938, 912.4024658203125, 26.23455047607422, 222.8284912109375, -6.474021911621094, -16.73676872253418, 327.11859130859375, 131.34146118164062, -45.89183044433594, 824.7447509765625, 123.96578979492188, 482.3504638671875, 433.8413391113281, -765.2942504882812, 77.17416381835938, 298.1253662109375, 303.09014892578125, 143.97802734375, 555.90966796875, 934.4389038085938, 369.21649169921875, 326.9971008300781, 623.7142333984375, 331.89288330078125, -198.2115478515625, 489.2293701171875, 385.1421813964844, 158.81674194335938, 299.4366760253906, 328.37493896484375, 76.40013122558594, 176.22181701660156, 648.541015625, -327.741455078125, 597.6237182617188, 371.7431335449219, 263.1424255371094, -271.7390441894531, -282.146728515625, -630.7737426757812, 166.93101501464844, 463.517822265625, 439.11737060546875, 768.3782958984375, 28.392498016357422, 598.775634765625, 251.48861694335938, 541.705810546875, 272.27203369140625, 305.5804443359375, 83.486572265625, 214.2457733154297, 391.56597900390625, -193.57269287109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000413.npy"}
{"epoch": 0.6064610866372981, "step": 414, "batch_size": 64, "mean": 256.2596435546875, "std": 312.81951904296875, "min": -688.4190673828125, "p10": -52.44013118743894, "median": 241.35254669189453, "p90": 675.9684631347658, "max": 941.3126220703125, "pos_frac": 0.84375, "sample": [352.0544128417969, 334.7887268066406, 471.47589111328125, 219.72657775878906, 44.64377212524414, 913.3582153320312, 213.3922119140625, -688.4190673828125, 29.167770385742188, 187.43890380859375, 348.1656799316406, 296.0670166015625, 695.6154174804688, 869.4827880859375, 239.57949829101562, -23.18794059753418, 319.27850341796875, 93.61376953125, -124.85509490966797, 109.37348175048828, 201.35406494140625, 532.0279541015625, -80.81248474121094, 112.63558959960938, 470.31396484375, 321.87322998046875, -292.4339904785156, -31.899187088012695, 452.4482421875, -61.24339294433594, 563.5106201171875, 513.5771484375, 17.737323760986328, 251.48167419433594, 690.5813598632812, 56.351863861083984, 391.06011962890625, -24.974655151367188, -336.2279052734375, 198.66090393066406, 342.2835693359375, 267.543701171875, 641.8717041015625, -316.5856628417969, 83.07496643066406, 570.7058715820312, 30.769546508789062, 632.306396484375, 130.42160034179688, 25.489574432373047, 243.12559509277344, 83.81993103027344, 164.15032958984375, 117.7304916381836, 265.9228515625, 283.203125, 802.1571044921875, 15.520965576171875, 440.9766845703125, 878.1817016601562, 941.3126220703125, 430.2574462890625, 131.6695098876953, 347.9255676269531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000414.npy"}
{"epoch": 0.6079295154185022, "step": 415, "batch_size": 64, "mean": 198.34219360351562, "std": 301.28546142578125, "min": -863.8922729492188, "p10": -115.32838439941406, "median": 206.86713409423828, "p90": 610.0275024414063, "max": 944.673583984375, "pos_frac": 0.765625, "sample": [271.8617858886719, -104.2083740234375, 347.8663024902344, 772.1407470703125, -46.43998718261719, 779.8704833984375, 580.7261962890625, 164.8421173095703, -66.92205047607422, 199.9141387939453, 512.6166381835938, 365.2052307128906, 459.90130615234375, 417.32525634765625, -112.9088363647461, -127.59614562988281, 209.41122436523438, -247.097900390625, 270.68682861328125, 213.28611755371094, 6.8314056396484375, 54.870880126953125, 5.678436279296875, 624.4818115234375, -141.18283081054688, 143.15841674804688, 730.834228515625, -36.418800354003906, 118.11048126220703, 326.58697509765625, -56.18463134765625, 364.1530456542969, 248.82501220703125, 82.46994018554688, 944.673583984375, -249.2105255126953, -79.96516418457031, 130.8048095703125, 445.99493408203125, 662.4857177734375, 59.3785285949707, 332.40789794921875, 209.63714599609375, 80.8265380859375, 120.44512939453125, 204.3230438232422, -33.793067932128906, 281.06634521484375, 435.4549865722656, -116.3653335571289, 226.2042236328125, 424.9907531738281, 291.3531494140625, 10.31646728515625, 413.09283447265625, 12.63859748840332, 220.09744262695312, 1.023651123046875, 488.54119873046875, -261.6840515136719, 622.585205078125, 73.86427307128906, -863.8922729492188, 273.90924072265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000415.npy"}
{"epoch": 0.6093979441997063, "step": 416, "batch_size": 64, "mean": 310.9082946777344, "std": 290.04217529296875, "min": -151.33755493164062, "p10": -57.46387901306152, "median": 256.5294952392578, "p90": 708.8130310058594, "max": 978.2022705078125, "pos_frac": 0.828125, "sample": [835.8713989257812, 107.27411651611328, 148.673583984375, 415.59228515625, -94.97933197021484, 556.1322631835938, 265.3983154296875, 128.70382690429688, 521.5895385742188, 683.6105346679688, -116.99515533447266, -61.432037353515625, 67.01996612548828, 302.6018371582031, 247.66067504882812, 496.55145263671875, 396.2001647949219, -15.214576721191406, 579.543701171875, 166.3425750732422, 422.83050537109375, 523.2650756835938, 98.87672424316406, 171.54275512695312, 675.732666015625, 246.98367309570312, 331.7649841308594, 728.7103881835938, 92.43026733398438, 736.2247314453125, 188.58433532714844, 580.2691040039062, 978.2022705078125, -151.33755493164062, -141.42041015625, 69.24434661865234, 557.4235229492188, 495.07147216796875, 11.420219421386719, 857.05419921875, -8.245368957519531, 959.2382202148438, 268.8201904296875, 221.3469696044922, 500.19708251953125, 500.6453857421875, 315.8455810546875, 612.076904296875, 222.8304443359375, 70.10590362548828, 38.90000534057617, 214.73045349121094, -120.20733642578125, 698.6795654296875, 362.3332824707031, -54.460357666015625, 335.8922119140625, -58.751102447509766, 237.47396850585938, -47.85026550292969, 136.65206909179688, 228.47146606445312, 447.2310791015625, 713.1559448242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000416.npy"}
{"epoch": 0.6108663729809104, "step": 417, "batch_size": 64, "mean": 263.2933044433594, "std": 254.3087615966797, "min": -231.77117919921875, "p10": -74.10251541137694, "median": 215.9885711669922, "p90": 638.176580810547, "max": 891.605224609375, "pos_frac": 0.859375, "sample": [66.7718276977539, 22.904800415039062, 1.0103397369384766, 366.09210205078125, 38.53951644897461, 522.1260375976562, 158.8865509033203, 173.70809936523438, -155.10586547851562, 682.8075561523438, 27.47378921508789, 615.884033203125, 405.0926513671875, 115.447265625, 247.10130310058594, -83.25743103027344, -97.17900085449219, 652.1322631835938, 653.29052734375, 45.398406982421875, 571.9356689453125, 202.77188110351562, 747.3836059570312, 163.0491943359375, 572.3687133789062, 891.605224609375, 303.9395751953125, 241.02432250976562, 172.59439086914062, 320.7917785644531, 77.80216217041016, 138.83192443847656, -95.0114517211914, 134.3623046875, 338.3971862792969, 603.588623046875, 68.41238403320312, 331.54571533203125, 393.0782775878906, 208.6048126220703, 86.16911315917969, 369.6442565917969, 223.37232971191406, 336.58477783203125, -63.94673156738281, 298.0130615234375, 155.59820556640625, 144.87704467773438, 484.24005126953125, 306.78265380859375, 488.79449462890625, -231.77117919921875, 363.2831115722656, 367.8371887207031, -27.760093688964844, -161.21823120117188, 206.70855712890625, 179.2281494140625, 647.1839599609375, 471.5484619140625, 617.1593627929688, 160.94920349121094, 659.7467041015625, -78.45499420166016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000417.npy"}
{"epoch": 0.6123348017621145, "step": 418, "batch_size": 64, "mean": 271.077880859375, "std": 371.2981872558594, "min": -415.42596435546875, "p10": -136.8207565307617, "median": 203.96804809570312, "p90": 715.9693725585938, "max": 1316.34912109375, "pos_frac": 0.75, "sample": [221.42050170898438, -193.30979919433594, 226.55108642578125, -18.27017593383789, 143.90989685058594, 929.5897827148438, 63.75122833251953, 75.63751220703125, 719.759765625, 581.4028930664062, -1.03692626953125, -32.32470703125, 479.71319580078125, -73.59197235107422, 1136.933349609375, 560.522705078125, 334.92584228515625, -144.3129425048828, 478.3869323730469, 66.060546875, -252.82864379882812, 707.1251220703125, -119.3389892578125, 529.2965087890625, 226.677490234375, 74.23609924316406, 231.4664306640625, 554.2350463867188, 540.9415893554688, 186.51559448242188, 531.0504150390625, 67.10050964355469, 107.20386505126953, 867.87646484375, -415.42596435546875, -110.5354232788086, -185.19674682617188, 417.36517333984375, 261.94720458984375, 476.515380859375, 1316.34912109375, -54.98334503173828, 62.30401611328125, 138.04678344726562, 276.0491638183594, 171.77426147460938, 167.1201171875, 155.4679412841797, -310.19561767578125, -311.693359375, 365.15484619140625, 47.76399230957031, 581.5087890625, -82.45817565917969, 627.945556640625, 297.7057189941406, 650.3280029296875, 1082.778564453125, 121.29727935791016, 26.894908905029297, 604.4244384765625, 961.417236328125, 313.0086364746094, -110.96929931640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000418.npy"}
{"epoch": 0.6138032305433186, "step": 419, "batch_size": 64, "mean": 297.6322021484375, "std": 323.2002258300781, "min": -448.2620849609375, "p10": -62.80826721191404, "median": 286.07728576660156, "p90": 717.0528686523438, "max": 1004.8865966796875, "pos_frac": 0.84375, "sample": [803.2567749023438, -359.80108642578125, 335.01885986328125, 552.1209106445312, 85.35193634033203, 821.10595703125, -7.372524261474609, -32.89250564575195, 29.58642578125, 668.890625, 1004.8865966796875, 197.90597534179688, 312.6055603027344, 544.8826904296875, 450.73565673828125, 535.1370849609375, 197.4822998046875, 149.833984375, -448.2620849609375, 49.73969268798828, -103.81182098388672, 723.583984375, -239.3646697998047, 281.0312805175781, 548.1175537109375, -310.131591796875, 173.23052978515625, 150.4095001220703, 516.0828857421875, 858.6677856445312, 369.1147766113281, 102.4271240234375, 443.9801940917969, 291.123291015625, 458.700927734375, 118.25521087646484, 125.82020568847656, 66.22250366210938, 521.5941162109375, 340.01947021484375, 618.8572998046875, 185.995361328125, 701.8135986328125, 19.792461395263672, 81.10453033447266, 76.52091979980469, 32.551239013671875, -71.33950805664062, 665.1754150390625, 568.9600830078125, 601.809814453125, 429.12890625, 84.74346923828125, 980.375732421875, 430.232421875, 432.8476257324219, 469.80010986328125, 273.36663818359375, 126.05926513671875, 735.3886108398438, -214.1179656982422, -42.90203857421875, 160.51510620117188, 376.49627685546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000419.npy"}
{"epoch": 0.6152716593245228, "step": 420, "batch_size": 64, "mean": 179.68783569335938, "std": 392.33544921875, "min": -834.665771484375, "p10": -228.80961608886713, "median": 143.80525970458984, "p90": 592.3000610351562, "max": 1213.5389404296875, "pos_frac": 0.65625, "sample": [-185.828125, 170.17807006835938, 26.561119079589844, -26.58517837524414, 587.7742919921875, -76.40190887451172, -247.23025512695312, -20.757659912109375, 212.7397918701172, 590.6685791015625, -51.73829650878906, -42.93199920654297, -60.6408576965332, 183.29258728027344, -22.04949951171875, 104.07038116455078, 570.91357421875, 272.2748107910156, 80.97477722167969, 271.3369445800781, 21.703916549682617, 266.0101013183594, -149.39508056640625, 494.8864440917969, -82.06582641601562, 155.50930786132812, -22.162918090820312, 396.2822265625, 140.5794677734375, -102.73956298828125, 543.6307373046875, 477.641357421875, 441.167236328125, -248.8625946044922, 147.0310516357422, 224.37120056152344, -304.351318359375, 15.490821838378906, 704.5339965820312, 736.0072021484375, -34.687095642089844, -834.665771484375, 663.1586303710938, 519.796630859375, 592.999267578125, 147.71337890625, 502.3230895996094, 474.470703125, 365.100341796875, 335.7917175292969, 11.017038345336914, -533.0274658203125, 173.2612762451172, 32.51279067993164, -37.29753494262695, 1213.5389404296875, -745.1215209960938, 65.99006652832031, 125.17582702636719, 325.46826171875, 1198.5826416015625, -114.61791229248047, -336.6946716308594, 1197.34423828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000420.npy"}
{"epoch": 0.6167400881057269, "step": 421, "batch_size": 64, "mean": 204.13345336914062, "std": 308.9214782714844, "min": -894.52978515625, "p10": -158.94231338500973, "median": 258.5935821533203, "p90": 578.8029418945314, "max": 856.6429443359375, "pos_frac": 0.78125, "sample": [335.8915710449219, 88.02059936523438, 116.55608367919922, -117.42768096923828, 258.0636901855469, 471.0860595703125, 799.7211303710938, -25.16900634765625, 109.47270202636719, 83.89549255371094, 325.85382080078125, -10.12625503540039, 558.771484375, 649.5357666015625, 469.7071533203125, 638.7640380859375, 311.4921875, -226.7058563232422, 110.61951446533203, 147.97586059570312, 763.1885986328125, 466.464111328125, 159.11712646484375, -223.96926879882812, 234.10292053222656, 263.14483642578125, 411.49468994140625, 259.12347412109375, 36.141456604003906, 207.44801330566406, 558.9658203125, 334.7115173339844, 588.3483276367188, 285.50909423828125, 415.67877197265625, -519.0563354492188, 429.42333984375, 364.97265625, -396.3625793457031, 587.3045654296875, 338.07379150390625, -18.8314266204834, 308.46356201171875, 856.6429443359375, 273.7892150878906, 292.9276123046875, 139.3118133544922, -2.1058216094970703, 21.20530128479004, 426.629150390625, 98.72142028808594, 3.3199539184570312, 269.6951904296875, -894.52978515625, -215.6904754638672, 65.61485290527344, 10.77471923828125, -115.96572875976562, -92.1058349609375, 266.650390625, 245.61875915527344, -176.7342987060547, 283.3056335449219, 358.01019287109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000421.npy"}
{"epoch": 0.618208516886931, "step": 422, "batch_size": 64, "mean": 242.64328002929688, "std": 339.99072265625, "min": -720.4974365234375, "p10": -91.71028747558593, "median": 226.37308502197266, "p90": 646.4427612304688, "max": 1220.555908203125, "pos_frac": 0.78125, "sample": [286.66485595703125, 653.4700927734375, 812.8734741210938, 628.6075439453125, 347.80023193359375, -213.8470001220703, -560.2924194335938, 729.80322265625, -16.91005516052246, 1220.555908203125, -96.06056213378906, 166.13987731933594, 220.01268005371094, 48.51690673828125, 489.01220703125, 187.24472045898438, 70.47466278076172, -175.30844116210938, -33.630672454833984, 223.3037872314453, 51.3760986328125, 792.4459838867188, 334.8858642578125, 88.31369018554688, 413.90863037109375, 153.53811645507812, 186.31185913085938, 630.045654296875, 337.45159912109375, 615.117431640625, 764.0767211914062, 356.19415283203125, 417.5897521972656, 172.2633514404297, 310.5035705566406, 328.2825927734375, 328.09368896484375, -380.34246826171875, 286.7450256347656, 16.331146240234375, -39.75386428833008, 320.5122375488281, 229.4423828125, -720.4974365234375, -49.16958236694336, 562.5838623046875, 469.707763671875, 50.99922180175781, 706.49462890625, -59.02201843261719, 465.74884033203125, 206.7989959716797, 28.19085693359375, 105.87294006347656, 546.802490234375, 432.4952697753906, 285.40423583984375, -81.55964660644531, 460.0191955566406, 515.7611083984375, -58.146759033203125, 221.6717987060547, 180.67410278320312, -443.4249267578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000422.npy"}
{"epoch": 0.6196769456681351, "step": 423, "batch_size": 64, "mean": 269.53515625, "std": 315.45367431640625, "min": -395.78094482421875, "p10": -122.72834320068358, "median": 236.45469665527344, "p90": 731.1557006835939, "max": 1096.186767578125, "pos_frac": 0.796875, "sample": [264.0618896484375, 86.82659149169922, 188.3196563720703, 60.23797607421875, -136.00991821289062, -312.2400207519531, 791.1408081054688, 254.24154663085938, 622.6700439453125, 519.93505859375, 741.1305541992188, -129.65359497070312, 856.35791015625, 204.7579345703125, 503.0863952636719, -76.25970458984375, 821.38037109375, 854.9627075195312, 803.0806884765625, 707.8810424804688, -112.669677734375, -23.5726318359375, 385.497314453125, 181.38656616210938, 526.5179443359375, 283.72784423828125, -12.026128768920898, 39.830474853515625, -395.78094482421875, -165.07485961914062, 386.925048828125, -127.03919982910156, 284.8093566894531, 316.35675048828125, 626.9837646484375, 270.1678466796875, 4.221235275268555, 447.4378662109375, -143.7971954345703, 101.40200805664062, 350.9774475097656, 632.6176147460938, 233.30239868164062, 361.9143981933594, 1096.186767578125, 140.6947479248047, 360.9783020019531, 19.940185546875, 476.02587890625, 45.81471252441406, 64.25843811035156, 201.62229919433594, 472.96783447265625, 486.1483154296875, 239.60699462890625, 200.9539031982422, -58.3697509765625, 190.49667358398438, 175.95474243164062, -103.06759643554688, 608.3738403320312, 44.101966857910156, 134.25784301757812, 373.2811584472656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000423.npy"}
{"epoch": 0.6211453744493393, "step": 424, "batch_size": 64, "mean": 250.56887817382812, "std": 352.5650634765625, "min": -361.3214111328125, "p10": -210.34189453124998, "median": 234.7029800415039, "p90": 700.8061828613282, "max": 1250.06591796875, "pos_frac": 0.78125, "sample": [-222.54214477539062, 338.8777770996094, 52.44303894042969, -142.32061767578125, -94.24365234375, 630.6431884765625, 412.6286315917969, 318.4145202636719, 103.61780548095703, -361.3214111328125, 281.6503601074219, 277.3232727050781, -75.18360900878906, -238.83982849121094, 238.44447326660156, 707.4198608398438, 457.7587585449219, 962.41796875, 131.45533752441406, 424.9835510253906, 535.5590209960938, 220.9104766845703, 714.9595947265625, 572.527587890625, 303.9883728027344, -126.75257110595703, 210.95352172851562, 22.343055725097656, -219.61001586914062, -188.71627807617188, 150.33226013183594, 1185.4278564453125, -271.7002258300781, 267.9373474121094, 351.58111572265625, -110.015380859375, 342.9617614746094, -248.35414123535156, 239.1640625, 80.55593872070312, -71.47777557373047, 121.62601470947266, -306.2066650390625, 34.374698638916016, 324.8292236328125, 348.6375427246094, 230.96148681640625, 87.25775909423828, 303.8656921386719, 1250.06591796875, 386.6893615722656, 685.374267578125, 411.8565368652344, 193.93130493164062, 377.94049072265625, 90.10578918457031, 864.5125122070312, 172.79299926757812, 178.06735229492188, 1154.8184814453125, 473.8558044433594, 250.3271942138672, 124.76924133300781, 109.7529296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000424.npy"}
{"epoch": 0.6226138032305433, "step": 425, "batch_size": 64, "mean": 187.0858612060547, "std": 332.3014831542969, "min": -805.51611328125, "p10": -143.3329605102539, "median": 198.3335723876953, "p90": 600.7332580566407, "max": 861.7115478515625, "pos_frac": 0.78125, "sample": [-119.47503662109375, 20.818769454956055, 377.5012512207031, -361.48919677734375, -805.51611328125, 213.26400756835938, 138.59437561035156, 302.8493347167969, 105.66232299804688, 249.66212463378906, -27.29625701904297, 539.7578125, -789.98583984375, -144.7097625732422, 190.25318908691406, 112.37274169921875, 137.58609008789062, 178.0115966796875, 605.8336791992188, 113.65306091308594, 291.82861328125, 145.46319580078125, -49.64370346069336, 206.41395568847656, 665.2314453125, 7.929756164550781, 525.8685302734375, 106.76509094238281, -97.99772644042969, 733.5400390625, 153.34860229492188, 588.832275390625, 652.2297973632812, -179.2561492919922, 168.4019317626953, 294.1004943847656, 634.8357543945312, 142.0110321044922, 17.67681884765625, 683.995849609375, -596.7979125976562, 390.18048095703125, 141.60987854003906, -392.5075988769531, 409.1110534667969, 382.75396728515625, 464.462158203125, 221.7850341796875, 6.1356353759765625, 345.88372802734375, 209.16143798828125, 259.24114990234375, -114.61964416503906, -140.12042236328125, 265.50579833984375, 377.9183349609375, 508.2755126953125, 861.7115478515625, 524.6410522460938, 271.4290771484375, 514.5905151367188, -97.4742660522461, 266.6914978027344, 165.0032958984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000425.npy"}
{"epoch": 0.6240822320117474, "step": 426, "batch_size": 64, "mean": 238.563720703125, "std": 307.7294616699219, "min": -578.775390625, "p10": -57.92626342773437, "median": 167.80438995361328, "p90": 684.3603149414064, "max": 844.4959716796875, "pos_frac": 0.796875, "sample": [269.80413818359375, 105.98285675048828, 101.93616485595703, 167.85397338867188, 160.52670288085938, 81.46583557128906, 94.2789306640625, 213.49961853027344, 699.4610595703125, 133.97647094726562, 167.7548065185547, 226.073974609375, -46.547122955322266, -32.40709686279297, 649.125244140625, -214.9139404296875, 38.74070739746094, 192.40328979492188, 491.9070129394531, 198.66571044921875, -53.357582092285156, 712.4862670898438, -153.06854248046875, 218.84249877929688, 155.0754852294922, 100.66363525390625, -164.7483673095703, 364.59429931640625, 778.736328125, 523.73486328125, 154.15196228027344, 604.4717407226562, 68.49536895751953, 363.7682800292969, 156.94837951660156, 801.4347534179688, 15.81793212890625, -299.6405029296875, 116.7990951538086, 446.0213623046875, -35.890716552734375, 321.47076416015625, 844.4959716796875, 12.009757995605469, 471.22650146484375, 45.137596130371094, 352.524169921875, 556.8663940429688, -40.52398681640625, 95.09538269042969, -334.879638671875, 308.4739074707031, -11.812274932861328, 456.9428405761719, 607.9674072265625, 288.37921142578125, -59.88426971435547, 477.082275390625, 831.58203125, -578.775390625, 621.1731567382812, 812.0177001953125, 68.75112915039062, 547.8335571289062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000426.npy"}
{"epoch": 0.6255506607929515, "step": 427, "batch_size": 64, "mean": 189.3392333984375, "std": 290.7200927734375, "min": -375.8910827636719, "p10": -149.3357391357422, "median": 155.38015747070312, "p90": 471.47783508300796, "max": 1007.0493774414062, "pos_frac": 0.765625, "sample": [370.4399108886719, 38.19511032104492, 353.9605407714844, 399.13519287109375, 832.87841796875, 93.21269989013672, -56.72948455810547, 40.545082092285156, 416.2109069824219, 204.74819946289062, -18.020965576171875, 60.24385070800781, 133.80050659179688, 163.59881591796875, 87.36589050292969, 333.2333984375, 411.7373352050781, 869.4176025390625, 94.10285186767578, -213.69976806640625, 2.2264633178710938, 147.1614990234375, 351.544677734375, 312.2613220214844, 131.29995727539062, 270.851318359375, 380.22259521484375, 890.9035034179688, 40.69627380371094, 135.78431701660156, -238.45254516601562, 486.0047912597656, 437.58160400390625, -126.50305938720703, -44.77248001098633, 359.1181640625, 381.83843994140625, -57.827816009521484, 548.672607421875, 203.14669799804688, 218.7523193359375, 124.58604431152344, 106.92815399169922, 176.15122985839844, -373.7708740234375, 307.39666748046875, 382.71710205078125, 393.282958984375, -363.46966552734375, -279.3774108886719, 505.4437255859375, 253.1900634765625, 411.7676086425781, 19.94316864013672, 1007.0493774414062, -150.0076904296875, 61.55732727050781, 317.8071594238281, 368.3059997558594, 58.24140167236328, -147.76785278320312, -375.8910827636719, -28.041709899902344, -103.21830749511719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000427.npy"}
{"epoch": 0.6270190895741556, "step": 428, "batch_size": 64, "mean": 255.57199096679688, "std": 389.7872009277344, "min": -799.4129638671875, "p10": -175.10899200439448, "median": 250.8757781982422, "p90": 742.2724365234376, "max": 1193.638671875, "pos_frac": 0.734375, "sample": [-23.196636199951172, -230.46212768554688, 2.0543460845947266, 429.9582214355469, 856.088623046875, 377.6510009765625, 124.91642761230469, 610.5471801757812, 468.57568359375, 1193.638671875, -278.4408874511719, 97.15606689453125, 509.3649597167969, 204.72323608398438, 468.1255798339844, -53.93890380859375, -545.9903564453125, 782.1144409179688, 437.75482177734375, 595.9990234375, 503.753173828125, 399.3345947265625, -105.57028198242188, -30.95236587524414, 418.3186340332031, 799.4375, 344.6624755859375, 246.425537109375, -122.72323608398438, 340.59307861328125, -72.34484100341797, 367.32989501953125, 862.8880615234375, -100.43773651123047, 255.32601928710938, 54.02693176269531, 622.8900146484375, 468.433837890625, 29.764881134033203, 175.58563232421875, 103.10223388671875, 153.46258544921875, -46.51819610595703, -131.42684936523438, -799.4129638671875, -119.40016174316406, 72.14289855957031, 185.96627807617188, 1074.521240234375, 174.0731201171875, 454.4253845214844, 461.43731689453125, 650.9150390625, -685.3593139648438, 580.9317626953125, 284.78948974609375, 712.3501586914062, 751.9564819335938, 399.2374267578125, 156.7220916748047, 719.6763305664062, -193.8299102783203, 128.61167907714844, -215.1483154296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000428.npy"}
{"epoch": 0.6284875183553598, "step": 429, "batch_size": 64, "mean": 206.82952880859375, "std": 359.9501953125, "min": -520.5164794921875, "p10": -242.40450592041014, "median": 204.4083251953125, "p90": 613.7321899414063, "max": 1107.6165771484375, "pos_frac": 0.734375, "sample": [38.672874450683594, 190.89266967773438, -431.73223876953125, 334.335693359375, -75.6586685180664, 599.70263671875, -114.23131561279297, 118.40645599365234, 143.92547607421875, 377.49761962890625, -461.07623291015625, 183.79580688476562, 94.26155853271484, 109.60503387451172, -251.53565979003906, 417.2943420410156, 127.02053833007812, -249.69606018066406, 848.354248046875, -245.3629913330078, 1107.6165771484375, 925.7019653320312, 0.8909912109375, 414.5664978027344, 265.94586181640625, 36.295413970947266, 330.49090576171875, 6.783916473388672, 645.208251953125, 617.821044921875, 276.30572509765625, 361.06414794921875, 407.6278076171875, -519.8951416015625, -120.1011734008789, -214.7854461669922, -162.09603881835938, 235.20680236816406, 258.21466064453125, -22.55110740661621, 543.66162109375, 412.6287841796875, 282.99493408203125, -18.768157958984375, 793.59912109375, 370.74432373046875, 578.1663208007812, -220.94436645507812, 163.14732360839844, -235.50137329101562, 217.92398071289062, 173.42323303222656, 536.0804443359375, 240.89678955078125, -77.97138214111328, -520.5164794921875, 12.142318725585938, 282.98077392578125, 68.6783218383789, 604.1915283203125, 420.0848083496094, 1066.227294921875, 520.9799194335938, 417.45660400390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000429.npy"}
{"epoch": 0.6299559471365639, "step": 430, "batch_size": 64, "mean": 206.62753295898438, "std": 287.2034912109375, "min": -392.3701171875, "p10": -103.9875419616699, "median": 173.93126678466797, "p90": 673.8929321289063, "max": 1039.86572265625, "pos_frac": 0.734375, "sample": [222.68075561523438, 197.73167419433594, -10.413047790527344, -31.45528793334961, 682.7952880859375, 393.8990173339844, 37.818328857421875, 268.1669921875, 182.54132080078125, 534.9597778320312, -11.46114730834961, -31.608131408691406, 77.36066436767578, -113.9125747680664, 184.2261505126953, 696.6712036132812, -282.533203125, 424.25177001953125, 76.83119201660156, 102.57620239257812, 142.5326690673828, 109.75040435791016, 164.5299530029297, 186.83468627929688, -41.82244110107422, 165.3212127685547, 408.77783203125, 381.7763366699219, -80.82913208007812, 192.34361267089844, 102.67695617675781, -161.00791931152344, 1039.86572265625, -18.915603637695312, 83.34085845947266, 679.5597534179688, -18.23175811767578, -392.3701171875, 21.4908504486084, 234.9667510986328, 418.0084228515625, 757.064453125, 423.6175231933594, 143.14178466796875, 258.5589904785156, 706.6851806640625, -178.50685119628906, -242.49163818359375, 585.3353271484375, 127.25318908691406, 94.56416320800781, -34.84305953979492, 428.30816650390625, 245.68028259277344, -26.488605499267578, 306.5384521484375, 357.2646484375, -232.70040893554688, 355.12615966796875, 817.0230712890625, 660.6703491210938, 220.48529052734375, 27.626808166503906, 204.52342224121094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000430.npy"}
{"epoch": 0.631424375917768, "step": 431, "batch_size": 64, "mean": 297.0145568847656, "std": 320.4397888183594, "min": -440.065185546875, "p10": -94.56382064819334, "median": 264.4649353027344, "p90": 653.73466796875, "max": 1004.4764404296875, "pos_frac": 0.84375, "sample": [-263.49761962890625, 229.0268096923828, -196.43910217285156, 484.5334777832031, 249.41232299804688, 110.9877700805664, 825.638427734375, 158.3970947265625, 339.9207763671875, 533.7242431640625, 235.03570556640625, 455.8324890136719, 85.21566772460938, -70.9620361328125, -104.67887115478516, 124.18565368652344, 15.113250732421875, -440.065185546875, 497.1868896484375, -245.70591735839844, 656.7523193359375, 646.6934814453125, 1004.4764404296875, 1000.4146728515625, 141.46253967285156, 633.570556640625, 415.01287841796875, 399.3584289550781, 224.3223114013672, 392.28759765625, 732.5723876953125, 337.0937805175781, 294.30419921875, 98.03688049316406, 449.8458251953125, -403.2462158203125, 925.0050048828125, 8.026535034179688, 279.5175476074219, -66.77559661865234, 493.2283020019531, 235.8854522705078, 956.7603759765625, -12.289138793945312, 128.95753479003906, 220.13485717773438, 443.56573486328125, 21.75486183166504, 231.82733154296875, 346.6597900390625, 350.5804748535156, 575.701416015625, 627.6199340820312, 372.28411865234375, 530.469482421875, 432.217041015625, 206.539794921875, 102.01026153564453, -159.74493408203125, 233.24913024902344, 637.2258911132812, 170.87994384765625, 147.0607452392578, 524.7593383789062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000431.npy"}
{"epoch": 0.6328928046989721, "step": 432, "batch_size": 64, "mean": 290.58984375, "std": 293.08392333984375, "min": -273.5431213378906, "p10": -14.13495712280272, "median": 247.97593688964844, "p90": 701.8621032714844, "max": 1317.9202880859375, "pos_frac": 0.875, "sample": [91.38752746582031, 213.40087890625, 265.36383056640625, 258.99847412109375, 212.5140838623047, 232.67820739746094, 109.17789459228516, 74.73616027832031, 222.60516357421875, 81.86146545410156, 49.567691802978516, 19.196788787841797, 577.658935546875, 438.52752685546875, -53.22591018676758, 1317.9202880859375, 691.5863647460938, 282.3153381347656, -1.5282440185546875, 178.03631591796875, 107.81057739257812, 116.37960815429688, 228.49185180664062, 46.924102783203125, 466.85174560546875, 285.07989501953125, 756.6013793945312, 210.8575897216797, 435.892822265625, 74.25738525390625, 253.769287109375, -19.53783416748047, -273.5431213378906, 286.5189208984375, -157.64425659179688, 635.4495239257812, 178.42430114746094, -135.0766143798828, 366.5148010253906, 97.51023864746094, 242.82745361328125, -110.09310913085938, 283.5086669921875, 499.7557373046875, 306.7465515136719, 401.8951721191406, 656.0388793945312, 428.0533447265625, 410.40020751953125, 391.64361572265625, 147.5566864013672, 520.717041015625, 47.356719970703125, 847.4330444335938, 253.12442016601562, 178.59564208984375, 731.9678955078125, 399.2531433105469, 676.8466796875, 839.21484375, -230.76803588867188, 729.4998168945312, 15.597999572753906, 706.2659912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000432.npy"}
{"epoch": 0.6343612334801763, "step": 433, "batch_size": 64, "mean": 188.51522827148438, "std": 337.96759033203125, "min": -881.205322265625, "p10": -172.2292907714844, "median": 219.57298278808594, "p90": 581.9626281738282, "max": 730.8955078125, "pos_frac": 0.75, "sample": [13.970916748046875, 511.0572509765625, 515.502197265625, 424.07586669921875, 408.0149230957031, -111.03919982910156, 341.55462646484375, -776.7498779296875, 544.7594604492188, 611.3289184570312, -881.205322265625, 10.368270874023438, 670.6236572265625, 393.3728942871094, 192.3787841796875, -98.36015319824219, 160.9612579345703, 350.70806884765625, 624.4944458007812, -767.3458862304688, -18.630592346191406, 571.0934448242188, 586.620849609375, 281.19525146484375, 152.75921630859375, 174.19981384277344, 75.82133483886719, 213.48159790039062, -70.65885925292969, 402.3205871582031, 79.7760009765625, 730.8955078125, -306.1537780761719, 423.64794921875, 327.47021484375, 444.91552734375, 500.7567138671875, -266.8402099609375, 297.6226806640625, 17.084022521972656, 647.2061767578125, 566.385986328125, 217.47964477539062, -115.28185272216797, 230.941650390625, 65.6402816772461, 340.3006286621094, -5.912498474121094, -170.7470245361328, -190.30084228515625, 592.9439086914062, 427.7851867675781, 221.66632080078125, 39.75838088989258, 109.704833984375, -172.8645477294922, -24.938438415527344, 386.9478454589844, -51.19744873046875, 7.548057556152344, 301.03558349609375, 426.455078125, 181.55421447753906, 277.0157470703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000433.npy"}
{"epoch": 0.6358296622613803, "step": 434, "batch_size": 64, "mean": 225.47265625, "std": 357.62359619140625, "min": -309.36431884765625, "p10": -99.6663459777832, "median": 165.14928436279297, "p90": 691.5034790039064, "max": 2019.46923828125, "pos_frac": 0.75, "sample": [495.9696350097656, 169.88856506347656, 2019.46923828125, 172.55604553222656, -148.62364196777344, 113.77371215820312, 188.98822021484375, 475.7235412597656, 34.29267120361328, -42.65238952636719, 252.93531799316406, -18.942550659179688, 95.5790023803711, 703.7735595703125, -25.699321746826172, 69.06436157226562, -88.29247283935547, -309.36431884765625, 452.9741516113281, 662.873291015625, 711.8331298828125, -189.98251342773438, 156.5860595703125, 223.7340087890625, 284.842041015625, 204.48464965820312, -28.34804344177246, -40.41387939453125, 245.1105194091797, 237.83206176757812, 82.29165649414062, 219.30801391601562, 402.296630859375, -84.50105285644531, 45.58473205566406, 405.199951171875, 116.92868041992188, 13.935182571411133, 174.15045166015625, 229.49404907226562, -38.073699951171875, 394.3860778808594, 569.8170166015625, 61.18684387207031, 160.41000366210938, 237.01217651367188, 388.9670104980469, 89.79853820800781, 119.8153305053711, 801.2999877929688, 281.6319580078125, -51.47589111328125, 809.4622802734375, -211.18482971191406, 120.19309997558594, 110.72830200195312, 797.09716796875, 196.09356689453125, -104.54086303710938, -216.23199462890625, 921.5592651367188, 517.9247436523438, 5.5838165283203125, -215.86256408691406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000434.npy"}
{"epoch": 0.6372980910425844, "step": 435, "batch_size": 64, "mean": 245.1993408203125, "std": 311.724609375, "min": -642.1610107421875, "p10": -68.93781013488767, "median": 215.7286148071289, "p90": 637.5289062500001, "max": 1004.6622314453125, "pos_frac": 0.828125, "sample": [969.287353515625, 325.74658203125, -642.1610107421875, 330.7864990234375, 561.9165649414062, 180.61911010742188, 167.76541137695312, 390.59381103515625, 352.5667419433594, 152.09823608398438, 526.6511840820312, 30.814498901367188, 88.45616149902344, 286.6231384277344, -80.85011291503906, 580.7318725585938, 1004.6622314453125, 901.7537841796875, 234.4599609375, -92.21409606933594, -32.644622802734375, 6.873870849609375, -31.967117309570312, 177.76744079589844, -113.92352294921875, 131.93190002441406, 252.1055145263672, 208.42080688476562, -159.904052734375, 201.55999755859375, 262.3230895996094, 32.78892517089844, 122.5830078125, 480.7873840332031, 565.5068359375, 379.9054260253906, 342.8929748535156, -458.93927001953125, 421.7835388183594, -314.8530578613281, 228.53414916992188, 276.1452941894531, 646.4521484375, 7.3427581787109375, 93.01888275146484, -41.14243698120117, 616.7080078125, 549.0619506835938, 9.170413970947266, -17.610170364379883, 842.351806640625, 45.33122253417969, 299.11004638671875, 124.01754760742188, 197.73721313476562, 700.7958984375, 653.7499389648438, 255.41580200195312, 223.0364227294922, 546.8067016601562, 172.59593200683594, 2.4449920654296875, 335.65435791015625, 180.72201538085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000435.npy"}
{"epoch": 0.6387665198237885, "step": 436, "batch_size": 64, "mean": 220.5616912841797, "std": 290.15338134765625, "min": -578.1475219726562, "p10": -136.1795310974121, "median": 178.23751831054688, "p90": 566.2327697753907, "max": 908.6744384765625, "pos_frac": 0.796875, "sample": [-104.67491149902344, 175.2628631591797, -139.95712280273438, 88.7594223022461, 577.265625, 265.9139099121094, 363.7734680175781, 755.7425537109375, 562.6019897460938, -115.38143157958984, 73.64775085449219, 804.734619140625, -44.6103515625, -150.91981506347656, 296.26739501953125, 233.06777954101562, -157.45924377441406, 43.71234130859375, 895.2701416015625, -267.9715576171875, 451.30987548828125, 519.9169921875, 277.12005615234375, 370.74554443359375, 474.8962097167969, 111.4405517578125, 408.42718505859375, 567.788818359375, 64.62834167480469, 97.81794738769531, 908.6744384765625, 85.44348907470703, 535.7453002929688, 168.05397033691406, 287.25091552734375, 141.61544799804688, -31.017044067382812, -15.505271911621094, 259.98052978515625, 127.4107666015625, 367.33856201171875, 77.33267211914062, 228.11354064941406, 157.66806030273438, 243.02598571777344, 302.06512451171875, 242.52076721191406, -170.38189697265625, 181.21217346191406, 150.23883056640625, 283.4156188964844, 173.697509765625, 413.4731750488281, 556.4076538085938, 858.4790649414062, 4.0478363037109375, 115.96955108642578, 2.3323516845703125, -578.1475219726562, 268.23162841796875, -127.36515045166016, 102.35220336914062, 448.84185791015625, -151.71060180664062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000436.npy"}
{"epoch": 0.6402349486049926, "step": 437, "batch_size": 64, "mean": 226.75625610351562, "std": 286.6820983886719, "min": -526.2520751953125, "p10": -87.55421829223633, "median": 206.86681365966797, "p90": 567.7707702636719, "max": 990.3386840820312, "pos_frac": 0.78125, "sample": [123.52711486816406, -298.4039306640625, 281.37359619140625, 549.9862670898438, 383.5453796386719, 3.414846420288086, 597.6718139648438, 173.73483276367188, 304.951904296875, -43.698333740234375, -526.2520751953125, 153.34774780273438, -276.6391906738281, 483.9843444824219, -25.27374267578125, 162.3505096435547, 209.74081420898438, -247.7161865234375, 146.14797973632812, 623.0459594726562, 70.44603729248047, 511.7767333984375, 237.18772888183594, -84.91466522216797, -136.68817138671875, 670.989501953125, 148.84939575195312, 111.82015228271484, 418.8471374511719, 437.75457763671875, 779.1260375976562, 437.29193115234375, 765.0364990234375, -97.20866394042969, 990.3386840820312, -88.68545532226562, -32.91771697998047, 3.252408981323242, 251.34890747070312, 178.82156372070312, 14.531639099121094, 13.045135498046875, 203.99281311035156, 283.865966796875, 403.713134765625, 122.11160278320312, 441.2350158691406, 467.4747009277344, 503.85821533203125, 537.0472412109375, 112.63539123535156, 486.4712219238281, 384.00238037109375, -45.874969482421875, -41.609642028808594, 575.3927001953125, 358.8702087402344, 376.15484619140625, -65.57878112792969, 269.151611328125, 52.017215728759766, 447.0046691894531, 257.1131896972656, 4.462532043457031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000437.npy"}
{"epoch": 0.6417033773861968, "step": 438, "batch_size": 64, "mean": 261.35968017578125, "std": 291.9039001464844, "min": -420.16357421875, "p10": -55.73866271972656, "median": 248.4976806640625, "p90": 659.2117309570314, "max": 967.6296997070312, "pos_frac": 0.84375, "sample": [59.5440673828125, 49.220008850097656, 406.5566711425781, -111.40602111816406, 243.34213256835938, 128.06735229492188, 753.3958740234375, -6.654191970825195, 194.69393920898438, 14.83270263671875, 272.2636413574219, 424.15234375, 4.035671234130859, 164.93310546875, 406.9921875, 458.91876220703125, 285.14874267578125, 967.6296997070312, 76.28260803222656, 262.635009765625, 200.52532958984375, 580.8717651367188, 425.92840576171875, 642.2637329101562, 161.27401733398438, 257.84710693359375, 41.587379455566406, 291.9886169433594, -371.0447082519531, -53.85333251953125, -56.546661376953125, 42.13883972167969, 567.849609375, 694.8446655273438, 621.1756591796875, 368.2451477050781, 253.65322875976562, -420.16357421875, 539.1712036132812, 315.81182861328125, 688.54150390625, 156.3309326171875, -52.29555892944336, 733.8414916992188, -116.8023681640625, 205.35354614257812, 580.6563720703125, 397.5653991699219, 589.8814086914062, 109.09008026123047, 80.22930908203125, 283.4020080566406, 728.7869873046875, 98.06547546386719, -68.7291259765625, 192.769775390625, 156.827392578125, -413.40948486328125, 10.045158386230469, 589.526611328125, 185.31483459472656, 666.4751586914062, 285.485107421875, 481.9140930175781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000438.npy"}
{"epoch": 0.6431718061674009, "step": 439, "batch_size": 64, "mean": 210.98980712890625, "std": 335.1656188964844, "min": -459.57623291015625, "p10": -139.1959106445312, "median": 141.97432708740234, "p90": 706.5486022949219, "max": 1071.1954345703125, "pos_frac": 0.734375, "sample": [221.11856079101562, 798.95458984375, 116.86784362792969, 343.40155029296875, 22.38543701171875, -102.30624389648438, 709.8272705078125, 56.13926696777344, 144.43968200683594, 94.87947845458984, -352.17169189453125, 746.0670166015625, 84.99031066894531, -19.50725555419922, -60.83123016357422, 274.0227966308594, 337.82989501953125, 576.7904052734375, 671.8612060546875, 181.46392822265625, 100.29582214355469, 972.023681640625, 255.58547973632812, -25.481658935546875, 203.1715087890625, -209.1583251953125, 93.78973388671875, -112.49702453613281, -3.3451385498046875, 23.54689598083496, 328.8151550292969, -193.06655883789062, 309.8017272949219, 516.2877197265625, 1071.1954345703125, 340.5938415527344, 554.7990112304688, 252.96420288085938, 87.87849426269531, 193.3729248046875, 139.50897216796875, -90.3205795288086, -432.11761474609375, -38.545562744140625, -13.302085876464844, 218.94671630859375, 132.3121337890625, 253.14393615722656, -323.245849609375, 575.56787109375, 698.8983764648438, 38.45951461791992, 214.2776641845703, 750.2157592773438, 108.45464324951172, -150.63829040527344, 478.1290283203125, 356.2835388183594, 59.161006927490234, 85.91090393066406, 1030.104736328125, 318.56915283203125, -459.57623291015625, -53.64640426635742], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000439.npy"}
{"epoch": 0.644640234948605, "step": 440, "batch_size": 64, "mean": 245.49815368652344, "std": 343.5228271484375, "min": -276.71246337890625, "p10": -100.7785430908203, "median": 149.77825927734375, "p90": 729.3162292480469, "max": 1438.187744140625, "pos_frac": 0.765625, "sample": [341.4913024902344, 345.25103759765625, 554.5869140625, 383.7547607421875, -90.73956298828125, 5.522148132324219, 636.1632690429688, 221.6181640625, 73.40306091308594, 829.48681640625, 73.10503387451172, -22.148345947265625, 261.3931884765625, 135.00540161132812, 726.2168579101562, -165.12283325195312, 145.63473510742188, -174.48768615722656, 33.430755615234375, 5.548118591308594, 150.68319702148438, 641.3997192382812, 71.59423828125, 59.45545196533203, 295.9984130859375, 198.11607360839844, -126.34569549560547, 326.22235107421875, 380.2740478515625, 187.15274047851562, 134.26156616210938, 774.5751342773438, -105.08096313476562, 265.43768310546875, 1.7063922882080078, -63.711517333984375, 1150.2720947265625, -258.4266052246094, 213.00015258789062, -4.569543838500977, 792.9334106445312, 238.55706787109375, 947.8253173828125, 553.9273071289062, 213.7308349609375, 105.9506607055664, -183.54531860351562, 69.50511169433594, 445.5444030761719, 544.7144165039062, 193.74017333984375, 730.64453125, -33.46527862548828, 107.3497085571289, 24.989070892333984, 536.6561279296875, 40.92628860473633, -40.77516174316406, -51.724342346191406, 148.87332153320312, 1438.187744140625, 570.6416625976562, -276.71246337890625, -17.720916748046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000440.npy"}
{"epoch": 0.6461086637298091, "step": 441, "batch_size": 64, "mean": 242.96311950683594, "std": 445.7601013183594, "min": -765.0991821289062, "p10": -241.3094573974609, "median": 233.01223754882812, "p90": 746.7207031250001, "max": 2014.7120361328125, "pos_frac": 0.6875, "sample": [327.47259521484375, 229.2400665283203, 281.8438720703125, 527.1697387695312, 485.0193786621094, 810.2904663085938, -60.455657958984375, 28.150970458984375, 133.41262817382812, -375.74310302734375, -183.2812957763672, -81.66255950927734, 727.1609497070312, 1076.5616455078125, 325.7325439453125, -479.2142028808594, 230.71131896972656, 685.8665771484375, 99.05084228515625, -28.996368408203125, 39.99052429199219, 112.40281677246094, -12.334564208984375, 795.8919677734375, 755.1034545898438, 55.398834228515625, 235.3131561279297, 393.09765625, 401.5242919921875, 2014.7120361328125, -10.006555557250977, -566.096923828125, 248.9150848388672, 572.252685546875, -448.74517822265625, -222.17333984375, 144.25933837890625, 255.4049072265625, 600.8411254882812, -453.9437561035156, -77.36904907226562, 520.82421875, -52.85618591308594, -765.0991821289062, 377.820068359375, 411.0501708984375, 417.90106201171875, 468.2348937988281, 360.5049133300781, -77.65908813476562, 184.3043975830078, -249.51065063476562, -66.791748046875, 447.3677978515625, 370.8175048828125, 64.2884521484375, 631.6604614257812, 362.07373046875, 589.8518676757812, 1185.452880859375, -19.688663482666016, -108.24358367919922, 131.83575439453125, 772.7320556640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000441.npy"}
{"epoch": 0.6475770925110133, "step": 442, "batch_size": 64, "mean": 295.62847900390625, "std": 396.7184143066406, "min": -515.55029296875, "p10": -72.33919715881345, "median": 167.91915130615234, "p90": 818.1462829589843, "max": 1262.7523193359375, "pos_frac": 0.78125, "sample": [-82.62984466552734, 169.17149353027344, 525.4805908203125, -11.8677978515625, 314.8663330078125, 146.27664184570312, -47.82421112060547, 202.56568908691406, 66.351318359375, 90.6365966796875, 27.440685272216797, 479.7967834472656, -26.13898468017578, 548.8713989257812, 370.4575500488281, 668.81640625, 1026.2926025390625, 429.04949951171875, -18.988616943359375, 164.93333435058594, 166.66680908203125, 531.8351440429688, 41.1719970703125, 269.8178405761719, -290.0799255371094, 214.06369018554688, 819.3482055664062, 314.49822998046875, 969.6766357421875, 248.39248657226562, 62.4671630859375, -515.55029296875, 148.18588256835938, -114.64732360839844, -7.499153137207031, 644.961181640625, -48.32768630981445, 495.2378845214844, 53.87755584716797, 1227.2294921875, -274.49237060546875, 387.67535400390625, 798.3040771484375, 33.218849182128906, 20.2530517578125, 1262.7523193359375, 815.341796875, 288.34832763671875, 675.9948120117188, 92.40804290771484, 97.5159912109375, -319.1745910644531, 15.96771240234375, 130.4723663330078, -1.6513214111328125, -234.15293884277344, 346.56536865234375, 428.40130615234375, 1208.826416015625, 645.5386352539062, 1163.1904296875, 772.7063598632812, 164.13099670410156, 127.19789123535156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000442.npy"}
{"epoch": 0.6490455212922174, "step": 443, "batch_size": 64, "mean": 187.8720703125, "std": 431.3890686035156, "min": -966.292236328125, "p10": -243.0147674560547, "median": 167.51332092285156, "p90": 697.1250427246097, "max": 1541.1722412109375, "pos_frac": 0.75, "sample": [1238.8900146484375, 81.42505645751953, 899.0298461914062, 19.481185913085938, 294.83905029296875, 115.89988708496094, -102.47150421142578, -84.99272155761719, -429.33673095703125, 120.34485626220703, 69.98448181152344, 24.942276000976562, 229.4515838623047, 214.405517578125, 58.397857666015625, -728.7346801757812, 99.05763244628906, -132.70689392089844, -114.00789642333984, -52.191192626953125, 1224.8446044921875, 358.87481689453125, 613.3052978515625, 328.6365661621094, -682.2098388671875, -631.3081665039062, 180.37319946289062, 248.7248077392578, 733.0477905273438, -966.292236328125, 430.8844299316406, 368.595458984375, 261.7911376953125, 147.33580017089844, 109.80635070800781, -125.59882354736328, 133.9193115234375, 458.9910888671875, 492.454833984375, 189.56663513183594, -244.0602264404297, -40.84312438964844, 468.4988708496094, -311.9557189941406, 390.012451171875, 147.84376525878906, 287.75494384765625, 1541.1722412109375, 313.2720947265625, 110.74083709716797, 578.3115844726562, 103.95883178710938, 213.3142547607422, 263.7689208984375, -157.52903747558594, 319.3968200683594, -240.5753631591797, 234.81068420410156, 153.7227783203125, 160.09783935546875, 895.9053955078125, 767.8350830078125, 174.92880249023438, 195.97848510742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000443.npy"}
{"epoch": 0.6505139500734214, "step": 444, "batch_size": 64, "mean": 300.8608093261719, "std": 403.4767761230469, "min": -606.9053344726562, "p10": -168.52476501464838, "median": 289.3016815185547, "p90": 766.6613891601568, "max": 1545.5203857421875, "pos_frac": 0.765625, "sample": [325.31109619140625, -253.12677001953125, 95.98584747314453, 633.6517333984375, 14.458852767944336, 350.95013427734375, -220.62916564941406, 1467.54150390625, 861.4454345703125, -8.004444122314453, -194.87918090820312, 294.3590393066406, 226.75030517578125, 524.5790405273438, 622.4087524414062, 161.862548828125, 823.66552734375, 495.44384765625, -228.65719604492188, 151.54652404785156, -37.80699920654297, -1.5916080474853516, 384.7259216308594, 893.8651733398438, -4.93621826171875, 408.57958984375, 329.2405090332031, -36.63812255859375, 321.4371032714844, -427.5013427734375, -10.213708877563477, 1545.5203857421875, 314.3628234863281, 392.0971984863281, 329.9525146484375, 499.92486572265625, 1194.5048828125, 424.7248840332031, 550.2645263671875, 250.01657104492188, -107.0311279296875, 1107.370361328125, 583.935546875, 13.981651306152344, 600.8377075195312, 524.2498168945312, 591.9822387695312, 18.54007911682129, 211.55970764160156, 158.24163818359375, 284.24432373046875, 217.0763702392578, 312.4319152832031, 203.83248901367188, 401.26611328125, 252.06103515625, 12.850217819213867, -25.000198364257812, -285.9148254394531, 37.036956787109375, 618.7425537109375, -606.9053344726562, 204.31094360351562, 460.19854736328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000444.npy"}
{"epoch": 0.6519823788546255, "step": 445, "batch_size": 64, "mean": 280.84063720703125, "std": 363.599609375, "min": -457.11383056640625, "p10": -126.38410186767574, "median": 218.09463500976562, "p90": 712.2947937011719, "max": 1274.9434814453125, "pos_frac": 0.796875, "sample": [225.95761108398438, 94.52316284179688, 25.036766052246094, 890.6241455078125, 13.932022094726562, 645.310546875, 512.1825561523438, 152.630126953125, 49.01287841796875, 161.2695770263672, 101.434814453125, 622.056640625, 12.351821899414062, 410.96185302734375, 145.71334838867188, -457.11383056640625, 99.7037353515625, 481.9492492675781, 352.537353515625, 231.53465270996094, 456.5420227050781, 364.82427978515625, 591.4877319335938, 717.42333984375, 6.782398223876953, 141.06387329101562, 1274.9434814453125, 477.4204406738281, 464.84710693359375, 651.6192626953125, 700.3281860351562, 594.8864135742188, 202.53260803222656, 739.3531494140625, 17.634384155273438, -213.0423583984375, 197.73721313476562, 280.1136779785156, -141.83621215820312, 572.59521484375, 41.835655212402344, 1076.7607421875, 498.1895446777344, -226.7157745361328, 834.4661865234375, 378.0313720703125, 193.69149780273438, -243.57489013671875, -49.697105407714844, -90.32917785644531, 267.5419616699219, -52.059608459472656, -44.10710525512695, 430.6693420410156, 96.87921142578125, 306.8719482421875, -34.928924560546875, -316.927978515625, 1236.87255859375, -75.70591735839844, 650.3277587890625, 210.23165893554688, 297.42144775390625, -280.8088684082031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000445.npy"}
{"epoch": 0.6534508076358296, "step": 446, "batch_size": 64, "mean": 299.6471862792969, "std": 329.9698486328125, "min": -656.377197265625, "p10": -48.08669815063476, "median": 268.6734313964844, "p90": 751.6568359375001, "max": 1138.68603515625, "pos_frac": 0.84375, "sample": [-40.08373260498047, 298.499267578125, 8.684768676757812, 725.2003173828125, 817.5171508789062, 374.5561218261719, 19.118701934814453, -656.377197265625, 210.15855407714844, 670.255126953125, 864.3883666992188, 403.14794921875, -51.855979919433594, 727.8259887695312, 546.3873901367188, 609.1428833007812, 626.9532470703125, 352.3531188964844, 148.648193359375, 383.83123779296875, -51.51654052734375, 166.80477905273438, 150.15196228027344, 208.67396545410156, 235.03855895996094, -400.72955322265625, 110.05702209472656, 157.52731323242188, 514.1567993164062, 259.34918212890625, 617.75, -117.30714416503906, 21.382293701171875, 192.6558074951172, 23.773298263549805, 189.30438232421875, -14.62420654296875, 415.02783203125, 279.322265625, 871.9199829101562, 329.7474365234375, 194.8842010498047, 854.5258178710938, 1138.68603515625, 547.135986328125, 3.7266311645507812, 869.8240966796875, 489.6330871582031, 305.94293212890625, 273.04022216796875, 364.4823913574219, -1.8708648681640625, -77.8126220703125, 145.08181762695312, 27.47020721435547, 264.306640625, 156.26817321777344, 401.36846923828125, 761.8700561523438, 211.16030883789062, -242.16580200195312, 702.4114990234375, 311.42169189453125, 279.2107238769531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000446.npy"}
{"epoch": 0.6549192364170338, "step": 447, "batch_size": 64, "mean": 316.29620361328125, "std": 411.7406921386719, "min": -499.46978759765625, "p10": -125.51631469726561, "median": 273.6072540283203, "p90": 754.9641723632812, "max": 2202.04541015625, "pos_frac": 0.8125, "sample": [231.53077697753906, 43.90174102783203, 501.78143310546875, 229.32606506347656, 51.802398681640625, -499.46978759765625, 570.708251953125, 267.0506896972656, 318.59832763671875, -100.90475463867188, -13.405254364013672, 762.4378051757812, 270.2162170410156, 204.21144104003906, 749.591552734375, 596.9872436523438, 803.8534545898438, 50.60069274902344, 2202.04541015625, 812.66259765625, 183.22799682617188, 1184.8829345703125, 459.03033447265625, 394.5303955078125, -247.84547424316406, 276.998291015625, 559.38330078125, -151.01437377929688, 315.8758850097656, 169.1663055419922, -171.35214233398438, -69.95146179199219, -28.670433044433594, -120.84591674804688, 394.0146484375, 4.09132194519043, 278.9206848144531, 593.4496459960938, 91.593017578125, 32.485252380371094, 585.8558349609375, 82.41495513916016, 114.43794250488281, -422.4830322265625, 757.2667236328125, 521.3716430664062, 475.2343444824219, -243.38760375976562, 264.5259094238281, 28.7772159576416, 473.05718994140625, 409.6504211425781, 423.93988037109375, 354.452392578125, 153.79193115234375, 630.2357788085938, 352.44793701171875, 231.3146209716797, 1062.646240234375, 627.961669921875, 87.86876678466797, 652.672119140625, 544.9269409179688, -127.51791381835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000447.npy"}
{"epoch": 0.6563876651982379, "step": 448, "batch_size": 64, "mean": 293.324462890625, "std": 357.9877014160156, "min": -562.0764770507812, "p10": -108.18391876220701, "median": 241.89767456054688, "p90": 672.2722717285158, "max": 1416.95166015625, "pos_frac": 0.828125, "sample": [13.690055847167969, 44.528316497802734, -361.3143615722656, 162.27420043945312, 592.826904296875, 37.345947265625, 167.8839874267578, -73.59290313720703, 329.1842956542969, -562.0764770507812, 472.9425048828125, 97.30890655517578, 647.10546875, 110.64508056640625, 227.92755126953125, 553.912109375, 39.85045623779297, -5.50898551940918, 629.586669921875, -155.43331909179688, -116.17445373535156, 57.059810638427734, 743.1988525390625, 471.26708984375, 238.9874267578125, 343.1521911621094, 244.80792236328125, 177.5277099609375, 497.59521484375, 912.5927734375, 321.64129638671875, 644.40869140625, -34.914100646972656, 377.1611328125, 1416.95166015625, 39.92286682128906, 84.75880432128906, 217.55076599121094, 89.92777252197266, 309.26727294921875, 565.9121704101562, 618.8658447265625, 266.1852111816406, -212.158203125, 1157.49755859375, 355.3839111328125, 228.5294189453125, -89.53933715820312, 427.8221740722656, 129.52334594726562, 327.4794616699219, -231.10984802246094, 992.772216796875, -164.8243408203125, 622.1871337890625, 918.4805297851562, 56.232025146484375, 368.0982666015625, 131.19375610351562, 323.6097106933594, 595.63330078125, 683.0580444335938, 509.67864990234375, 186.4769287109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000448.npy"}
{"epoch": 0.657856093979442, "step": 449, "batch_size": 64, "mean": 300.69012451171875, "std": 455.12646484375, "min": -852.9403076171875, "p10": -203.86676025390622, "median": 253.3141632080078, "p90": 778.5385375976563, "max": 2321.828857421875, "pos_frac": 0.765625, "sample": [179.46446228027344, 23.930641174316406, 314.0873107910156, -19.442214965820312, 766.7900390625, 760.2388916015625, 205.1463623046875, 117.56499481201172, 288.51861572265625, 281.3219299316406, 895.8974609375, -185.5531768798828, -28.38037872314453, 226.93240356445312, 665.053955078125, 24.223888397216797, 484.56451416015625, 179.84982299804688, 111.99681091308594, -328.29620361328125, 783.5736083984375, 288.1158447265625, 565.5340576171875, 747.6331787109375, 334.3580322265625, 741.5178833007812, -329.0153503417969, 42.70726013183594, 425.808837890625, 406.7041320800781, 182.22833251953125, 217.99478149414062, 35.6986083984375, 611.3143310546875, 586.4736938476562, 743.040283203125, -155.48306274414062, 189.63377380371094, -158.353759765625, -211.71543884277344, 2321.828857421875, 654.6390380859375, -234.23109436035156, 335.60711669921875, 377.37994384765625, 166.52914428710938, -84.776611328125, -852.9403076171875, -44.494937896728516, 1097.480712890625, 279.6959228515625, 854.5094604492188, 815.23388671875, -270.1819763183594, 465.8714599609375, 182.30557250976562, 445.62762451171875, 1076.102294921875, 282.3240966796875, -117.03962707519531, -215.1695098876953, 52.271881103515625, 197.7077178955078, 446.2066345214844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000449.npy"}
{"epoch": 0.6593245227606461, "step": 450, "batch_size": 64, "mean": 329.4759216308594, "std": 330.1685485839844, "min": -767.2117919921875, "p10": 18.185406875610358, "median": 292.62298583984375, "p90": 756.4460266113282, "max": 1235.1019287109375, "pos_frac": 0.90625, "sample": [419.8829345703125, 605.6182861328125, 806.6822509765625, 359.7026062011719, 58.39544677734375, 650.0132446289062, 1235.1019287109375, -56.02781677246094, 381.0700378417969, 25.13697052001953, 657.768310546875, 527.0830688476562, 701.4298706054688, 374.0531921386719, 409.20745849609375, 744.602294921875, 79.65472412109375, 84.5478515625, 232.496337890625, 49.092716217041016, 388.08160400390625, 761.5219116210938, 557.3567504882812, 786.460693359375, 246.80101013183594, -60.744319915771484, 145.9295196533203, 906.814208984375, 392.7998046875, 685.2245483398438, 82.69023132324219, 384.12432861328125, 278.59417724609375, 217.47508239746094, 294.4222412109375, 15.206165313720703, 238.0869598388672, 399.0967102050781, 222.896484375, 126.19148254394531, 936.1651000976562, -234.37718200683594, 124.72002410888672, 465.9999084472656, 145.5897216796875, 344.3999938964844, 56.12792205810547, 665.3387451171875, 223.26348876953125, 40.8338737487793, 479.38507080078125, -68.15301513671875, 567.9931030273438, 348.88519287109375, 57.07721710205078, -767.2117919921875, 299.65478515625, 290.82373046875, 80.86017608642578, 207.9832305908203, 152.625, 1077.879150390625, -34.91254425048828, 210.96713256835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000450.npy"}
{"epoch": 0.6607929515418502, "step": 451, "batch_size": 64, "mean": 231.30206298828125, "std": 442.19921875, "min": -1319.343017578125, "p10": -205.6855697631836, "median": 258.77110290527344, "p90": 691.6066040039065, "max": 1197.018310546875, "pos_frac": 0.796875, "sample": [63.216094970703125, -169.52877807617188, 21.091064453125, 346.4849853515625, 536.1513061523438, 282.0144958496094, 421.7794189453125, 615.21240234375, -59.170509338378906, 301.8070068359375, 501.3899230957031, 1197.018310546875, 492.5938720703125, 851.6514892578125, 139.51727294921875, 202.18685913085938, 136.99850463867188, -1319.343017578125, -811.8064575195312, -209.600830078125, 137.59609985351562, 55.85071563720703, 439.1749572753906, 589.4068603515625, 709.6878662109375, 260.55438232421875, 473.83465576171875, 192.57826232910156, 313.0917663574219, 649.4169921875, 206.73907470703125, 1063.33935546875, 962.8075561523438, 402.2912902832031, 587.5255126953125, 598.76171875, 434.07489013671875, -389.5562744140625, 256.9878234863281, 300.75787353515625, -41.77986145019531, -166.13015747070312, 434.66082763671875, -1088.7032470703125, 160.88558959960938, 177.82720947265625, 179.8766326904297, 168.14810180664062, 284.9901428222656, 96.22440338134766, 631.4772338867188, -184.43894958496094, -202.53927612304688, 758.54052734375, 545.2534790039062, 99.17523193359375, 404.092529296875, -495.0771789550781, 198.7884521484375, 8.71271800994873, 797.7760009765625, -207.0339813232422, 312.3945617675781, 145.62680053710938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000451.npy"}
{"epoch": 0.6622613803230544, "step": 452, "batch_size": 64, "mean": 286.1051330566406, "std": 304.45660400390625, "min": -365.5533752441406, "p10": -102.64582061767577, "median": 282.75360107421875, "p90": 698.2640869140627, "max": 970.2665405273438, "pos_frac": 0.828125, "sample": [880.7276611328125, -289.6528015136719, 538.4030151367188, 635.8822631835938, -108.40196990966797, 485.7069091796875, 444.8955078125, 550.3418579101562, 541.0925903320312, -89.21480560302734, 575.7698364257812, 293.7159423828125, 143.41693115234375, 117.62980651855469, 418.43194580078125, 313.0528259277344, 652.2457275390625, 271.791259765625, 472.47528076171875, 14.167219161987305, 375.182861328125, -12.389472961425781, 490.78192138671875, 100.49732208251953, 340.49993896484375, 710.7832641601562, 107.6455078125, -227.35183715820312, 587.597412109375, 217.58270263671875, -89.02474975585938, 418.258056640625, 79.79356384277344, 192.5260009765625, 731.9717407226562, 111.95272827148438, 237.35972595214844, 669.0526733398438, 87.17253875732422, 198.19927978515625, 307.18414306640625, -249.853271484375, 252.92697143554688, 743.188720703125, 236.2840576171875, 432.2947082519531, 133.55718994140625, 862.433349609375, 970.2665405273438, 79.38725280761719, 95.08100891113281, 367.57037353515625, 388.70318603515625, 9.21609115600586, -365.5533752441406, 300.80718994140625, -132.03668212890625, 480.6639709472656, 490.02685546875, 726.0626220703125, -156.56820678710938, 189.95443725585938, -65.35719299316406, 23.9211368560791], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000452.npy"}
{"epoch": 0.6637298091042585, "step": 453, "batch_size": 64, "mean": 271.54254150390625, "std": 367.0718994140625, "min": -337.310791015625, "p10": -111.85130386352539, "median": 188.31182861328125, "p90": 747.9303649902347, "max": 1661.365966796875, "pos_frac": 0.734375, "sample": [672.429443359375, 586.0264892578125, -102.73882293701172, 87.42620086669922, 190.5250244140625, 120.70979309082031, 255.32254028320312, 160.5919647216797, 247.56201171875, 30.88958740234375, 220.44882202148438, 582.3671264648438, 505.5205383300781, 598.98095703125, -7.269588470458984, 164.30787658691406, 311.36822509765625, 145.451416015625, -174.25392150878906, 147.44728088378906, 564.1107788085938, -189.44618225097656, 239.96719360351562, 388.22210693359375, 671.6011962890625, -267.90545654296875, -94.48149108886719, 780.2879028320312, -337.310791015625, 346.7410888671875, -36.48155212402344, 205.15817260742188, 542.407958984375, 858.9644165039062, 872.3421630859375, 4.637012481689453, -95.47715759277344, 151.61050415039062, 1661.365966796875, 819.5184326171875, 418.9884338378906, -101.0400619506836, 950.8959350585938, -18.749427795410156, 266.6431884765625, 104.11100769042969, 557.2699584960938, 186.0986328125, 380.41961669921875, 444.14495849609375, -199.45277404785156, 498.34271240234375, -115.75665283203125, 545.9672241210938, 45.94830322265625, -102.33881378173828, -186.5078582763672, 117.44161224365234, 1051.2239990234375, 465.67938232421875, -28.157642364501953, -28.36577606201172, 138.67823791503906, 158.29269409179688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000453.npy"}
{"epoch": 0.6651982378854625, "step": 454, "batch_size": 64, "mean": 302.390625, "std": 394.3345642089844, "min": -847.0496826171875, "p10": -91.86246643066404, "median": 245.78482818603516, "p90": 757.0154052734376, "max": 1473.2222900390625, "pos_frac": 0.84375, "sample": [265.42218017578125, 26.697998046875, 1016.2410278320312, 284.613525390625, 543.5143432617188, 60.17353820800781, 713.702392578125, 359.93408203125, 19.14950942993164, 239.4524383544922, -631.01953125, 674.768310546875, 372.2051086425781, 331.637451171875, 205.86862182617188, 218.26507568359375, 456.86248779296875, 381.345458984375, -16.38829803466797, -847.0496826171875, 12.56169319152832, 526.1318359375, -73.39840698242188, 796.6625366210938, -327.6615295410156, -105.29421997070312, 775.578125, 97.1393814086914, 122.46638488769531, 223.21707153320312, 559.1273803710938, 135.17430114746094, 375.9483642578125, 129.20303344726562, 457.8924560546875, 130.49884033203125, 216.99755859375, 190.04751586914062, 366.80218505859375, 167.42356872558594, 519.4319458007812, 489.4370422363281, 62.45051574707031, 388.3713073730469, -7.3218994140625, 652.5374145507812, 1473.2222900390625, 398.84637451171875, -103.30391693115234, 175.56796264648438, 1439.7088623046875, 1111.9000244140625, 252.11721801757812, 498.58251953125, 78.56270599365234, 468.1145935058594, 678.3333129882812, -99.775634765625, 443.6896667480469, -102.79588317871094, 161.5426483154297, 87.45912170410156, 817.2952880859375, 17.113941192626953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000454.npy"}
{"epoch": 0.6666666666666666, "step": 455, "batch_size": 64, "mean": 361.5738220214844, "std": 421.0751037597656, "min": -753.1860961914062, "p10": -99.0847145080566, "median": 253.58267974853516, "p90": 1100.6028808593753, "max": 1371.8192138671875, "pos_frac": 0.875, "sample": [116.28813171386719, 164.2843017578125, 762.7968139648438, 1125.306884765625, -381.4869384765625, 345.9407653808594, 709.6527709960938, -115.70907592773438, 1188.430908203125, -200.15631103515625, 1129.004150390625, 249.60755920410156, 1042.960205078125, -269.1232604980469, 1154.3089599609375, -111.4267349243164, 222.37457275390625, 666.3836669921875, 271.1894836425781, 225.9605255126953, 83.66677856445312, 535.52197265625, 257.55780029296875, 11.143442153930664, 438.99658203125, 1160.50830078125, 754.0383911132812, -138.44287109375, 320.43597412109375, 495.673828125, 222.20721435546875, 517.265625, 232.560302734375, 165.43014526367188, 225.2461700439453, 204.18759155273438, 271.9771728515625, 331.8404846191406, 117.28793334960938, -753.1860961914062, 367.3900451660156, 54.656612396240234, 221.2272491455078, 589.2747802734375, 45.83685302734375, 50.255226135253906, 52.08349609375, 488.876708984375, 821.6832885742188, 229.56240844726562, 696.9804077148438, 111.91241455078125, 431.02728271484375, 516.0382080078125, 95.09174346923828, 109.52421569824219, 14.941211700439453, 833.475830078125, 474.43524169921875, 436.11871337890625, 1230.96875, 217.32705688476562, -70.28666687011719, 1371.8192138671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000455.npy"}
{"epoch": 0.6681350954478708, "step": 456, "batch_size": 64, "mean": 341.6557312011719, "std": 365.0517272949219, "min": -198.82437133789062, "p10": -19.246002197265618, "median": 244.1196746826172, "p90": 887.5202087402345, "max": 1454.42041015625, "pos_frac": 0.84375, "sample": [126.79962158203125, 165.9325408935547, 800.6246948242188, 76.03643798828125, -22.402099609375, 330.1128845214844, 59.75240707397461, -3.547515869140625, 230.29220581054688, 747.6513061523438, 457.82891845703125, -1.5327739715576172, 72.13710021972656, 1454.42041015625, 288.88165283203125, -116.08057403564453, -54.09471130371094, 234.96725463867188, 291.3562316894531, 398.8462219238281, 286.21435546875, 63.115928649902344, -69.95925903320312, 1079.7401123046875, -82.9898681640625, 429.0364990234375, 1403.744384765625, 374.7223205566406, 644.6802978515625, 30.336334228515625, 374.75958251953125, 510.7001647949219, 671.9236450195312, 179.6322021484375, 169.0574951171875, 239.33238220214844, 87.98857116699219, 451.2724609375, 1018.9888305664062, 97.27877807617188, 899.7079467773438, 479.027099609375, 305.43804931640625, 66.12810516357422, -198.82437133789062, 423.81396484375, 72.2354507446289, -69.63265991210938, 859.0821533203125, 54.955894470214844, 952.8629760742188, 1042.1580810546875, 349.8684997558594, -11.88177490234375, 201.76065063476562, 93.49699401855469, 340.5051574707031, 85.48165893554688, 434.1912841796875, 190.0562744140625, 221.35202026367188, 528.7176513671875, 799.0018310546875, 248.90696716308594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000456.npy"}
{"epoch": 0.6696035242290749, "step": 457, "batch_size": 64, "mean": 334.63519287109375, "std": 350.2214050292969, "min": -340.7384033203125, "p10": -72.36526718139648, "median": 392.60040283203125, "p90": 777.6267883300782, "max": 1293.507568359375, "pos_frac": 0.828125, "sample": [-57.66661834716797, 467.8211975097656, 436.6343994140625, 383.32781982421875, 589.764892578125, 692.3865966796875, 796.7479858398438, 786.111328125, 97.39742279052734, 54.09228515625, 1069.420166015625, 402.818603515625, 20.392948150634766, 229.14344787597656, 81.16603088378906, 236.81736755371094, 525.8312377929688, 204.20985412597656, -48.849143981933594, 546.9783935546875, 757.8295288085938, -11.557109832763672, 178.06398010253906, 508.98846435546875, 432.8822021484375, 1036.9647216796875, -340.7384033203125, -281.3984375, 582.8621826171875, 621.0372314453125, 97.05656433105469, 585.6505126953125, 439.7930603027344, 540.138916015625, 319.47137451171875, -78.66468811035156, 650.4738159179688, 455.0255432128906, 85.29763793945312, 20.02911376953125, 415.8963623046875, 827.8037719726562, 511.9053955078125, 117.42137145996094, 4.684192657470703, 127.71882629394531, 562.3057861328125, -129.1368408203125, 478.33209228515625, 380.9913024902344, -291.9955749511719, 704.3899536132812, 266.56781005859375, 851.9669189453125, -311.35516357421875, 438.52239990234375, 75.06443786621094, -151.39736938476562, 401.87298583984375, -23.757946014404297, 214.8433837890625, 505.19183349609375, 31.55828094482422, 1293.507568359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000457.npy"}
{"epoch": 0.671071953010279, "step": 458, "batch_size": 64, "mean": 307.7383117675781, "std": 386.9480285644531, "min": -451.6098327636719, "p10": -136.56593170166013, "median": 282.4566955566406, "p90": 678.3927612304688, "max": 1849.35205078125, "pos_frac": 0.78125, "sample": [-451.6098327636719, 828.0601196289062, 477.1475830078125, 346.1850280761719, 634.5269775390625, -70.96836853027344, 1849.35205078125, -104.61820983886719, 287.44635009765625, 524.3157958984375, 154.40521240234375, 208.5213623046875, 470.35150146484375, 333.6457214355469, 731.1144409179688, -84.18804931640625, -173.3830108642578, -1.999155044555664, 211.18826293945312, 681.5423583984375, 458.9644470214844, 516.1729736328125, 89.11064910888672, 534.0446166992188, 353.67364501953125, 372.79571533203125, 348.254150390625, -78.13739013671875, 277.467041015625, 543.7692260742188, 263.66571044921875, 623.2470703125, 563.3646240234375, 855.59228515625, 78.64697265625, -20.8162841796875, 390.5968933105469, 73.69664001464844, 347.30731201171875, 400.4764709472656, 712.3175048828125, 380.1193542480469, -94.64998626708984, -329.3311462402344, 181.47430419921875, -220.796875, 84.0902099609375, -150.2578125, 148.2613525390625, 1615.2642822265625, 582.5164184570312, 137.75601196289062, 269.6117858886719, 91.03807067871094, 191.47854614257812, 671.043701171875, 333.7274169921875, 458.0475769042969, 249.01817321777344, 273.6297302246094, -179.19325256347656, 449.37005615234375, -237.9456787109375, 235.7332305908203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000458.npy"}
{"epoch": 0.6725403817914831, "step": 459, "batch_size": 64, "mean": 339.218505859375, "std": 434.5468444824219, "min": -698.048583984375, "p10": -203.12803649902344, "median": 273.3023376464844, "p90": 914.4524658203127, "max": 1513.4561767578125, "pos_frac": 0.8125, "sample": [70.02104187011719, 131.1697998046875, 376.80706787109375, 119.0673828125, 245.31878662109375, 424.7197570800781, -229.68426513671875, 114.29820251464844, 667.4583129882812, -358.3658752441406, 559.6982421875, 390.9974365234375, 466.38629150390625, 568.1162719726562, -19.244279861450195, 482.68487548828125, -91.23440551757812, 679.85107421875, 344.45172119140625, 931.5040283203125, 31.433319091796875, 971.5980834960938, 614.8897705078125, -121.37834167480469, 365.6293640136719, -324.5653991699219, 878.0576782226562, -207.65293884277344, 87.69221496582031, 10.518377304077148, 246.2117156982422, -37.723411560058594, 858.4422607421875, -698.048583984375, 1513.4561767578125, 314.45172119140625, 1273.6181640625, 568.1024169921875, 185.18589782714844, -279.1663818359375, 184.64584350585938, 229.49569702148438, 442.24346923828125, 211.62548828125, 80.79449462890625, 266.40374755859375, -409.68292236328125, 202.63088989257812, 466.8848571777344, 689.5173950195312, 813.257080078125, 95.42805480957031, 331.183837890625, 1288.5411376953125, 262.14508056640625, 115.12786102294922, 1050.380859375, -192.56993103027344, 872.489013671875, 756.590087890625, 930.0502319335938, 280.200927734375, 58.84866714477539, 558.9780883789062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000459.npy"}
{"epoch": 0.6740088105726872, "step": 460, "batch_size": 64, "mean": 372.6073913574219, "std": 416.76416015625, "min": -420.4366455078125, "p10": -155.191731262207, "median": 317.98651123046875, "p90": 929.4992065429689, "max": 1667.1707763671875, "pos_frac": 0.828125, "sample": [860.12451171875, 723.0352172851562, 612.9554443359375, -195.81842041015625, 106.93952941894531, 996.8741455078125, 936.6767578125, 281.49359130859375, 141.72232055664062, 228.9670867919922, -156.94308471679688, 708.7259521484375, 154.98912048339844, 527.0406494140625, 165.9467010498047, 224.36614990234375, 324.2654113769531, -420.4366455078125, 158.01026916503906, 1401.334716796875, 1372.9744873046875, 210.0211181640625, 419.0596618652344, 60.42572021484375, 1034.7208251953125, 266.9972229003906, 1667.1707763671875, 369.3831787109375, -54.905296325683594, 25.977264404296875, 311.7076110839844, -171.30233764648438, 575.1805419921875, -127.43305969238281, 584.663330078125, 583.2471313476562, -151.10523986816406, 417.96484375, 257.9907531738281, -23.608116149902344, 912.7515869140625, 617.63525390625, 518.7216186523438, 443.0876770019531, 557.0026245117188, 154.24505615234375, 142.63865661621094, 84.08180236816406, 339.456787109375, 308.50323486328125, -208.2510223388672, 104.77535247802734, 162.77764892578125, 437.1747131347656, -234.7305450439453, 608.557373046875, 948.5999755859375, 486.23138427734375, -226.87545776367188, 821.3177490234375, 550.57470703125, 458.5971984863281, 5.131614685058594, 445.46942138671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000460.npy"}
{"epoch": 0.6754772393538914, "step": 461, "batch_size": 64, "mean": 295.65155029296875, "std": 433.9674072265625, "min": -567.2540283203125, "p10": -195.18454132080075, "median": 211.42481994628906, "p90": 805.4691650390625, "max": 1755.4818115234375, "pos_frac": 0.765625, "sample": [112.49282836914062, 13.262619018554688, 808.7501220703125, 393.4448547363281, -232.23272705078125, 552.93408203125, -567.2540283203125, 1432.0220947265625, 86.28131103515625, 637.5215454101562, 108.57930755615234, -99.73249053955078, 331.48016357421875, 677.8333129882812, 1056.03955078125, 458.2052001953125, 646.468017578125, -107.99554443359375, 207.43687438964844, 348.3853759765625, 326.7095947265625, 733.5007934570312, 797.8135986328125, 994.971923828125, -236.19361877441406, -59.8321533203125, 1257.5963134765625, 1755.4818115234375, 207.94381713867188, 377.646484375, 463.4516906738281, 183.0615692138672, 239.68621826171875, 20.816375732421875, 66.12255859375, 278.4775390625, -258.71392822265625, -13.49835205078125, -100.61795043945312, 30.347564697265625, 863.6055297851562, -83.87860870361328, 670.7362060546875, 685.0494384765625, -203.3241424560547, 328.5209655761719, 230.64659118652344, 620.8865356445312, 17.94940185546875, -176.192138671875, 627.932373046875, 106.1861801147461, 105.15231323242188, 114.0087890625, 464.0851135253906, -102.29126739501953, -343.5395202636719, 115.99923706054688, 381.55426025390625, 206.6651611328125, 162.39984130859375, -310.34832763671875, 296.29449462890625, 214.90582275390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000461.npy"}
{"epoch": 0.6769456681350955, "step": 462, "batch_size": 64, "mean": 244.05181884765625, "std": 434.33453369140625, "min": -526.9102172851562, "p10": -257.35097198486324, "median": 211.36275482177734, "p90": 702.4861877441409, "max": 1443.679931640625, "pos_frac": 0.703125, "sample": [424.4847412109375, 70.30606842041016, 2.3930740356445312, -234.4580841064453, 439.3924865722656, 573.2196655273438, 24.847795486450195, 516.573486328125, 636.7680053710938, 934.2268676757812, 494.785400390625, -214.82705688476562, -400.37188720703125, 73.55029296875, 159.76321411132812, 499.30584716796875, 550.54931640625, 123.10610961914062, -223.31427001953125, 730.651123046875, -265.0523376464844, 92.93594360351562, 466.3080139160156, -526.9102172851562, -190.1862030029297, 76.90896606445312, 589.3547973632812, -438.7569580078125, 14.042469024658203, -73.69239807128906, 1310.3414306640625, 483.2684631347656, -344.7411804199219, 252.5489044189453, 573.7361450195312, 1100.6824951171875, -190.82705688476562, -78.37784576416016, 1406.0439453125, 257.5201416015625, 190.08522033691406, -118.67027282714844, -239.38111877441406, 218.23492431640625, -327.5409240722656, -83.31947326660156, 368.3000793457031, 559.2536010742188, -304.9921569824219, 207.94393920898438, -66.92556762695312, 447.21484375, 1443.679931640625, 564.1978759765625, 406.608642578125, -142.55804443359375, 272.38690185546875, 489.3578796386719, 791.69384765625, 551.4061279296875, 96.82759094238281, 157.22927856445312, 214.7815704345703, 227.4031524658203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000462.npy"}
{"epoch": 0.6784140969162996, "step": 463, "batch_size": 64, "mean": 327.8647766113281, "std": 465.2637634277344, "min": -511.038330078125, "p10": -284.09768218994134, "median": 256.7659149169922, "p90": 920.2683532714847, "max": 1459.193359375, "pos_frac": 0.796875, "sample": [59.42826843261719, 569.0010986328125, 78.76329803466797, 249.92987060546875, -180.54290771484375, 391.7397155761719, -126.22779083251953, 85.20616912841797, 469.56866455078125, 47.99043273925781, 641.1515502929688, -312.6264343261719, 633.194580078125, 678.9461669921875, 184.87326049804688, 363.1101989746094, 354.8066711425781, 681.9392700195312, 948.6903686523438, 12.467174530029297, 26.445131301879883, 1459.193359375, 477.4169616699219, 219.57345581054688, 1422.6007080078125, 701.0803833007812, 143.33883666992188, 328.2466735839844, 21.499603271484375, 234.07864379882812, -331.1313781738281, -13.972877502441406, -468.2843933105469, 150.3179931640625, 800.7959594726562, -342.9718933105469, 6.7496795654296875, -141.6865234375, 451.9618225097656, -226.66831970214844, 602.6815185546875, 321.5815734863281, 738.8475952148438, 263.6019592285156, 838.7267456054688, 1117.64306640625, -83.71408081054688, 210.11785888671875, 1070.0438232421875, -308.71026611328125, 142.5609893798828, 1385.140625, 279.7693786621094, 853.9503173828125, -393.5792236328125, 58.29052734375, 431.3007507324219, 211.97293090820312, -511.038330078125, 415.1277770996094, 463.8326721191406, 550.9161376953125, 1392.75341796875, 181.53504943847656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000463.npy"}
{"epoch": 0.6798825256975036, "step": 464, "batch_size": 64, "mean": 494.1038818359375, "std": 493.57781982421875, "min": -501.2527770996094, "p10": -79.62045669555663, "median": 500.16510009765625, "p90": 952.6771606445313, "max": 2146.0693359375, "pos_frac": 0.875, "sample": [-287.6328125, 956.8585205078125, 373.7027282714844, 234.45639038085938, 59.36944580078125, 850.3048095703125, 708.5238647460938, 172.5402374267578, 79.11721801757812, 448.06915283203125, 746.1209716796875, 621.9625244140625, -427.4883117675781, 742.8011474609375, 344.6239013671875, 578.9126586914062, 678.19091796875, 216.9393768310547, 547.7015991210938, 775.5624389648438, 815.32958984375, 487.89678955078125, 809.1543579101562, -88.02134704589844, 1056.56298828125, 305.8592834472656, 615.6477661132812, 512.4334106445312, -471.1640319824219, 275.42523193359375, 913.2730712890625, 236.5169677734375, 557.50927734375, 59.42636489868164, 382.2857360839844, 671.154541015625, 844.9274291992188, 322.3072509765625, 533.198974609375, 209.3603515625, 215.347412109375, -140.60369873046875, 942.920654296875, 686.73486328125, 131.8329620361328, 347.2237854003906, -100.67768096923828, 302.24578857421875, 16.196434020996094, 678.5670166015625, 559.1401977539062, 1572.4896240234375, 2146.0693359375, 1430.020263671875, 432.74163818359375, 1061.27587890625, 809.0944213867188, -60.01837921142578, 1889.8736572265625, 767.0230712890625, 522.1654052734375, -501.2527770996094, 361.7491455078125, 82.76969146728516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000464.npy"}
{"epoch": 0.6813509544787077, "step": 465, "batch_size": 64, "mean": 392.1781005859375, "std": 536.2352905273438, "min": -1505.8883056640625, "p10": -179.3786956787109, "median": 436.9705047607422, "p90": 911.7634582519532, "max": 1868.1697998046875, "pos_frac": 0.859375, "sample": [694.3238525390625, 1570.0972900390625, 579.8464965820312, 268.7207946777344, -305.8169250488281, 85.88259887695312, 592.646728515625, 347.0877380371094, 14.045234680175781, 23.181976318359375, 945.2554321289062, 272.199951171875, 329.7881164550781, 122.85894012451172, 605.462890625, 624.9469604492188, 456.56707763671875, 487.3229064941406, 233.92237854003906, 92.57347869873047, -867.9180297851562, 1020.7164916992188, 551.1112060546875, 201.74703979492188, 1868.1697998046875, -134.54396057128906, 582.6139526367188, 145.47625732421875, 428.92095947265625, 487.47882080078125, 755.5616455078125, 484.75994873046875, -201.39962768554688, 109.24594116210938, 268.77703857421875, 576.5736083984375, 23.841957092285156, 316.22808837890625, -405.1749572753906, 797.395263671875, 957.6257934570312, 256.5840759277344, 598.970703125, -974.1533813476562, 752.6685791015625, 651.136962890625, 185.26734924316406, 865.7178344726562, 112.96424865722656, 894.5665893554688, 851.089599609375, -198.5935821533203, 1191.466064453125, -1505.8883056640625, 864.781494140625, 879.6111450195312, -75.85951232910156, 431.424072265625, 705.471923828125, 442.5169372558594, 60.33000183105469, 387.2735595703125, 919.133544921875, 794.7999267578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000465.npy"}
{"epoch": 0.6828193832599119, "step": 466, "batch_size": 64, "mean": 346.8487854003906, "std": 514.1136474609375, "min": -1233.105712890625, "p10": -363.1240783691406, "median": 309.0800323486328, "p90": 979.65908203125, "max": 1488.062255859375, "pos_frac": 0.78125, "sample": [651.9598999023438, -377.31890869140625, 133.8702392578125, -427.34271240234375, 875.0018920898438, 307.71484375, 613.0594482421875, 160.3328094482422, 768.7825317382812, 154.1973114013672, 213.96063232421875, 1106.16259765625, -462.1494140625, 736.8634033203125, -42.76479721069336, 150.17169189453125, 387.5439453125, 7.137348175048828, 450.3047790527344, 158.54461669921875, 584.90625, 185.63653564453125, 569.0662841796875, 408.08123779296875, 892.2335815429688, 167.20468139648438, 63.470802307128906, 827.12060546875, 209.3889617919922, 530.7079467773438, 385.429931640625, 161.87930297851562, 931.2020263671875, 1488.062255859375, -1233.105712890625, 971.6422729492188, 457.8091735839844, 742.153564453125, 1213.726806640625, 455.826416015625, 974.337646484375, 4.137088775634766, 150.351806640625, 172.98117065429688, 626.912841796875, 1202.2767333984375, -240.06954956054688, -446.4403991699219, -58.364295959472656, 10.818727493286133, 1277.0216064453125, 324.0521240234375, 981.939697265625, -34.773841857910156, 943.974853515625, 471.5971984863281, -468.2232971191406, -23.734039306640625, 1038.9666748046875, -330.0028076171875, 310.4452209472656, -108.99108123779297, -419.5088806152344, 260.1427307128906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000466.npy"}
{"epoch": 0.684287812041116, "step": 467, "batch_size": 64, "mean": 373.8319091796875, "std": 721.4420166015625, "min": -1501.4473876953125, "p10": -500.08205871582027, "median": 399.5828399658203, "p90": 1261.3607910156252, "max": 2082.515625, "pos_frac": 0.765625, "sample": [843.7258911132812, 1985.4373779296875, 752.6032104492188, 1429.9283447265625, -736.095703125, -1501.4473876953125, 637.99853515625, 411.0616149902344, 485.1272277832031, 388.10406494140625, 274.3965759277344, 439.99249267578125, 1874.977294921875, 180.35182189941406, 480.5322265625, 509.94073486328125, 88.08667755126953, 1328.9608154296875, 1722.6412353515625, 535.5926513671875, 431.8963623046875, -45.65419387817383, 14.893009185791016, 791.0770263671875, -369.5579833984375, -481.1653747558594, -696.96337890625, 543.7242431640625, 1223.494140625, 431.14337158203125, 1109.2000732421875, 159.03868103027344, 436.1414794921875, -517.3489990234375, 1074.5596923828125, 24.616363525390625, 1163.3699951171875, 251.10894775390625, -140.3892822265625, 980.5590209960938, 276.4285888671875, -1201.1202392578125, 205.75738525390625, -274.556884765625, 71.42262268066406, 460.73394775390625, 114.592041015625, -366.3526611328125, -325.03485107421875, 746.9888916015625, 928.9031372070312, 633.81103515625, 1277.58935546875, 331.7690124511719, 906.755126953125, -5.144355773925781, 683.7332153320312, 76.57919311523438, -508.189208984375, 2082.515625, -1145.4056396484375, 4.755458831787109, 134.51934814453125, 298.53076171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000467.npy"}
{"epoch": 0.6857562408223201, "step": 468, "batch_size": 64, "mean": 470.14892578125, "std": 619.7252197265625, "min": -1108.3807373046875, "p10": -77.7124244689941, "median": 417.6086120605469, "p90": 1467.3194458007815, "max": 2551.257568359375, "pos_frac": 0.859375, "sample": [463.2832336425781, -3.2218379974365234, 1541.79541015625, 513.6487426757812, 1579.788818359375, 1058.30419921875, 478.7313232421875, 660.5029296875, 451.7273864746094, 584.6552734375, 407.89056396484375, 745.3258056640625, 142.81353759765625, 474.5649719238281, 557.55712890625, 116.0162353515625, 245.9919891357422, -170.78492736816406, 422.11590576171875, 265.6196594238281, 192.68414306640625, 7.862102508544922, 68.11046600341797, 370.7762756347656, 405.9285888671875, 1505.5894775390625, 440.34027099609375, 1595.904541015625, 1005.754638671875, 115.45150756835938, -399.7792663574219, 22.979202270507812, 671.0255737304688, 250.66314697265625, 508.3697204589844, 686.3294677734375, 128.592041015625, 184.91119384765625, 706.40478515625, -1108.3807373046875, 2045.1134033203125, 81.1756362915039, 597.3137817382812, 533.3430786132812, 413.101318359375, 60.855133056640625, -335.343994140625, 434.0841979980469, 7.3170013427734375, 1378.022705078125, 398.1289978027344, 2075.13916015625, 296.3839416503906, 722.8330078125, -42.47863006591797, 435.181884765625, 428.6307067871094, 2551.257568359375, 615.9054565429688, -92.8126220703125, 256.6136474609375, -423.92718505859375, 0.2263946533203125, -242.3768310546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000468.npy"}
{"epoch": 0.6872246696035242, "step": 469, "batch_size": 64, "mean": 470.13177490234375, "std": 554.444091796875, "min": -798.9064331054688, "p10": -195.25982055664056, "median": 502.8377227783203, "p90": 1252.3284423828125, "max": 2057.90380859375, "pos_frac": 0.828125, "sample": [1145.2135009765625, 652.6915283203125, 103.99004364013672, 609.5903930664062, 2057.90380859375, 514.71728515625, 1305.046630859375, 137.517333984375, -78.35154724121094, 60.28844451904297, 620.69873046875, 142.7959442138672, -546.0553588867188, 106.02798461914062, 1175.347412109375, 16.83000946044922, 530.0091552734375, 528.15185546875, 528.990478515625, 578.594970703125, 1258.48291015625, 207.43118286132812, 141.83750915527344, 202.31602478027344, 1258.791259765625, -0.488037109375, 897.3375854492188, 535.2611083984375, 1334.006591796875, -315.87139892578125, 836.1175537109375, -250.73622131347656, 878.1368408203125, 242.11683654785156, 706.0707397460938, 272.86151123046875, -129.16946411132812, -223.58425903320312, 5.686370849609375, -481.56170654296875, 620.8461303710938, 918.6780395507812, 1264.3922119140625, 44.24958419799805, -343.177490234375, 184.063232421875, 273.8867492675781, 1203.213134765625, 418.6435546875, 127.92847442626953, 1166.477294921875, 871.3167114257812, 490.9581604003906, 1237.968017578125, 1352.2689208984375, 700.9700927734375, 595.7596435546875, 374.27142333984375, 47.54478454589844, -80.5090103149414, 174.71725463867188, 851.6189575195312, 826.2129516601562, -798.9064331054688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000469.npy"}
{"epoch": 0.6886930983847284, "step": 470, "batch_size": 64, "mean": 408.45806884765625, "std": 553.426025390625, "min": -1272.4635009765625, "p10": -298.4161743164062, "median": 435.3231201171875, "p90": 1066.2550537109375, "max": 1801.3975830078125, "pos_frac": 0.75, "sample": [370.7542724609375, 495.70989990234375, -34.78985595703125, 120.69330596923828, 226.69924926757812, -103.15276336669922, 344.46368408203125, 110.93169403076172, -24.358184814453125, -635.13623046875, 611.1150512695312, 931.0735473632812, 547.23486328125, 378.2007751464844, 685.2371826171875, 1074.8580322265625, -354.56005859375, 674.0298461914062, -1272.4635009765625, 709.5845336914062, -102.25204467773438, 776.05615234375, 229.048828125, -334.6494140625, 343.66888427734375, 221.03262329101562, -246.33682250976562, 421.9682922363281, 576.7545776367188, 448.6779479980469, 701.3938598632812, -297.51434326171875, 1553.758544921875, 89.80049133300781, -29.64740562438965, 679.1142578125, 540.3336791992188, 569.2911376953125, 1801.3975830078125, 707.6964721679688, 1394.857177734375, 102.08377075195312, 819.8521728515625, 1062.247314453125, -100.49037170410156, 1561.4510498046875, 526.0166625976562, 612.0135498046875, 678.31591796875, 1030.5257568359375, 333.1024169921875, -329.1044921875, -355.153076171875, 220.45999145507812, 543.400634765625, 822.337158203125, -298.80267333984375, 1302.8739013671875, 798.9498291015625, -137.6981964111328, 1067.97265625, 103.60140991210938, 382.4131164550781, 494.3733825683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000470.npy"}
{"epoch": 0.6901615271659325, "step": 471, "batch_size": 64, "mean": 336.6220397949219, "std": 558.6133422851562, "min": -922.4022216796875, "p10": -262.07717742919914, "median": 279.27015686035156, "p90": 904.0467163085938, "max": 1948.3660888671875, "pos_frac": 0.703125, "sample": [1344.1331787109375, -22.841766357421875, -44.877685546875, 702.592529296875, 337.294189453125, 747.8413696289062, 680.2832641601562, 1948.3660888671875, 909.2176513671875, 277.64697265625, -71.75965881347656, 297.46197509765625, -53.77912902832031, 38.446197509765625, 379.0671691894531, -116.51348876953125, -184.79718017578125, -188.32310485839844, 158.90521240234375, -41.58249282836914, 1816.07666015625, -491.9879150390625, 321.9813537597656, 338.9059753417969, 815.4768676757812, -27.59845542907715, -383.6924743652344, -70.97970581054688, -452.0910339355469, 527.4105834960938, -290.2594299316406, 1148.1923828125, 841.0169067382812, 880.9595336914062, 502.4894714355469, 686.5735473632812, 859.82080078125, 147.30677795410156, 183.8551483154297, 583.8563842773438, 145.91006469726562, 280.8933410644531, 85.81572723388672, 36.7325553894043, 786.7935791015625, 1909.5550537109375, 13.28509521484375, -922.4022216796875, 891.981201171875, 32.2694091796875, -373.6240234375, 121.39142608642578, 38.87824249267578, 285.29876708984375, 74.80279541015625, 654.3711547851562, -12.862386703491211, -196.31858825683594, 484.9122314453125, 345.1819152832031, 708.5064086914062, 945.19189453125, -377.9378356933594, 551.0886840820312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000471.npy"}
{"epoch": 0.6916299559471366, "step": 472, "batch_size": 64, "mean": 391.5537109375, "std": 652.6005859375, "min": -1355.848876953125, "p10": -174.1043716430664, "median": 250.97806549072266, "p90": 1272.8382568359377, "max": 2388.244873046875, "pos_frac": 0.796875, "sample": [-1355.848876953125, -457.52142333984375, 765.871337890625, -509.6831359863281, 307.76519775390625, 144.85177612304688, 619.7042236328125, -163.57229614257812, -96.25164794921875, -803.0084838867188, 290.1247863769531, 10.769290924072266, 136.57701110839844, 378.8955383300781, 55.250518798828125, 905.4273681640625, -20.268783569335938, 505.86700439453125, 268.49420166015625, 28.6988525390625, 1182.662353515625, 93.84336853027344, 25.727294921875, 11.85516357421875, 219.42242431640625, 449.07684326171875, 353.8373718261719, -244.14974975585938, 220.99468994140625, 888.0685424804688, 34.729766845703125, 980.61962890625, 259.7099914550781, 1070.998046875, 2292.797607421875, 1606.292236328125, 106.8600845336914, 1302.7342529296875, 313.60711669921875, 584.680908203125, 397.42657470703125, 1018.61865234375, 98.32646179199219, 242.2461395263672, -148.6571807861328, 1416.379638671875, 1613.9951171875, 168.0769805908203, 1395.812255859375, 18.149307250976562, 2388.244873046875, -72.44419860839844, -178.6181182861328, -86.5188980102539, 1203.0809326171875, 823.0245971679688, 613.9229125976562, 195.14010620117188, 379.6504821777344, -310.30645751953125, 286.0211486816406, 429.7192687988281, 236.78366088867188, 164.85472106933594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000472.npy"}
{"epoch": 0.6930983847283406, "step": 473, "batch_size": 64, "mean": 185.667236328125, "std": 796.5276489257812, "min": -2017.946044921875, "p10": -515.6864440917968, "median": 195.5361785888672, "p90": 976.7841918945314, "max": 3566.90380859375, "pos_frac": 0.671875, "sample": [-308.5307312011719, 933.4876708984375, 314.497802734375, -231.26124572753906, -531.7155151367188, 1379.3331298828125, -135.74029541015625, 161.207275390625, 245.87611389160156, 367.30902099609375, 1237.0316162109375, 650.2155151367188, -115.54731750488281, 77.23223876953125, -478.2852783203125, -448.494140625, 75.18380737304688, -31.736557006835938, 197.6034393310547, 1290.3323974609375, -340.64697265625, -35.09639358520508, 697.9978637695312, 343.0135192871094, 123.48605346679688, 138.24462890625, 259.5750732421875, 220.49876403808594, 204.46267700195312, 101.55923461914062, -2017.946044921875, 76.22782135009766, -630.6790771484375, 101.48841857910156, -1592.211181640625, -1692.64697265625, 3566.90380859375, 244.59661865234375, 501.83416748046875, 1236.144287109375, 274.0069580078125, 235.52194213867188, 769.3355102539062, 776.638671875, 193.4689178466797, 713.1109008789062, -533.1578369140625, -406.7528991699219, 219.9453887939453, 918.9471435546875, 597.9784545898438, 995.33984375, -394.94476318359375, -242.30780029296875, 101.67288208007812, -128.64675903320312, 830.8855590820312, -1400.631591796875, 480.8689270019531, 211.48483276367188, -349.90533447265625, 363.06756591796875, 173.53167724609375, 1328.4400634765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000473.npy"}
{"epoch": 0.6945668135095447, "step": 474, "batch_size": 64, "mean": 338.5450439453125, "std": 612.193115234375, "min": -1601.80126953125, "p10": -278.9833862304687, "median": 344.6103820800781, "p90": 961.6655639648437, "max": 2141.251953125, "pos_frac": 0.765625, "sample": [-1138.5833740234375, 243.78939819335938, 191.91607666015625, 963.1300048828125, 878.46484375, -568.2671508789062, 958.24853515625, 140.0838623046875, 326.5431823730469, 534.7145385742188, 1158.8531494140625, 2141.251953125, 29.04725456237793, 356.9814758300781, 27.489463806152344, 802.589111328125, 343.4491882324219, -12.602561950683594, -422.802978515625, 435.3246765136719, 90.53248596191406, 301.4950866699219, -205.47296142578125, 174.34866333007812, 345.7715759277344, 681.6767578125, 765.074462890625, 220.34417724609375, 556.0707397460938, 436.63800048828125, 670.2105102539062, -67.82501220703125, 63.06035614013672, 352.05206298828125, 543.2031860351562, 280.8826904296875, 640.8006591796875, 103.93084716796875, -58.84251403808594, -309.36065673828125, 858.07080078125, -78.20394897460938, 655.2584228515625, 557.3447875976562, 162.89328002929688, 307.2916564941406, 1395.5072021484375, 1113.8773193359375, -105.22557067871094, -576.2406005859375, -208.10308837890625, 865.2535400390625, 437.15087890625, 596.7110595703125, 599.554931640625, 151.18141174316406, 413.64361572265625, -923.3259887695312, 781.3399658203125, -141.93450927734375, 350.85357666015625, 1346.9654541015625, -1601.80126953125, 1734.607421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000474.npy"}
{"epoch": 0.6960352422907489, "step": 475, "batch_size": 64, "mean": 427.5458679199219, "std": 451.60137939453125, "min": -361.1188049316406, "p10": -160.9650421142578, "median": 417.64085388183594, "p90": 1058.1525390625004, "max": 1521.1192626953125, "pos_frac": 0.78125, "sample": [96.45947265625, -91.48158264160156, 467.4467468261719, 197.46144104003906, -298.30145263671875, 583.2603759765625, 1473.658203125, 426.4094543457031, 1217.496826171875, 1105.4896240234375, 313.72528076171875, 239.00808715820312, 408.87225341796875, -6.692169189453125, 647.2719116210938, -361.1188049316406, 651.4945678710938, -0.6666088104248047, 841.1904907226562, -166.11029052734375, 846.9813232421875, 751.87548828125, 773.4998779296875, 238.65252685546875, 109.28128051757812, 326.42767333984375, 440.7686767578125, 437.1042785644531, 37.39265060424805, 501.3604736328125, 286.459228515625, 430.9451599121094, -121.005615234375, 357.0018615722656, 635.9414672851562, 924.896484375, 439.97900390625, 728.8132934570312, -161.96205139160156, 1253.6783447265625, 439.7892150878906, -19.74695587158203, 257.10205078125, -230.4897918701172, 860.3543090820312, 106.93928527832031, 585.1971435546875, -158.63868713378906, 300.85382080078125, 816.5304565429688, -119.15353393554688, 947.6993408203125, 618.4683837890625, 12.578094482421875, 1521.1192626953125, -210.6957244873047, 278.1175537109375, 1319.66455078125, 696.6808471679688, 1112.3897705078125, 334.4344177246094, 760.2216186523438, -228.7399444580078, 379.29541015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000475.npy"}
{"epoch": 0.697503671071953, "step": 476, "batch_size": 64, "mean": 329.54150390625, "std": 447.2165832519531, "min": -558.1226806640625, "p10": -159.27927093505858, "median": 274.6832580566406, "p90": 948.7928039550789, "max": 2044.6177978515625, "pos_frac": 0.796875, "sample": [673.259521484375, 108.53400421142578, -131.3092041015625, 628.4274291992188, 274.79547119140625, 560.1385498046875, 1186.2918701171875, 139.11843872070312, 164.23684692382812, -161.182373046875, 1069.7520751953125, 132.23223876953125, 140.2002410888672, 448.83465576171875, 770.6384887695312, 647.763671875, 711.0404052734375, 149.30654907226562, 260.8600769042969, 141.29049682617188, 527.5932006835938, 318.7723388671875, -28.477005004882812, 1212.3131103515625, 560.7944946289062, 581.760986328125, -508.32489013671875, -250.3336181640625, 425.77001953125, 80.7522201538086, 9.269935607910156, 144.239013671875, -261.5999755859375, -259.8625793457031, 534.4515991210938, -105.28013610839844, -125.5108871459961, -324.14666748046875, 636.0852661132812, 191.1982421875, 234.08367919921875, 538.0908203125, 287.05206298828125, -154.8386993408203, 141.5111541748047, 274.571044921875, 751.2234497070312, 1060.499755859375, 2044.6177978515625, 1025.1446533203125, 373.6121826171875, -31.667953491210938, 283.8609313964844, 265.70782470703125, 154.94090270996094, 105.40919494628906, 389.7276916503906, 435.79473876953125, 102.29032897949219, 332.0660705566406, 276.781005859375, 1072.529541015625, 412.0775146484375, -558.1226806640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000476.npy"}
{"epoch": 0.6989720998531571, "step": 477, "batch_size": 64, "mean": 353.25738525390625, "std": 586.5377197265625, "min": -1667.0718994140625, "p10": -276.34150085449215, "median": 293.6494903564453, "p90": 1029.646990966797, "max": 2142.24072265625, "pos_frac": 0.734375, "sample": [-462.71160888671875, 465.40203857421875, 994.1152954101562, -478.65460205078125, 1544.0123291015625, 253.85206604003906, 312.9518737792969, 838.8543701171875, 815.0255126953125, 999.5022583007812, 53.505348205566406, 87.24320220947266, 235.6717987060547, 821.0948486328125, 1042.566162109375, -133.2743377685547, 750.630859375, 479.7615661621094, 1191.3004150390625, -288.44073486328125, 557.208251953125, 168.12567138671875, -10.636444091796875, 230.76683044433594, -248.10995483398438, 713.7544555664062, 379.1246643066406, 117.12588500976562, -3.906766891479492, -2.734678268432617, -568.2581176757812, 489.2379455566406, -1667.0718994140625, -601.9730224609375, 1398.796142578125, 268.7789001464844, 280.4029846191406, 195.56192016601562, 537.07080078125, 85.86294555664062, 643.7276611328125, -86.29156494140625, 644.23828125, 1105.603515625, 257.6808776855469, 396.6878967285156, 309.4432067871094, 878.7676391601562, 491.6991271972656, 182.643310546875, 1155.602783203125, -245.57815551757812, -55.463035583496094, 306.89599609375, -5.195648193359375, 959.384033203125, 739.1097412109375, -149.69473266601562, 58.799407958984375, -361.59881591796875, 57.034446716308594, 700.34814453125, 2142.24072265625, 640.8515014648438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000477.npy"}
{"epoch": 0.7004405286343612, "step": 478, "batch_size": 64, "mean": 392.1885986328125, "std": 499.2119445800781, "min": -735.4822387695312, "p10": -201.9390151977539, "median": 351.7679748535156, "p90": 1158.8174682617191, "max": 1667.6767578125, "pos_frac": 0.8125, "sample": [-212.6004180908203, 135.51014709472656, 686.8975219726562, 192.48651123046875, 18.068748474121094, 450.92291259765625, 309.59478759765625, -355.9720458984375, 434.4436950683594, 23.224609375, 363.37420654296875, 533.3279418945312, -570.137451171875, -336.1380615234375, 282.9462585449219, 891.9248657226562, 805.3946533203125, 1667.6767578125, 400.25091552734375, 149.44207763671875, -272.7852783203125, -177.06240844726562, -383.8892822265625, 271.7707824707031, 391.8524169921875, 980.36572265625, 419.69586181640625, 494.6114196777344, -41.245758056640625, 198.9606475830078, 1477.9427490234375, 1053.374755859375, 340.1617431640625, 254.77337646484375, 434.1005859375, 324.9477233886719, 1303.4791259765625, 1386.269775390625, 621.010498046875, -153.24607849121094, 598.791015625, -61.29094696044922, 120.79983520507812, -101.99678802490234, 636.072021484375, 484.7660827636719, 156.84129333496094, 1204.0072021484375, 248.88255310058594, 420.23748779296875, 559.47705078125, 31.616661071777344, 321.11376953125, 755.75634765625, 310.00177001953125, 282.1036682128906, 1410.7286376953125, 669.9556274414062, 1298.671630859375, -735.4822387695312, 590.683837890625, 406.2939147949219, 690.5015258789062, 5.811187744140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000478.npy"}
{"epoch": 0.7019089574155654, "step": 479, "batch_size": 64, "mean": 376.192626953125, "std": 630.46484375, "min": -1506.421875, "p10": -290.602847290039, "median": 328.459716796875, "p90": 1109.4802001953126, "max": 2945.06787109375, "pos_frac": 0.71875, "sample": [18.938087463378906, 91.62776947021484, 1171.7322998046875, 626.7474365234375, 982.1981811523438, 1179.951904296875, -404.85455322265625, -240.40414428710938, 339.43212890625, 10.185819625854492, 822.0228271484375, -171.18185424804688, -191.16494750976562, 317.4873046875, 794.8807373046875, 257.14202880859375, 220.65235900878906, -94.50225830078125, 1035.089111328125, 493.6830139160156, 210.23892211914062, 544.8826904296875, -100.09492492675781, 432.3681640625, 2945.06787109375, 1196.7249755859375, 648.7008056640625, 757.0796508789062, 762.9605102539062, 429.6864013671875, 1127.6202392578125, 259.0703430175781, 232.94970703125, 836.9239501953125, -588.3688354492188, 1639.05517578125, -34.228973388671875, 482.49090576171875, 495.9902038574219, 138.80296325683594, 400.3768005371094, 1067.1534423828125, 742.4440307617188, 434.1432189941406, -69.16148376464844, -1506.421875, 408.91986083984375, -321.6585693359375, -312.1165771484375, -80.69287109375, 144.60317993164062, -9.110450744628906, -411.951171875, -527.136474609375, 165.22389221191406, -28.497764587402344, 551.5604248046875, -1.54864501953125, 142.98583984375, 1382.0855712890625, 383.011962890625, 1033.5657958984375, 158.1243133544922, 652.841552734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000479.npy"}
{"epoch": 0.7033773861967695, "step": 480, "batch_size": 64, "mean": 296.2763366699219, "std": 574.5997924804688, "min": -830.8179321289062, "p10": -433.01636657714835, "median": 286.8660888671875, "p90": 998.1537292480468, "max": 2306.4892578125, "pos_frac": 0.671875, "sample": [-461.5265197753906, 397.28021240234375, -121.67379760742188, 93.34979248046875, 369.5934143066406, 155.15652465820312, 298.32501220703125, 949.9603881835938, 608.5711669921875, 852.6743774414062, 414.5863037109375, 1318.14208984375, 448.3233642578125, -319.985107421875, -705.7297973632812, -598.6629638671875, 642.3060302734375, 770.1565551757812, -162.24728393554688, 350.241455078125, 86.81929779052734, -158.6689910888672, 22.008377075195312, 408.58123779296875, 506.1515808105469, 177.35191345214844, 2306.4892578125, 275.40716552734375, 582.755126953125, -157.11134338378906, 842.697998046875, 996.5553588867188, 998.8387451171875, -152.18846130371094, -561.147705078125, 263.3061828613281, 538.0297241210938, -222.60350036621094, 477.31903076171875, -830.8179321289062, -682.317626953125, -643.510009765625, 675.285400390625, 1074.869140625, 1461.0693359375, -145.11215209960938, 377.8473205566406, 632.1774291992188, -25.96657943725586, 459.2016906738281, -9.669357299804688, -366.49267578125, 173.98760986328125, 166.36685180664062, 176.62957763671875, -27.177654266357422, 120.96562957763672, -53.23819351196289, 604.4168090820312, 1021.1405029296875, 517.8120727539062, -191.8328094482422, 1066.24658203125, 880.3718872070312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000480.npy"}
{"epoch": 0.7048458149779736, "step": 481, "batch_size": 64, "mean": 462.3815002441406, "std": 551.6420288085938, "min": -693.0579833984375, "p10": -155.968147277832, "median": 392.55328369140625, "p90": 1024.429168701172, "max": 1961.847900390625, "pos_frac": 0.765625, "sample": [1961.847900390625, -693.0579833984375, 587.0864868164062, 418.50225830078125, -218.67332458496094, -19.087448120117188, -106.84967041015625, 332.9029235839844, 518.2355346679688, 1572.5869140625, 119.86394500732422, -214.42581176757812, 128.8858184814453, -268.5623779296875, 454.87127685546875, 954.1239624023438, -109.46371459960938, 684.6056518554688, 1471.1146240234375, -55.30223083496094, -16.84625244140625, 850.7772827148438, 332.5658874511719, 792.0237426757812, 420.7169189453125, 198.34381103515625, 196.712158203125, 970.8860473632812, 706.0631103515625, -342.8678894042969, 67.13616943359375, 806.3698120117188, 366.60430908203125, -136.1888427734375, 749.5151977539062, 772.23583984375, -260.9373779296875, 21.849102020263672, 941.3790283203125, 534.7716064453125, -134.94427490234375, 143.50733947753906, 926.0338134765625, 920.4553833007812, 31.036231994628906, 188.19146728515625, 1791.3114013671875, 324.7619934082031, 1611.0081787109375, 1047.376220703125, 242.9197998046875, -7.6112518310546875, 453.7339782714844, 570.8468627929688, 543.0867919921875, 363.74560546875, 153.13015747070312, 880.1435546875, 789.7726440429688, 730.6771240234375, 1590.0296630859375, -164.4449920654297, 363.27020263671875, 744.0648193359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000481.npy"}
{"epoch": 0.7063142437591777, "step": 482, "batch_size": 64, "mean": 213.993896484375, "std": 620.3377075195312, "min": -2125.4208984375, "p10": -430.1016662597656, "median": 272.96434020996094, "p90": 910.6365783691411, "max": 1801.79931640625, "pos_frac": 0.6875, "sample": [174.77407836914062, -45.14799499511719, 379.1991882324219, -872.921875, 449.65032958984375, 572.3421630859375, -1172.399658203125, 615.4345092773438, 659.7581787109375, 361.14154052734375, 339.8738098144531, 683.76806640625, -11.805221557617188, 360.84912109375, 712.6784057617188, 1152.640380859375, 229.16891479492188, -311.96868896484375, 340.2363586425781, 396.250244140625, 17.559703826904297, 371.8133239746094, 360.5838623046875, 964.5796508789062, -868.6409301757812, 356.1563720703125, 133.72491455078125, 962.0665283203125, 316.759765625, 463.7989196777344, 699.4962768554688, 653.7081298828125, 790.6333618164062, 208.64321899414062, -364.8540954589844, 154.22756958007812, 964.308349609375, -458.0649108886719, 1368.2979736328125, -343.662109375, 697.8472290039062, -17.013418197631836, -183.9607696533203, 50.76258087158203, 212.06614685058594, 77.39260864257812, -2125.4208984375, 342.8441162109375, 1801.79931640625, 43.20677947998047, -7.616363525390625, -83.86451721191406, -1.82354736328125, 465.73760986328125, -998.8471069335938, 367.8324279785156, 138.61929321289062, 1335.1302490234375, -193.3497314453125, 126.66654968261719, -200.54263305664062, -51.882606506347656, 725.6380615234375, -590.2698364257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000482.npy"}
{"epoch": 0.7077826725403817, "step": 483, "batch_size": 64, "mean": 478.30712890625, "std": 598.7930297851562, "min": -491.6479797363281, "p10": -235.89148864746087, "median": 399.04241943359375, "p90": 1227.114831542969, "max": 3140.325927734375, "pos_frac": 0.78125, "sample": [-148.1928253173828, 96.61801147460938, 1370.0767822265625, 755.6799926757812, -339.58453369140625, 758.7431030273438, 398.21405029296875, 327.15386962890625, 657.1382446289062, 662.6397094726562, 399.87078857421875, 795.7860717773438, 463.1856689453125, 91.67616271972656, 409.8036804199219, 248.9238739013672, 944.4617309570312, 1025.4051513671875, -379.48333740234375, 550.1942138671875, 343.54949951171875, -46.28862762451172, 647.58203125, 826.262939453125, 63.49169921875, -173.61184692382812, 1039.650146484375, 716.0554809570312, 31.943496704101562, 1071.1121826171875, -291.1407775878906, 1352.75439453125, -25.440284729003906, 930.1414794921875, -1.1569976806640625, 1346.7763671875, 363.6849060058594, 969.68505859375, 194.50277709960938, 1184.06201171875, 752.6475219726562, 714.7208251953125, 518.43310546875, -464.5052490234375, 170.14547729492188, 313.75390625, 497.0035705566406, 327.10107421875, 1245.5660400390625, -83.79664611816406, 188.249267578125, -491.6479797363281, 188.62924194335938, 3140.325927734375, -310.8929443359375, 250.19248962402344, 1353.84716796875, 303.539306640625, 496.23602294921875, -262.582763671875, 749.161376953125, 1311.5406494140625, 237.75091552734375, -165.68878173828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000483.npy"}
{"epoch": 0.7092511013215859, "step": 484, "batch_size": 64, "mean": 445.344970703125, "std": 536.588623046875, "min": -576.7673950195312, "p10": -104.58787841796874, "median": 370.3786926269531, "p90": 1177.0624877929688, "max": 1950.3570556640625, "pos_frac": 0.78125, "sample": [415.366943359375, -106.54960632324219, 1680.819580078125, 120.82359313964844, -100.01051330566406, -244.61256408691406, 493.00860595703125, 73.86351776123047, 1950.3570556640625, 181.81573486328125, 93.78079223632812, 523.9630737304688, 520.7604370117188, 630.9246826171875, 1050.559814453125, 571.7070922851562, 409.552001953125, 102.6060791015625, -173.9598388671875, -576.7673950195312, 478.3550109863281, 36.8746223449707, 1158.3074951171875, 1081.37890625, 1654.018798828125, 38.7684326171875, 360.47760009765625, 767.785888671875, 6.622280120849609, -73.4749755859375, -53.18294906616211, -51.131591796875, 912.5199584960938, 1343.5159912109375, 25.778541564941406, 228.916015625, 289.2015380859375, 141.48583984375, 169.31761169433594, -172.31886291503906, -8.687156677246094, 316.7468566894531, 744.232421875, 380.27978515625, 1287.736572265625, -26.1070556640625, 666.4013061523438, 1185.100341796875, 473.6079406738281, -258.4658508300781, -36.98869323730469, 855.4979858398438, 1129.8045654296875, 766.3935546875, 62.87516784667969, 1198.73193359375, 1087.24267578125, 423.30029296875, 464.43829345703125, 61.339698791503906, 873.0150146484375, 46.87898254394531, -131.25160217285156, 978.7278442382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000484.npy"}
{"epoch": 0.71071953010279, "step": 485, "batch_size": 64, "mean": 425.968505859375, "std": 598.8422241210938, "min": -954.0516967773438, "p10": -138.18357620239254, "median": 314.0285949707031, "p90": 1142.8302856445314, "max": 2323.55029296875, "pos_frac": 0.765625, "sample": [814.6837158203125, 747.4845581054688, -65.54374694824219, 213.89413452148438, 485.4739685058594, 74.61356353759766, 1149.8846435546875, 197.05557250976562, 411.05511474609375, 545.3883056640625, 208.2386474609375, -54.57166290283203, 1407.9342041015625, 1256.6328125, 180.7760009765625, 1068.110595703125, -559.7911987304688, -19.21708106994629, 1033.22705078125, 978.385498046875, 1027.895263671875, 1153.894775390625, 203.63418579101562, 539.1712646484375, 2323.55029296875, 1214.7393798828125, -34.26313018798828, 69.23273468017578, 810.5768432617188, -112.3758773803711, 34.238548278808594, -88.38540649414062, -566.727294921875, 147.72833251953125, 261.5620422363281, 968.9072875976562, 614.086669921875, 135.5289306640625, 167.45767211914062, 781.0950927734375, -42.85870361328125, 139.04812622070312, 2040.671142578125, 531.4542846679688, -149.2440185546875, 384.7975769042969, 435.9092102050781, 692.6726684570312, 347.46044921875, -684.798828125, 1126.3701171875, 1067.6375732421875, 53.57095718383789, -283.3042907714844, 27.831436157226562, -241.87684631347656, 929.203857421875, 300.0666809082031, 327.9905090332031, 223.45159912109375, -20.87804412841797, -954.0516967773438, 910.1847534179688, 375.4123229980469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000485.npy"}
{"epoch": 0.7121879588839941, "step": 486, "batch_size": 64, "mean": 476.5207824707031, "std": 726.9288330078125, "min": -1246.3443603515625, "p10": -370.0424163818359, "median": 450.60755920410156, "p90": 1374.1772094726564, "max": 3047.2646484375, "pos_frac": 0.796875, "sample": [1056.009765625, 527.8863525390625, 110.72016906738281, 494.1235656738281, 370.8614501953125, -989.8116455078125, 154.71597290039062, 215.88980102539062, 1009.1360473632812, -136.83065795898438, 624.39501953125, 1494.7415771484375, 671.6047973632812, -885.8290405273438, -386.7142333984375, 1607.4766845703125, 971.2652587890625, 981.337890625, -331.1415100097656, 1261.3248291015625, 1407.1365966796875, -316.93603515625, 1580.16064453125, 1227.257568359375, 207.38331604003906, -287.3914489746094, 540.943603515625, 449.4673767089844, 36.932037353515625, 1588.99755859375, 1178.14013671875, 451.74774169921875, -559.3660888671875, -759.4120483398438, 403.41748046875, 353.3592529296875, 140.67144775390625, 1385.6048583984375, 919.9241333007812, 893.5826416015625, 355.8353271484375, 815.64599609375, 486.87530517578125, -129.56472778320312, 575.6531982421875, 891.7877197265625, 949.274658203125, 649.2372436523438, 956.0573120117188, 1347.5126953125, 154.35330200195312, 350.6221618652344, 5.466703414916992, 359.90435791015625, 683.7798461914062, 223.32301330566406, 36.154518127441406, 513.9480590820312, -1246.3443603515625, 86.52815246582031, 378.39605712890625, -10.239532470703125, -646.9248046875, 3047.2646484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000486.npy"}
{"epoch": 0.7136563876651982, "step": 487, "batch_size": 64, "mean": 354.3516540527344, "std": 577.5410766601562, "min": -1879.5528564453125, "p10": -244.39152832031246, "median": 293.2348175048828, "p90": 1076.6990173339848, "max": 2035.2833251953125, "pos_frac": 0.796875, "sample": [283.4404296875, 1251.14892578125, 202.08914184570312, -263.1217346191406, 1156.0933837890625, 1113.4561767578125, 740.5206298828125, 478.9035339355469, 1599.170654296875, -57.74462127685547, 137.00750732421875, -504.752197265625, 1186.5555419921875, -200.68771362304688, 213.3851318359375, 2035.2833251953125, 263.79364013671875, -378.88055419921875, 825.1787109375, 813.6713256835938, 385.38189697265625, 663.5226440429688, -1879.5528564453125, 89.6564712524414, -91.86748504638672, 646.0821533203125, -3.9488296508789062, 938.3572998046875, 1420.482177734375, -27.61991310119629, 224.7465057373047, 342.3628234863281, 398.3120422363281, 499.32562255859375, 208.15713500976562, -608.1359252929688, 218.84579467773438, -542.4197998046875, 625.6842651367188, 324.2756652832031, 383.8140869140625, 50.97998046875, 688.254150390625, -152.2831573486328, 467.99322509765625, 171.80758666992188, 195.24940490722656, 301.52911376953125, 491.58172607421875, 244.128173828125, 150.58677673339844, 613.357421875, 240.8743133544922, 421.6793212890625, 39.46656799316406, -330.40753173828125, 284.9405212402344, 990.9323120117188, 806.019287109375, 673.9725341796875, 21.675384521484375, 454.5666198730469, 20.798919677734375, 720.83056640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000487.npy"}
{"epoch": 0.7151248164464024, "step": 488, "batch_size": 64, "mean": 370.16302490234375, "std": 504.1206970214844, "min": -557.7685546875, "p10": -130.77005615234376, "median": 351.1059265136719, "p90": 1115.7972412109377, "max": 1918.21240234375, "pos_frac": 0.703125, "sample": [-3.4936141967773438, -8.01766586303711, 1556.593505859375, 195.4104461669922, 1073.675048828125, 127.46018981933594, 693.6004028320312, -72.69229888916016, -120.40774536132812, 339.1687316894531, 498.970947265625, 210.7169647216797, 566.4389038085938, 1177.248779296875, 346.99700927734375, 869.7759399414062, -153.52474975585938, 45.533241271972656, -557.7685546875, -128.7791748046875, 616.572265625, -88.02278900146484, 463.24444580078125, 392.6507263183594, -0.3625335693359375, -284.2559814453125, 34.391231536865234, 580.909423828125, 1918.21240234375, 172.06898498535156, 462.39154052734375, 678.8724365234375, -421.6833801269531, 1133.849609375, 771.0877075195312, -28.196247100830078, 1392.4385986328125, 570.486572265625, 355.21484375, 125.31635284423828, 632.6937866210938, 979.80615234375, 218.25762939453125, 492.40570068359375, 210.16737365722656, 450.41937255859375, 596.1673583984375, 1407.943359375, -119.3558578491211, -107.7265396118164, -164.80491638183594, 451.7760314941406, 435.7129211425781, 36.559730529785156, 432.2032470703125, 496.9847717285156, -70.81547546386719, 466.9528503417969, -12.163043975830078, -131.623291015625, -518.3716430664062, 476.7648620605469, 258.8203125, 1269.5655517578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000488.npy"}
{"epoch": 0.7165932452276065, "step": 489, "batch_size": 64, "mean": 366.00018310546875, "std": 556.5921630859375, "min": -755.4341430664062, "p10": -192.54015808105464, "median": 195.11882781982422, "p90": 1229.2572509765625, "max": 2002.260009765625, "pos_frac": 0.703125, "sample": [909.0051879882812, 37.14949035644531, 237.92747497558594, 119.67970275878906, -11.799020767211914, 140.85296630859375, 221.5347442626953, -430.0750427246094, 99.69158935546875, 141.19461059570312, -216.3869171142578, 177.62399291992188, 612.9173583984375, 734.3745727539062, 501.9396667480469, 165.51194763183594, -262.69561767578125, 227.27606201171875, -134.732177734375, 957.3727416992188, -45.73857116699219, -54.591796875, 275.5506591796875, 183.26220703125, -49.74135971069336, 150.6139678955078, 206.97544860839844, -25.354976654052734, 156.51329040527344, 696.407958984375, -87.66888427734375, 48.44325256347656, -25.110750198364258, -286.4044189453125, 1348.95947265625, 619.8051147460938, 20.405315399169922, 107.28007507324219, 341.1978759765625, -755.4341430664062, 759.49560546875, -313.8084411621094, 1431.332763671875, -399.6053466796875, 1228.8717041015625, 1229.4224853515625, 720.4095458984375, 2002.260009765625, 363.1369934082031, 760.908203125, 1838.9312744140625, 522.60107421875, 507.55853271484375, 1487.640625, -136.89772033691406, -63.900978088378906, 784.6533203125, 499.3753967285156, 560.5775146484375, 497.158935546875, -0.461883544921875, 1426.0592041015625, 669.7725830078125, -5.2119140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000489.npy"}
{"epoch": 0.7180616740088106, "step": 490, "batch_size": 64, "mean": 385.1979064941406, "std": 708.7413330078125, "min": -1379.1064453125, "p10": -363.56342468261715, "median": 190.2949981689453, "p90": 1271.8459594726567, "max": 2170.629150390625, "pos_frac": 0.765625, "sample": [72.59967041015625, -1285.2222900390625, 1170.733642578125, -209.43797302246094, 152.69873046875, -13.124473571777344, 170.59393310546875, 945.6244506835938, 936.9642333984375, -645.7032470703125, 105.63748168945312, -47.0838623046875, 1428.9697265625, 78.15701293945312, 491.2117919921875, -321.2976989746094, -205.3095703125, 1117.4560546875, 643.3616333007812, -776.3885498046875, 65.93180847167969, 141.44436645507812, -781.5108642578125, 135.0905303955078, -1379.1064453125, 1419.678466796875, 107.38742065429688, 479.6785888671875, 473.1852111816406, 748.9461669921875, 328.15838623046875, 2170.629150390625, 103.12139892578125, 689.0119018554688, 38.17961883544922, -266.72705078125, 1724.3695068359375, 949.1556396484375, 1876.110107421875, 115.83000183105469, 136.78228759765625, 209.99606323242188, 1315.1798095703125, 787.4990234375, -15.35365104675293, 828.9705200195312, -381.67730712890625, 938.1073608398438, 42.46759033203125, 477.03497314453125, 422.24652099609375, -513.990478515625, 1994.0147705078125, -26.0035400390625, 1001.7852783203125, 1002.1253051757812, 50.21917724609375, 128.12997436523438, 636.3526611328125, 969.1034545898438, 58.73481369018555, 872.6868896484375, 505.65594482421875, 263.594482421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000490.npy"}
{"epoch": 0.7195301027900147, "step": 491, "batch_size": 64, "mean": 259.3618469238281, "std": 498.0423583984375, "min": -854.7910766601562, "p10": -295.1734649658203, "median": 197.8345489501953, "p90": 884.2482116699221, "max": 1734.04052734375, "pos_frac": 0.71875, "sample": [314.16839599609375, -262.7571716308594, 841.7811889648438, 578.59423828125, 1407.3843994140625, 152.6851043701172, 400.3301086425781, -31.58601188659668, 462.0853271484375, 110.45501708984375, 299.5987243652344, 84.19148254394531, 36.879737854003906, -598.9454345703125, 502.1401062011719, 89.71357727050781, 773.1356811523438, 393.6737060546875, -61.62406921386719, 432.4283752441406, 205.48263549804688, -20.893789291381836, 1033.87451171875, -421.4378356933594, 235.58103942871094, 280.4337158203125, 727.0680541992188, -247.6502227783203, 902.4483642578125, 82.95384979248047, 934.3895263671875, 444.5604553222656, -854.7910766601562, 161.47024536132812, -246.21876525878906, 248.87518310546875, 25.096458435058594, 1114.55517578125, 278.77978515625, 296.84808349609375, 676.4186401367188, -689.4727172851562, 1734.04052734375, 111.34896850585938, -178.18763732910156, 190.18646240234375, -309.066162109375, 218.9944305419922, -51.91834259033203, 654.1522827148438, 98.8388671875, -440.42352294921875, 771.4306030273438, -92.51719665527344, -73.89209747314453, 115.25090026855469, 354.865966796875, 807.552734375, 117.79237365722656, -49.119110107421875, 1400.609375, 131.02545166015625, 512.7503051757812, -517.2610473632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000491.npy"}
{"epoch": 0.7209985315712188, "step": 492, "batch_size": 64, "mean": 249.57762145996094, "std": 587.8971557617188, "min": -1006.5714721679688, "p10": -501.17475891113276, "median": 180.85095977783203, "p90": 1139.1705810546878, "max": 1451.4764404296875, "pos_frac": 0.734375, "sample": [213.1519775390625, 230.6920623779297, 605.431884765625, 23.5028018951416, 255.71035766601562, 37.05406188964844, 32.619232177734375, 680.7911376953125, -630.8626708984375, -175.5293426513672, 208.84219360351562, 702.1978759765625, 810.335205078125, -322.11297607421875, -990.6884155273438, 1092.9765625, -122.20780181884766, 1303.08349609375, -204.40255737304688, 1451.4764404296875, -527.9402465820312, 1282.3074951171875, 328.2228698730469, -378.3906555175781, 394.835693359375, 381.62261962890625, 280.5039367675781, -579.0909423828125, 103.92289733886719, 315.27294921875, 20.5606689453125, 20.104421615600586, 162.26901245117188, -1006.5714721679688, -47.15054702758789, 1206.489990234375, 877.0511474609375, 252.1656494140625, -806.0676879882812, 73.44274139404297, 1158.968017578125, -798.3548583984375, 1222.6279296875, 55.78235626220703, -177.2402801513672, 771.480712890625, 199.4329071044922, 79.82991027832031, 977.8236694335938, 16.45510482788086, 512.2352294921875, -89.37583923339844, 95.72832489013672, -438.7219543457031, 1080.6689453125, 887.604248046875, 600.4019165039062, 50.76982116699219, 62.37373733520508, 588.1214599609375, -262.755859375, 1369.82470703125, 48.728614807128906, 404.93707275390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000492.npy"}
{"epoch": 0.7224669603524229, "step": 493, "batch_size": 64, "mean": 295.969970703125, "std": 540.3995361328125, "min": -1916.8348388671875, "p10": -190.21840515136716, "median": 209.1218719482422, "p90": 1093.6062255859379, "max": 1409.9234619140625, "pos_frac": 0.734375, "sample": [1138.2421875, 107.05785369873047, 515.0906982421875, 314.6993713378906, -94.47781372070312, 1278.813232421875, 981.6594848632812, -15.617767333984375, 168.86245727539062, -369.76300048828125, 503.16552734375, 69.91407775878906, 1000.3855590820312, 441.88897705078125, 493.8077392578125, 143.89486694335938, -310.3514709472656, 330.9103088378906, 162.09542846679688, 42.05470275878906, 195.71519470214844, 393.4062805175781, 447.1496887207031, 401.85662841796875, -138.60430908203125, 334.05853271484375, 657.8268432617188, -156.91845703125, -197.7467041015625, -138.83584594726562, 44.92523193359375, -1916.8348388671875, 355.54827880859375, -23.943832397460938, 795.2013549804688, 4.933130264282227, 1349.7735595703125, 1387.623779296875, 188.94842529296875, -217.23851013183594, -34.422149658203125, 414.297119140625, 1170.320556640625, 826.6212158203125, 331.1136169433594, 1024.067138671875, -502.9437255859375, 413.7286376953125, 1409.9234619140625, 1123.40869140625, 156.303955078125, 674.8809814453125, -285.52325439453125, 226.61111450195312, 312.002685546875, -172.65237426757812, -100.69348907470703, 211.00489807128906, 703.2276611328125, 207.2388458251953, 181.27392578125, 13.81890869140625, -79.48495483398438, 48.77662658691406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000493.npy"}
{"epoch": 0.723935389133627, "step": 494, "batch_size": 64, "mean": 459.6351318359375, "std": 509.01898193359375, "min": -643.2147216796875, "p10": -105.47897338867182, "median": 447.7499542236328, "p90": 1212.2382568359376, "max": 1885.8992919921875, "pos_frac": 0.828125, "sample": [-2.4168243408203125, 334.92266845703125, 607.28564453125, 394.8480529785156, 1885.8992919921875, 1442.482666015625, -643.2147216796875, 752.3253173828125, 728.9631958007812, 54.70465087890625, 452.9595031738281, -42.07533645629883, 597.19384765625, 413.23236083984375, -340.16790771484375, 1227.4561767578125, 748.8565673828125, -208.65179443359375, 468.0865173339844, 853.2095947265625, 141.43408203125, -301.019775390625, 282.1600341796875, 931.6911010742188, 799.2672119140625, 49.730628967285156, 879.0330810546875, 1211.833740234375, 244.99253845214844, 391.36822509765625, -522.485595703125, 35.65809631347656, 321.7012023925781, -298.3953857421875, 1690.5545654296875, 394.2203063964844, 466.14599609375, -3.1805191040039062, 536.0660400390625, 1400.7930908203125, 1238.780517578125, 480.9146728515625, -48.26612854003906, 32.065940856933594, 525.9689331054688, 697.20751953125, 737.7496948242188, 972.1484985351562, 471.2986145019531, 708.2623901367188, -129.99876403808594, 263.66455078125, 525.2084350585938, 139.40963745117188, 142.44012451171875, 540.474853515625, 824.908935546875, 588.3792114257812, 181.6746826171875, 442.5404052734375, 1212.41162109375, 72.95976257324219, 364.5766906738281, 54.39935302734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000494.npy"}
{"epoch": 0.7254038179148311, "step": 495, "batch_size": 64, "mean": 392.8917236328125, "std": 618.7874145507812, "min": -1089.258544921875, "p10": -210.3155563354492, "median": 294.92022705078125, "p90": 1128.3706298828124, "max": 2515.85888671875, "pos_frac": 0.765625, "sample": [365.2462158203125, 83.85105895996094, -178.34271240234375, -216.63616943359375, 246.0858154296875, 328.22479248046875, -542.789306640625, 167.02154541015625, 295.73638916015625, 32.010101318359375, 156.47386169433594, 720.9326171875, 959.6860961914062, -699.5560913085938, 447.20794677734375, 444.1463317871094, -316.3931579589844, 903.8399047851562, 78.93508911132812, 758.2923583984375, 137.8472442626953, -130.81417846679688, 261.1771545410156, 62.030147552490234, 378.6784362792969, 1129.844482421875, 977.6957397460938, 876.364501953125, 260.4961242675781, 504.7484130859375, 293.45013427734375, 312.9139404296875, 134.61911010742188, 489.43267822265625, 1124.931640625, 1156.4432373046875, 878.9185791015625, -195.5674591064453, 2515.85888671875, 1393.32861328125, 436.4463806152344, -17.471847534179688, 213.70083618164062, 653.0985107421875, 349.41644287109375, -687.7188720703125, -24.690994262695312, 294.10406494140625, 72.68636322021484, -334.9566345214844, -168.91094970703125, -8.412353515625, 371.09539794921875, 597.503173828125, 1459.22119140625, 596.2359619140625, 265.0946960449219, 877.4723510742188, -1089.258544921875, 1314.800537109375, 1017.9097900390625, 293.49371337890625, 2162.8271484375, -94.98531341552734], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000495.npy"}
{"epoch": 0.7268722466960352, "step": 496, "batch_size": 64, "mean": 197.38121032714844, "std": 484.35321044921875, "min": -1170.425048828125, "p10": -357.6348480224609, "median": 154.64617919921875, "p90": 886.187048339844, "max": 1200.5552978515625, "pos_frac": 0.71875, "sample": [160.67257690429688, -553.511962890625, 1137.0252685546875, 902.4625244140625, -870.8290405273438, 763.3357543945312, 95.20365142822266, 270.553466796875, 800.0537719726562, 148.61978149414062, 1200.5552978515625, 307.61968994140625, 387.9552917480469, 245.18954467773438, -267.24072265625, -26.367549896240234, -250.2252197265625, 483.864501953125, -829.8733520507812, 1158.6234130859375, 917.69140625, 328.7642517089844, 848.2109375, 371.3739929199219, 951.2305297851562, 176.97222900390625, -79.93565368652344, -100.46510314941406, 73.40046691894531, -378.8509216308594, 199.41461181640625, 144.67771911621094, 192.03060913085938, 465.3019104003906, 107.11125946044922, -91.21449279785156, 533.3766479492188, 212.73251342773438, -716.7417602539062, 227.8615264892578, -469.66552734375, -308.13067626953125, 678.73828125, 30.810401916503906, 145.94068908691406, 544.06787109375, 102.99732971191406, 464.1263732910156, 620.8036499023438, 474.9718933105469, 132.21951293945312, -230.6719207763672, 401.87353515625, 14.945180892944336, 124.79470825195312, -1170.425048828125, 24.123462677001953, 963.6741333007812, 23.577476501464844, -18.796470642089844, 462.9920959472656, -52.28440475463867, -7.843513488769531, 32.92974853515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000496.npy"}
{"epoch": 0.7283406754772394, "step": 497, "batch_size": 64, "mean": 207.71688842773438, "std": 495.0066223144531, "min": -955.7325439453125, "p10": -322.1953491210937, "median": 143.17077255249023, "p90": 919.5088867187501, "max": 1404.6500244140625, "pos_frac": 0.6875, "sample": [-341.5212707519531, 933.373779296875, 622.4695434570312, 718.6083374023438, 11.287099838256836, -255.65008544921875, 527.0885009765625, -202.84071350097656, -87.20689392089844, -226.9839630126953, 164.31907653808594, 1014.261474609375, 1260.05517578125, 462.83013916015625, 46.57334899902344, 119.25469970703125, 387.0940856933594, 204.74388122558594, 667.0326538085938, 492.9971618652344, -106.24790954589844, 3.8729934692382812, 661.8917236328125, 966.4669189453125, -40.68211364746094, 71.96379089355469, 887.157470703125, -190.03675842285156, 41.15238952636719, -775.7993774414062, 498.07208251953125, 73.10020446777344, -6.873332977294922, 368.89300537109375, 69.59356689453125, 1302.541259765625, 26.99658203125, 1404.6500244140625, 14.092910766601562, 76.86380004882812, 122.6728744506836, 282.87359619140625, 330.667236328125, 687.864013671875, -948.0170288085938, 191.04229736328125, 589.2483520507812, -419.9007873535156, 335.4239807128906, 163.66867065429688, 325.0193176269531, 267.19781494140625, 962.499755859375, -277.1015319824219, -164.5925750732422, 252.56765747070312, -109.37400817871094, -408.5347900390625, -75.61665344238281, -955.7325439453125, 652.20556640625, -519.678466796875, 312.7001037597656, -168.67820739746094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000497.npy"}
{"epoch": 0.7298091042584435, "step": 498, "batch_size": 64, "mean": 352.5992431640625, "std": 619.0040893554688, "min": -1379.2598876953125, "p10": -280.7842559814453, "median": 350.4651794433594, "p90": 1066.2609375000002, "max": 2094.2802734375, "pos_frac": 0.71875, "sample": [-957.8635864257812, 237.80783081054688, 648.0178833007812, 149.32699584960938, -196.67391967773438, 643.5081787109375, 655.330322265625, 1083.500244140625, 292.26153564453125, 730.5841674804688, 479.88262939453125, 697.2978515625, 288.8871154785156, 957.3648071289062, -1379.2598876953125, 725.8504638671875, 1017.1870727539062, 1516.2320556640625, 46.19928741455078, 600.2639770507812, 125.57170867919922, 753.5374755859375, -123.5350341796875, 1026.035888671875, -262.5988464355469, 316.8954162597656, 388.98004150390625, 85.83496856689453, -463.97796630859375, -5.128044128417969, 290.8055114746094, 384.0349426269531, -1106.3277587890625, 1306.95556640625, 515.9656982421875, 5.8038330078125, -153.25247192382812, 538.3560180664062, 612.4917602539062, 495.25982666015625, 1570.0345458984375, -0.8145065307617188, 0.1478271484375, 45.72346115112305, 593.8341674804688, 641.9501342773438, -399.6899719238281, -38.57476806640625, 1355.6112060546875, 993.6987915039062, 261.6744384765625, -288.5780029296875, 390.7061462402344, -241.1973876953125, -210.85450744628906, -37.111724853515625, 501.12408447265625, 161.39300537109375, 982.3886108398438, 484.65277099609375, -307.9024353027344, 1176.993408203125, 2094.2802734375, -130.5526123046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000498.npy"}
{"epoch": 0.7312775330396476, "step": 499, "batch_size": 64, "mean": 430.9838562011719, "std": 577.1480712890625, "min": -1002.7814331054688, "p10": -191.10684356689453, "median": 461.8444061279297, "p90": 1052.2104370117188, "max": 2342.245849609375, "pos_frac": 0.796875, "sample": [1047.247802734375, 89.48121643066406, 199.10150146484375, -1002.7814331054688, -56.555137634277344, 537.4251708984375, 538.552734375, 659.2630004882812, -79.94721221923828, -95.44802856445312, 533.5247802734375, 639.2406616210938, 169.528564453125, 1054.3372802734375, 617.0344848632812, 185.86453247070312, 734.3896484375, -689.730224609375, 2342.245849609375, 281.05401611328125, 197.925048828125, 1038.4808349609375, 770.785400390625, 30.552528381347656, -552.0579223632812, 512.1795654296875, 288.9795227050781, 995.9991455078125, -251.49037170410156, 1207.001953125, 233.01174926757812, 52.44293212890625, 597.0679321289062, -179.38211059570312, 281.4685363769531, 29.211923599243164, 574.7269897460938, 909.447265625, 937.558349609375, -231.75267028808594, 38.273529052734375, 592.3666381835938, 637.5677490234375, 614.0653076171875, 620.79345703125, 122.57982635498047, 708.2343139648438, 1748.71142578125, 1460.7144775390625, 168.01303100585938, 411.5092468261719, 721.9366455078125, 379.68170166015625, 882.317138671875, -63.59519577026367, 819.7703857421875, 1281.095703125, 372.64935302734375, -609.348876953125, -159.4316864013672, -196.13172912597656, 560.255859375, 50.231651306152344, 1274.722412109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000499.npy"}
{"epoch": 0.7327459618208517, "step": 500, "batch_size": 64, "mean": 352.3294677734375, "std": 564.7052612304688, "min": -1080.2764892578125, "p10": -379.49642333984366, "median": 276.84129333496094, "p90": 956.5584350585939, "max": 1723.73876953125, "pos_frac": 0.78125, "sample": [152.84054565429688, -209.18922424316406, -647.8468627929688, 1723.73876953125, -594.5127563476562, 556.5709228515625, 971.5223999023438, 479.1544189453125, 834.279052734375, 74.01598358154297, 894.07763671875, 674.0467529296875, 798.2965698242188, -411.2840270996094, 29.596342086791992, 514.5596923828125, 32.89869689941406, 140.569091796875, 287.7557678222656, 1372.8922119140625, 598.480712890625, -43.00492858886719, 747.4580688476562, 1416.94140625, -39.62952423095703, 739.2802124023438, -459.48626708984375, 921.6425170898438, 896.5335083007812, -734.1600952148438, 792.148681640625, 824.4873046875, -57.88111877441406, 117.80223846435547, 62.73822784423828, 876.2421875, -663.53369140625, 157.82369995117188, 981.4095458984375, -1080.2764892578125, -131.1619415283203, 65.43711853027344, 117.57471466064453, 686.9151000976562, 54.598365783691406, -96.80902862548828, 590.8161010742188, 79.45098114013672, 106.9273681640625, 265.92681884765625, 1308.6912841796875, 893.2111206054688, 849.505615234375, 199.69332885742188, 617.7105712890625, 486.8327331542969, 674.0836791992188, 349.70098876953125, 26.79781150817871, 590.43701171875, 90.13624572753906, 1263.950439453125, -305.3253479003906, 34.98756408691406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000500.npy"}
{"epoch": 0.7342143906020558, "step": 501, "batch_size": 64, "mean": 379.6474304199219, "std": 600.6553955078125, "min": -1086.092529296875, "p10": -306.8445602416991, "median": 357.8916931152344, "p90": 1187.9832763671877, "max": 1945.726806640625, "pos_frac": 0.734375, "sample": [-99.51273345947266, 416.55029296875, 351.748046875, -345.5299987792969, -40.068912506103516, 1340.6195068359375, 246.25071716308594, 1026.445556640625, 276.9337158203125, -9.493804931640625, 181.92190551757812, -183.3497772216797, 325.279296875, 218.7650909423828, 1945.726806640625, 721.2390747070312, 1049.2073974609375, 430.3120422363281, 434.6856384277344, 975.8134765625, 1210.370849609375, -69.60429382324219, 314.75616455078125, 139.18038940429688, 875.1604614257812, -526.825439453125, -723.9758911132812, 383.53729248046875, 1130.5504150390625, 571.1578979492188, -793.46484375, 243.602783203125, 495.949462890625, 458.4597473144531, 453.2442626953125, 1398.6400146484375, -210.52903747558594, 572.4803466796875, 352.1810607910156, 919.9841918945312, 1284.0621337890625, 12.748954772949219, -146.6732635498047, -378.8871765136719, 680.1344604492188, 30.17535400390625, 29.41421890258789, 637.30126953125, 100.22085571289062, -1086.092529296875, 363.6023254394531, 628.5426025390625, -216.5785369873047, -119.63119506835938, 83.2274169921875, -559.0242919921875, 1135.74560546875, 800.1490478515625, 1331.8310546875, 438.3727111816406, 1780.6373291015625, 572.6959228515625, 459.2879638671875, -22.22616195678711], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000501.npy"}
{"epoch": 0.73568281938326, "step": 502, "batch_size": 64, "mean": 373.10076904296875, "std": 475.6739807128906, "min": -624.6331176757812, "p10": -65.33530883789062, "median": 329.93121337890625, "p90": 994.2436401367188, "max": 2353.671630859375, "pos_frac": 0.8125, "sample": [658.0737915039062, -327.07891845703125, 905.7958374023438, 427.6104431152344, 58.8555908203125, 671.1065063476562, -174.63424682617188, 7.06982421875, 1256.9276123046875, 364.04180908203125, 395.5667724609375, 216.58029174804688, 363.2724609375, 1094.088134765625, 937.6455078125, 10.4130859375, 23.59429931640625, 234.5985565185547, -624.6331176757812, 95.7996826171875, 655.4929809570312, 1095.068603515625, 121.84178924560547, 406.3633728027344, 35.15495300292969, 201.82749938964844, 36.59535217285156, 345.9316711425781, 467.466796875, 332.8201904296875, -355.88385009765625, 1140.0167236328125, -7.292366027832031, 523.6337890625, 487.3739013671875, 1001.599609375, -163.2270965576172, 2353.671630859375, 59.36125183105469, -7.768354415893555, 758.369384765625, 17.796981811523438, 269.95367431640625, 348.64105224609375, 387.36370849609375, 557.8344116210938, 1214.907958984375, -3.974721908569336, 700.771728515625, 505.85174560546875, 787.0406494140625, -33.63063049316406, -57.09929656982422, 117.14584350585938, 977.0797119140625, -95.44784545898438, 246.03883361816406, 77.72750854492188, 709.142333984375, 308.020263671875, -68.86502838134766, 327.042236328125, 168.82850646972656, 333.1689453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000502.npy"}
{"epoch": 0.737151248164464, "step": 503, "batch_size": 64, "mean": 345.94140625, "std": 473.9827880859375, "min": -353.71484375, "p10": -148.71579437255858, "median": 264.2348175048828, "p90": 854.0924499511721, "max": 2147.0087890625, "pos_frac": 0.765625, "sample": [1.7654228210449219, -72.6202621459961, 1160.030517578125, 55.97441101074219, -153.7719268798828, -168.1676025390625, 517.2630615234375, 284.3988342285156, 87.72260284423828, -81.93099212646484, -353.71484375, 334.17498779296875, -208.17807006835938, 256.2122802734375, 1001.0369262695312, 806.7694091796875, 349.201171875, 702.9339599609375, 91.5758056640625, 800.7261962890625, -41.18122863769531, 13.495922088623047, 44.07975769042969, 63.48371124267578, 383.03802490234375, -61.04380798339844, -136.91815185546875, 588.7252197265625, 192.94131469726562, 143.057373046875, 517.60498046875, -18.03759765625, 113.09178161621094, 708.2406616210938, 18.265708923339844, -295.42047119140625, 2147.0087890625, 1849.8677978515625, 583.5289306640625, 384.98284912109375, 120.56683349609375, 697.2776489257812, 764.1688842773438, 1.286977767944336, -82.94085693359375, 632.5270385742188, 56.517738342285156, 818.2904663085938, 225.87509155273438, 394.4796447753906, 174.08566284179688, 616.95556640625, 494.02685546875, 277.6510009765625, 1000.8320922851562, 549.96240234375, -186.54830932617188, 763.5596923828125, -240.68331909179688, 869.4361572265625, 272.2573547363281, 432.32147216796875, 961.9227905273438, -83.79122924804688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000503.npy"}
{"epoch": 0.7386196769456681, "step": 504, "batch_size": 64, "mean": 423.2089538574219, "std": 577.7997436523438, "min": -705.88818359375, "p10": -331.68663940429684, "median": 363.2411193847656, "p90": 1096.4610839843751, "max": 1770.5911865234375, "pos_frac": 0.78125, "sample": [52.025604248046875, 137.30157470703125, 889.7483520507812, 398.774658203125, 134.04457092285156, 353.6415710449219, 722.3212280273438, -261.8118591308594, 1770.5911865234375, 1417.76513671875, 1749.4488525390625, -650.9148559570312, 600.195068359375, 751.9707641601562, -46.42103576660156, -69.21882629394531, 673.991455078125, 209.11309814453125, 9.167594909667969, -26.678314208984375, 39.17925262451172, -37.139808654785156, 395.12432861328125, -315.17626953125, 233.5792999267578, 255.6664581298828, -525.5301513671875, 568.3243408203125, 220.87374877929688, 496.28155517578125, -454.9219665527344, 1099.338623046875, -457.3909606933594, 1410.50634765625, 850.143310546875, 712.8423461914062, 840.1517333984375, 580.3191528320312, 223.12124633789062, 356.5740966796875, 101.70745849609375, 1291.2388916015625, 906.7727661132812, 511.4492492675781, 981.3488159179688, -159.50003051757812, -504.8214416503906, 846.3082885742188, 55.44044494628906, 1041.6875, 189.78866577148438, 787.3369750976562, 1089.746826171875, 732.524658203125, 342.2662353515625, 369.90814208984375, -338.76251220703125, 980.4727172851562, 1680.5389404296875, 324.2940673828125, -705.88818359375, 543.9487915039062, 694.7543334960938, 15.8909912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000504.npy"}
{"epoch": 0.7400881057268722, "step": 505, "batch_size": 64, "mean": 226.26625061035156, "std": 454.06243896484375, "min": -834.6461181640625, "p10": -229.44584045410156, "median": 161.66693878173828, "p90": 881.632489013672, "max": 1388.9892578125, "pos_frac": 0.6875, "sample": [10.809379577636719, 517.3289184570312, -221.3739013671875, 625.2424926757812, -51.49674987792969, 878.6336059570312, 196.22743225097656, -31.982650756835938, 36.776790618896484, 13.875289916992188, 812.9345703125, 882.917724609375, 1388.9892578125, -16.301437377929688, 604.251708984375, 494.22039794921875, -161.76890563964844, 352.50933837890625, 134.71783447265625, -218.5295867919922, 280.25738525390625, 687.416259765625, 174.1536865234375, 81.58786010742188, -434.78509521484375, -664.6210327148438, 149.18019104003906, 293.71270751953125, 959.5315551757812, 245.22576904296875, -154.4510040283203, 408.5242919921875, 369.890380859375, -232.90524291992188, 217.41549682617188, 675.438720703125, 129.03836059570312, 703.6083984375, -71.14766693115234, 507.06683349609375, -678.4641723632812, -834.6461181640625, 143.8813934326172, 260.30401611328125, 13.443267822265625, 126.87999725341797, 1010.4337768554688, 578.0438842773438, -218.3300323486328, 916.094482421875, 148.9939422607422, -180.8720703125, -398.59893798828125, 933.9468994140625, 207.66709899902344, -99.18370056152344, 860.5416259765625, -393.7850341796875, -207.27294921875, 79.91529846191406, 928.9445190429688, 187.64987182617188, 546.638916015625, -23.304180145263672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000505.npy"}
{"epoch": 0.7415565345080763, "step": 506, "batch_size": 64, "mean": 368.8091125488281, "std": 524.0787353515625, "min": -786.2079467773438, "p10": -189.96503753662105, "median": 335.1687927246094, "p90": 1068.4536865234375, "max": 1987.586181640625, "pos_frac": 0.75, "sample": [-452.72650146484375, 31.955322265625, 731.9141235351562, 5.1784515380859375, 268.0343933105469, 289.5367431640625, -205.8428955078125, 541.2155151367188, 850.3590087890625, 66.6761474609375, 499.8001403808594, 300.10723876953125, 288.29473876953125, 219.8424835205078, -703.184814453125, 1069.0076904296875, 446.0708923339844, -29.236770629882812, 317.7701721191406, 645.7093505859375, 448.60736083984375, 1356.4163818359375, 394.23248291015625, 89.73046112060547, 1987.586181640625, -152.9167022705078, -285.09588623046875, 519.3606567382812, 682.8806762695312, 265.4422607421875, 395.72076416015625, 1216.353271484375, 400.6626281738281, 104.63032531738281, 540.5072631835938, -768.8469848632812, 352.5674133300781, 167.260986328125, 1209.65234375, 943.6727294921875, 558.8285522460938, 676.4631958007812, -119.30009460449219, 488.92919921875, 64.2028579711914, -786.2079467773438, 302.0356140136719, -20.808876037597656, 1153.7923583984375, 979.7244873046875, 1067.1610107421875, -3.374235153198242, 359.39697265625, 767.90380859375, 766.3334350585938, 1108.60107421875, -10.892837524414062, 1045.7772216796875, 185.59861755371094, -22.97510528564453, -330.87994384765625, -143.81301879882812, 519.66552734375, -51.28443145751953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000506.npy"}
{"epoch": 0.7430249632892805, "step": 507, "batch_size": 64, "mean": 325.80413818359375, "std": 627.0028686523438, "min": -1124.8377685546875, "p10": -337.7032073974609, "median": 287.6995849609375, "p90": 931.2552978515628, "max": 2491.114013671875, "pos_frac": 0.671875, "sample": [-5.039438247680664, 478.052978515625, -72.06251525878906, -1124.8377685546875, 1200.95849609375, -106.68487548828125, 27.217941284179688, 412.19378662109375, -6.985801696777344, 133.74835205078125, -73.50495910644531, 398.6456298828125, 44.98670959472656, 423.31195068359375, 682.465087890625, 800.8906860351562, -449.5360107421875, 300.99029541015625, 138.5888214111328, 2132.096923828125, 584.5521850585938, 725.1469116210938, 32.77244186401367, -184.76315307617188, 424.947998046875, 327.1910400390625, 693.1810302734375, 874.4924926757812, -357.8561706542969, 335.85858154296875, 292.28057861328125, -63.278289794921875, -290.67962646484375, -30.52311897277832, -5.171655654907227, 607.549560546875, 533.9776611328125, 1488.5506591796875, 635.5322875976562, 61.14354705810547, 283.11859130859375, 18.174362182617188, -62.4373779296875, 421.1661682128906, 2491.114013671875, 955.5822143554688, 165.20654296875, 297.09613037109375, -474.6080627441406, 767.8577270507812, 823.1982421875, -479.46881103515625, -44.75395202636719, 728.69580078125, 2035.2216796875, 640.6556396484375, 984.6807861328125, 446.67364501953125, 214.07110595703125, -42.42351531982422, 30.369529724121094, -134.88021850585938, -425.8010559082031, -807.44677734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000507.npy"}
{"epoch": 0.7444933920704846, "step": 508, "batch_size": 64, "mean": 281.86297607421875, "std": 639.6566772460938, "min": -1552.25146484375, "p10": -380.4529266357422, "median": 253.72866821289062, "p90": 953.8649414062504, "max": 2220.959228515625, "pos_frac": 0.6875, "sample": [210.08596801757812, 161.43927001953125, -712.0643920898438, 708.0481567382812, 592.4080810546875, -89.2998046875, 94.38143920898438, 2013.344482421875, 427.7407531738281, 713.7962036132812, 376.1997985839844, 1098.6156005859375, 274.0516357421875, 2220.959228515625, 576.783447265625, 289.39031982421875, 172.00796508789062, -690.2109375, 1289.5386962890625, 845.248046875, -577.1375732421875, -281.168701171875, 460.4381103515625, -1026.98876953125, 1000.4150390625, -259.9439392089844, 67.0869140625, -166.58267211914062, -110.12034606933594, 394.45147705078125, -113.52436828613281, 337.51873779296875, 1.4085845947265625, -30.049152374267578, 1819.0731201171875, -1552.25146484375, 133.22808837890625, 136.89944458007812, 189.98883056640625, 813.2882080078125, 371.8183288574219, 233.40570068359375, 110.5667724609375, 497.6527099609375, 647.544189453125, 293.1419677734375, 590.072265625, 673.8638916015625, 766.3435668945312, 583.1435546875, 631.5062866210938, -209.09292602539062, -244.7992706298828, 1046.373046875, 629.057373046875, -167.6778564453125, -113.53717041015625, -44.0262451171875, -490.39520263671875, 700.8184814453125, -384.486328125, 379.7720947265625, 100.7139663696289, -371.0416564941406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000508.npy"}
{"epoch": 0.7459618208516887, "step": 509, "batch_size": 64, "mean": 301.72552490234375, "std": 495.1992492675781, "min": -744.2820434570312, "p10": -320.6261688232422, "median": 258.55950927734375, "p90": 994.9968505859375, "max": 1869.89990234375, "pos_frac": 0.78125, "sample": [-38.100440979003906, -75.65234375, -635.169189453125, 903.8072509765625, -144.13275146484375, 521.5113525390625, 89.75074005126953, 56.51667022705078, 516.7925415039062, 290.81707763671875, 722.2650146484375, 49.80921173095703, 292.183837890625, 141.66128540039062, 223.1425323486328, 186.84361267089844, 995.1016235351562, -562.2169189453125, -471.50079345703125, 435.8829345703125, 1182.18994140625, 492.4694519042969, 149.28457641601562, 268.16339111328125, 410.234375, 780.5996704101562, 125.19497680664062, 141.63827514648438, 332.55328369140625, 420.7992248535156, 564.4307250976562, -348.21893310546875, 96.83238220214844, -271.3174743652344, 275.0775451660156, 994.7523803710938, 763.9358520507812, 60.51509094238281, 248.95562744140625, -322.7525634765625, -5.207794189453125, 525.7617797851562, 292.8984375, 191.229248046875, 47.02845764160156, -744.2820434570312, 1256.5919189453125, 1317.0689697265625, -135.83155822753906, 1869.89990234375, -315.6645812988281, 175.3350830078125, -409.4713439941406, 94.33377075195312, 289.8435363769531, 363.82696533203125, 149.28778076171875, 231.05673217773438, 286.6505126953125, 516.5036010742188, 799.6333618164062, 1168.661376953125, 428.09307861328125, 1052.5362548828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000509.npy"}
{"epoch": 0.7474302496328928, "step": 510, "batch_size": 64, "mean": 396.84149169921875, "std": 447.57135009765625, "min": -611.979736328125, "p10": -207.35707473754874, "median": 375.81996154785156, "p90": 924.1010070800783, "max": 1380.1953125, "pos_frac": 0.796875, "sample": [220.79202270507812, 867.552490234375, 1249.6097412109375, 444.2912292480469, -245.26852416992188, 943.1702880859375, 769.5164184570312, 342.55841064453125, 125.43571472167969, 735.8069458007812, 400.3366394042969, 534.8623657226562, -287.9905090332031, 541.5450439453125, 1199.13720703125, 834.0509643554688, 717.9346923828125, -611.979736328125, -37.29625701904297, 879.6060180664062, 275.4454650878906, 351.30328369140625, 538.4091796875, 155.60653686523438, 314.6387634277344, 716.3570556640625, 581.8145141601562, 971.3738403320312, 1160.034912109375, -330.49249267578125, 857.1112060546875, 1380.1953125, -100.04776000976562, 146.42718505859375, 821.7289428710938, 105.30960845947266, 315.0111389160156, 1184.8492431640625, -347.11944580078125, 264.1129150390625, 277.4000244140625, 44.011573791503906, 700.8143310546875, 274.62530517578125, 638.9886474609375, -91.30167388916016, 494.6479187011719, 483.5649108886719, 50.97465515136719, 418.24310302734375, 759.19384765625, -118.89702606201172, 134.47076416015625, 752.1670532226562, -80.78675842285156, 509.7745361328125, -331.6501770019531, 851.4344482421875, -37.17961883544922, -279.77154541015625, 4.601358413696289, 675.4942016601562, 238.05380249023438, 43.242610931396484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000510.npy"}
{"epoch": 0.748898678414097, "step": 511, "batch_size": 64, "mean": 311.1535339355469, "std": 587.685791015625, "min": -1495.482666015625, "p10": -166.70490417480468, "median": 251.25189208984375, "p90": 1051.4988952636725, "max": 1816.775390625, "pos_frac": 0.75, "sample": [-165.95404052734375, 1305.0916748046875, 219.00546264648438, 151.8013458251953, 359.766845703125, 119.11265563964844, 1116.04443359375, 1437.050048828125, 542.6661987304688, 127.8447036743164, 35.92847442626953, 693.2382202148438, 698.95703125, 662.49560546875, 162.7318878173828, 1210.498291015625, -1495.482666015625, 365.54168701171875, -487.1387939453125, 676.5322875976562, 1542.8978271484375, 771.0647583007812, 101.81788635253906, 505.953369140625, 258.8212890625, 3.6411819458007812, -723.0719604492188, 390.78466796875, -36.111175537109375, 640.5905151367188, 449.4501953125, -4.8118133544921875, -22.50230598449707, 299.6217041015625, 313.35546875, 65.0627670288086, 900.8926391601562, -167.02670288085938, -9.651123046875, 258.64422607421875, 866.5152587890625, 181.94784545898438, 22.657184600830078, -303.7673034667969, -90.7291488647461, 703.5202026367188, 706.3017578125, 43.10603332519531, -28.283689498901367, 20.671646118164062, -229.83767700195312, 477.5188903808594, 534.4369506835938, -9.964212417602539, 243.85955810546875, 482.43817138671875, 260.7454528808594, -15.094383239746094, 1816.775390625, 86.45466613769531, 475.5499572753906, -1411.0283203125, 158.36538696289062, 1646.51318359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000511.npy"}
{"epoch": 0.750367107195301, "step": 512, "batch_size": 64, "mean": 434.24078369140625, "std": 456.37841796875, "min": -451.13360595703125, "p10": -97.4338516235351, "median": 379.37391662597656, "p90": 1061.9115234375004, "max": 2019.456787109375, "pos_frac": 0.859375, "sample": [1117.09619140625, 962.124267578125, 796.9238891601562, -307.0640869140625, -141.2942352294922, 24.075836181640625, 2019.456787109375, 260.31341552734375, 58.308074951171875, -42.561012268066406, 163.51524353027344, 742.0032958984375, 3.782135009765625, 457.0078125, -258.4102478027344, 44.65395736694336, 559.906494140625, 535.4546508789062, 403.667236328125, 10.229803085327148, -172.01937866210938, 785.746337890625, 101.01736450195312, 562.182861328125, 1214.2359619140625, 798.5327758789062, 243.59141540527344, 420.1184387207031, 54.84838104248047, 765.9462890625, 351.43682861328125, 297.4378662109375, 537.7706298828125, 489.26177978515625, 219.6985626220703, 164.4779510498047, 1104.677490234375, 301.4036560058594, -120.9507827758789, 925.0361328125, 128.71762084960938, 1186.36669921875, 416.6243591308594, 1105.6883544921875, 32.607513427734375, -451.13360595703125, 344.01080322265625, 61.701908111572266, 275.26312255859375, 399.30133056640625, 1171.5693359375, -1.2023086547851562, 297.0650939941406, 777.1807861328125, 360.2543029785156, 664.548095703125, 738.537841796875, 259.7765808105469, 398.4935302734375, 877.792236328125, -251.8625030517578, 935.126220703125, 820.6115112304688, 790.73193359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000512.npy"}
{"epoch": 0.7518355359765051, "step": 513, "batch_size": 64, "mean": 306.30194091796875, "std": 497.4742736816406, "min": -717.2280883789062, "p10": -180.91434326171876, "median": 228.79302215576172, "p90": 1109.7954223632817, "max": 1435.70263671875, "pos_frac": 0.765625, "sample": [723.8206787109375, 353.5076904296875, 308.888916015625, 153.18637084960938, 286.703369140625, 198.0361785888672, 335.38519287109375, 181.2303009033203, -155.26597595214844, 1009.8289794921875, 681.4979858398438, -174.7244873046875, 738.908447265625, 57.400596618652344, 82.0948715209961, 762.1414184570312, 31.821073532104492, 58.7269287109375, 192.1135711669922, 363.812744140625, 961.7178955078125, 1171.357421875, 1312.5379638671875, 318.7252197265625, 575.4410400390625, 7.855136871337891, 259.54986572265625, 182.73822021484375, 342.77386474609375, -183.567138671875, 127.49649047851562, 909.5437622070312, 515.3077392578125, 459.94921875, 322.17669677734375, -57.29620361328125, -91.55094909667969, -650.2671508789062, 276.0416259765625, 1435.70263671875, -115.49809265136719, 141.40740966796875, 51.8946533203125, -717.2280883789062, 584.3521118164062, 6.19268798828125, -535.4222412109375, -53.00330352783203, 1152.63818359375, 869.718505859375, -588.957275390625, 4.265205383300781, 1158.4244384765625, 1307.6678466796875, 440.1044921875, 179.24313354492188, -393.8203125, 661.4276123046875, -120.9603042602539, -147.72532653808594, -369.667236328125, 1211.44140625, 163.931884765625, 327.5494689941406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000513.npy"}
{"epoch": 0.7533039647577092, "step": 514, "batch_size": 64, "mean": 301.52301025390625, "std": 524.6412353515625, "min": -857.6136474609375, "p10": -223.9020263671875, "median": 204.31349182128906, "p90": 974.5205993652347, "max": 1984.45849609375, "pos_frac": 0.703125, "sample": [-522.1104125976562, -151.243408203125, 125.37037658691406, -92.50798034667969, 1119.808349609375, 144.75192260742188, 608.99169921875, 660.7630004882812, 1906.489990234375, 847.8512573242188, 166.4123992919922, 597.5360717773438, -150.900390625, 213.34991455078125, 306.87518310546875, -112.57188415527344, -281.1817932128906, 198.427734375, 1172.35693359375, 1084.838623046875, -224.1636199951172, -102.22657012939453, 288.7284240722656, -107.55086517333984, 36.140907287597656, 310.8824157714844, 401.407958984375, 137.52841186523438, 507.8663330078125, -156.2801513671875, 835.2622680664062, 556.4091186523438, -138.29473876953125, -64.75390625, 153.83639526367188, 379.718505859375, 1232.8114013671875, -174.52154541015625, 102.87084197998047, -260.8903503417969, 383.44573974609375, -422.6546630859375, -203.17425537109375, 530.477783203125, 219.20828247070312, 122.26194763183594, -857.6136474609375, -223.29164123535156, -342.2235107421875, 880.85693359375, 210.19924926757812, 498.87176513671875, 462.48095703125, 642.1630859375, 1014.6621704101562, 153.13748168945312, 302.1121826171875, 690.0405883789062, 712.771728515625, 114.30314636230469, 706.0552368164062, 26.695484161376953, 134.13992309570312, 1984.45849609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000514.npy"}
{"epoch": 0.7547723935389133, "step": 515, "batch_size": 64, "mean": 231.98915100097656, "std": 441.6803894042969, "min": -1571.38818359375, "p10": -235.5767852783203, "median": 223.2297821044922, "p90": 731.3203308105473, "max": 1356.677001953125, "pos_frac": 0.796875, "sample": [-63.49677658081055, -120.23316955566406, 483.9380187988281, 768.725830078125, 550.5039672851562, -1571.38818359375, 481.2971496582031, 444.07501220703125, 153.5802001953125, -490.3074951171875, -306.60784912109375, 9.861373901367188, 12.334869384765625, 184.5662841796875, 278.0335388183594, 323.24969482421875, 188.8906707763672, 614.6982421875, -353.45458984375, 43.57745361328125, 366.8691711425781, -123.90702819824219, 138.5663299560547, 345.94281005859375, 218.73411560058594, 227.72544860839844, 628.7711791992188, -180.242919921875, 566.3096313476562, 67.9645767211914, 817.826416015625, 29.79001808166504, -232.96197509765625, 597.2900390625, 275.60107421875, 356.5865478515625, 644.0408325195312, 11.769989013671875, 1237.4879150390625, -236.69741821289062, 158.13827514648438, 965.3496704101562, -366.01953125, 11.450965881347656, -376.3232727050781, 207.84072875976562, 106.63687896728516, 812.4204711914062, 0.12318801879882812, 110.76483154296875, 15.789695739746094, 1073.3974609375, 256.8753967285156, 136.556396484375, 265.3816833496094, 566.396240234375, 441.2460632324219, -129.43417358398438, 355.09759521484375, 392.9769287109375, 362.9834289550781, 263.7255554199219, 1356.677001953125, 469.94268798828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000515.npy"}
{"epoch": 0.7562408223201175, "step": 516, "batch_size": 64, "mean": 335.4332275390625, "std": 418.75091552734375, "min": -721.6815185546875, "p10": -212.33054809570308, "median": 357.7476501464844, "p90": 880.9788208007815, "max": 1146.54052734375, "pos_frac": 0.78125, "sample": [233.85232543945312, 238.38296508789062, 718.700927734375, -119.48767852783203, 578.2188720703125, 648.5484008789062, 459.4759521484375, -125.58985900878906, 221.5943145751953, 429.0904541015625, 1146.54052734375, 485.03912353515625, 509.2816162109375, 525.3693237304688, 200.5188446044922, -158.61563110351562, 663.3034057617188, -68.91505432128906, 88.87567138671875, 1102.9813232421875, -721.6815185546875, 99.95901489257812, -235.35122680664062, -494.44049072265625, 1004.5218505859375, 438.13037109375, -535.3544311523438, 813.5180053710938, 1104.090087890625, 166.09837341308594, 367.3932800292969, 815.6887817382812, 1001.4387817382812, 282.65679931640625, 559.1362915039062, 609.2845458984375, 262.833740234375, 53.84743881225586, 477.72833251953125, 323.106689453125, 156.0686492919922, -31.830080032348633, 908.9602661132812, 321.2622375488281, 18.912601470947266, 1091.1083984375, 186.76783752441406, 140.77734375, -22.432228088378906, 443.5874328613281, 348.1020202636719, 523.6135864257812, 88.2637939453125, 589.305419921875, 675.6096801757812, -252.87826538085938, -260.112548828125, -374.1029052734375, 527.9335327148438, 632.951416015625, 497.60235595703125, 778.375, 368.53582763671875, -58.42683792114258], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000516.npy"}
{"epoch": 0.7577092511013216, "step": 517, "batch_size": 64, "mean": 412.10162353515625, "std": 555.355712890625, "min": -595.7506713867188, "p10": -184.89706115722655, "median": 371.75999450683594, "p90": 922.3923461914062, "max": 2517.56884765625, "pos_frac": 0.765625, "sample": [636.0557250976562, 749.4773559570312, 45.85028076171875, 385.8820495605469, 833.6376342773438, 186.31704711914062, -325.65264892578125, 888.4232177734375, 532.2913208007812, -281.6479797363281, 474.4938049316406, 1993.5194091796875, 1358.758056640625, 402.81597900390625, 603.450439453125, -138.94613647460938, 246.96969604492188, 531.7872314453125, 101.82946014404297, 680.6410522460938, 1511.59326171875, 619.180908203125, -24.55281639099121, 301.3807067871094, -595.7506713867188, -275.759765625, 118.82782745361328, 444.2981262207031, -354.1282653808594, 2517.56884765625, 347.4473876953125, -57.86277770996094, -170.90577697753906, 919.8670654296875, 131.1295166015625, 575.9586791992188, -498.589599609375, 462.2666931152344, -35.6995849609375, 369.71734619140625, 756.201416015625, -102.38008880615234, 308.7360534667969, 247.37399291992188, 598.3108520507812, 558.9525756835938, 764.4127197265625, 386.7106018066406, 33.03015899658203, 1420.6434326171875, 127.5191421508789, 1188.6290283203125, 373.8026428222656, 923.474609375, 179.42822265625, 709.9483642578125, -190.89332580566406, 213.01583862304688, 279.6993713378906, 487.3873291015625, -118.17828369140625, 823.3192138671875, 257.1324768066406, -63.713783264160156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000517.npy"}
{"epoch": 0.7591776798825257, "step": 518, "batch_size": 64, "mean": 339.1503601074219, "std": 431.4859924316406, "min": -826.397216796875, "p10": -186.44239807128903, "median": 312.0154724121094, "p90": 916.993621826172, "max": 1262.85986328125, "pos_frac": 0.796875, "sample": [117.299072265625, 1136.068115234375, 529.4142456054688, 369.73443603515625, 200.11082458496094, 400.321533203125, 435.32293701171875, 546.865234375, 72.02632141113281, 238.93185424804688, 1002.7764282226562, 649.375732421875, 80.2562255859375, -154.55039978027344, 478.89483642578125, 218.11280822753906, 425.1227722167969, 176.4381103515625, 127.60272216796875, 568.7266235351562, 930.2774658203125, -29.750221252441406, -285.0699768066406, 153.78408813476562, -31.960636138916016, 327.4465637207031, 826.8916625976562, 67.11480712890625, 722.7823486328125, 863.4212036132812, 848.3618774414062, 49.90187072753906, 1262.85986328125, 1193.939453125, 827.8616943359375, 715.6619873046875, 153.27239990234375, -106.66946411132812, 411.5249328613281, 624.2939453125, 309.02569580078125, -100.61978149414062, 552.6481323242188, -259.8567810058594, 426.85577392578125, -395.107177734375, -402.17315673828125, -826.397216796875, 381.4478454589844, 499.4953308105469, 17.666976928710938, 253.99093627929688, -200.1103973388672, 198.03189086914062, -393.7001953125, 1089.3521728515625, 1022.478271484375, 298.1576232910156, -48.06235122680664, 885.9979858398438, 315.0052490234375, 139.64735412597656, 554.0623779296875, 242.99000549316406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000518.npy"}
{"epoch": 0.7606461086637298, "step": 519, "batch_size": 64, "mean": 235.43357849121094, "std": 457.8655700683594, "min": -988.13427734375, "p10": -309.4398651123047, "median": 245.02144622802734, "p90": 857.5864074707032, "max": 1264.4114990234375, "pos_frac": 0.6875, "sample": [-5.490447998046875, 558.1163940429688, 367.4563293457031, -508.0257873535156, 944.3120727539062, -301.288330078125, 59.07951354980469, 527.5269775390625, 1010.6661987304688, 599.6802368164062, 127.93232727050781, 586.6347045898438, -222.47076416015625, 925.0355834960938, 383.4254455566406, -918.4188232421875, 1202.98828125, 534.375244140625, 23.18645477294922, -133.66993713378906, 287.9588623046875, -98.65742492675781, 47.45008850097656, 373.8731994628906, 533.3695678710938, 162.91000366210938, 280.59429931640625, 641.1262817382812, 532.1417846679688, 64.6539535522461, -30.61529541015625, 681.7816162109375, -206.78073120117188, -84.5933837890625, 372.5337219238281, 79.75116729736328, 509.08935546875, -33.367530822753906, -375.3443603515625, -31.573989868164062, 439.64971923828125, -312.9333801269531, 981.4496459960938, 411.88287353515625, -988.13427734375, 557.1973266601562, 1264.4114990234375, 32.86798858642578, -51.66143798828125, 18.335617065429688, 166.2549285888672, 867.2186889648438, -377.5218200683594, 612.1115112304688, -272.72650146484375, 258.52484130859375, 835.111083984375, 18.008953094482422, 359.9089050292969, 516.1257934570312, 231.51805114746094, -50.9716911315918, 439.3941345214844, -355.6260986328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000519.npy"}
{"epoch": 0.762114537444934, "step": 520, "batch_size": 64, "mean": 316.26123046875, "std": 486.62811279296875, "min": -1019.6341552734375, "p10": -191.46908416748045, "median": 230.03292083740234, "p90": 936.8826049804688, "max": 2142.03857421875, "pos_frac": 0.75, "sample": [-118.70401763916016, -267.2269287109375, 1035.2027587890625, 154.20677185058594, 540.2667846679688, 182.82659912109375, 356.3585510253906, -73.53219604492188, -237.7022705078125, 150.80670166015625, 39.84767150878906, -57.623809814453125, 646.2186279296875, -83.79023742675781, -85.0959243774414, 605.692626953125, 1314.0821533203125, 2142.03857421875, -202.22836303710938, 365.9839782714844, 1087.71142578125, 1294.439697265625, -386.2972106933594, 217.6881103515625, -44.593902587890625, 765.3853759765625, 122.76570892333984, 574.7789306640625, 528.371826171875, 256.91778564453125, 249.02899169921875, 242.3777313232422, 1214.9031982421875, 150.7689971923828, 63.837188720703125, 497.91644287109375, 565.1156616210938, 435.5307312011719, 771.1571044921875, -1019.6341552734375, 534.7296142578125, 151.03018188476562, 184.2771759033203, 364.48553466796875, 62.36541748046875, 924.2904663085938, 123.46996307373047, 773.1827392578125, 453.549560546875, 142.7655029296875, 184.52781677246094, -202.04019165039062, -179.56967163085938, 262.29498291015625, 402.2956237792969, 578.316162109375, -108.717529296875, 942.2792358398438, -196.56883239746094, 528.9802856445312, 304.32391357421875, -152.1530303955078, 38.931976318359375, 127.87396240234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000520.npy"}
{"epoch": 0.7635829662261381, "step": 521, "batch_size": 64, "mean": 342.0847473144531, "std": 460.04351806640625, "min": -789.15478515625, "p10": -123.61929397583008, "median": 255.62297821044922, "p90": 1011.131658935547, "max": 1622.242431640625, "pos_frac": 0.75, "sample": [404.58868408203125, 799.79296875, 266.74761962890625, 26.07103729248047, -27.52518653869629, -789.15478515625, -175.5614013671875, -19.39215087890625, -125.99850463867188, 539.265625, 50.02099609375, -178.35955810546875, 23.99508285522461, 481.88421630859375, 510.2540588378906, 1208.3995361328125, 95.28955078125, 57.92654800415039, -280.5467529296875, 991.074462890625, 263.97308349609375, -23.274276733398438, -118.06780242919922, 247.2728729248047, 488.8154296875, 676.716552734375, 603.7448120117188, 141.756591796875, 72.91641998291016, -21.581298828125, 409.2919921875, 580.12939453125, -4.108314514160156, 1047.3192138671875, 163.99697875976562, 632.6514892578125, 606.845947265625, 229.62759399414062, 522.41162109375, -91.9860610961914, 239.947998046875, 321.92755126953125, 298.10284423828125, 70.96818542480469, 238.8763427734375, 1019.7276000976562, -136.67481994628906, 1110.186279296875, -7.152923583984375, -560.2310791015625, 571.31005859375, 431.1224365234375, 690.7200317382812, -2.4438018798828125, 794.2913818359375, 1077.0335693359375, 127.33891296386719, 1510.64404296875, 10.330314636230469, 1622.242431640625, 476.3919982910156, 81.41459655761719, 821.2618408203125, 798.8636474609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000521.npy"}
{"epoch": 0.7650513950073421, "step": 522, "batch_size": 64, "mean": 309.32373046875, "std": 491.3858337402344, "min": -771.6734619140625, "p10": -158.60409240722655, "median": 220.54083251953125, "p90": 1076.2465698242188, "max": 1369.217041015625, "pos_frac": 0.796875, "sample": [251.30906677246094, 743.9556884765625, 879.8294067382812, -14.374626159667969, 1093.36181640625, 100.99261474609375, 1369.217041015625, 232.77053833007812, 69.33601379394531, 5.283855438232422, -135.0046844482422, 406.1280822753906, 733.64306640625, 40.29460144042969, 480.8914794921875, -66.17935180664062, 318.8146057128906, -103.60398864746094, 462.7606201171875, 213.917236328125, 615.7263793945312, 640.5106811523438, 125.04895782470703, 308.0587158203125, -617.507080078125, 1119.73095703125, 343.87738037109375, 162.69155883789062, 540.9006958007812, 581.7362670898438, 342.9686279296875, 191.1348114013672, 138.5188751220703, 49.647705078125, 154.07708740234375, -771.6734619140625, 600.2042846679688, 111.95800018310547, 212.546142578125, -363.556884765625, -168.71812438964844, 417.609375, 1061.0924072265625, 96.1332778930664, -426.99462890625, 504.1226806640625, 859.391845703125, 237.54791259765625, -768.670166015625, 1266.3614501953125, 1082.7412109375, -19.818756103515625, 1331.4410400390625, 81.53070831298828, 160.81646728515625, -613.3622436523438, 12.871437072753906, -121.96163177490234, 591.227783203125, 227.1644287109375, 1011.3783569335938, 91.55789184570312, 1218.715576171875, 94.59684753417969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000522.npy"}
{"epoch": 0.7665198237885462, "step": 523, "batch_size": 64, "mean": 330.5179443359375, "std": 560.617431640625, "min": -985.0519409179688, "p10": -336.9925231933593, "median": 314.3672637939453, "p90": 1074.7602294921878, "max": 1756.2271728515625, "pos_frac": 0.6875, "sample": [-190.3776092529297, 1210.4444580078125, 130.7215118408203, 342.12957763671875, 391.63311767578125, -350.24945068359375, 425.53216552734375, 598.499755859375, 994.0146484375, 382.3955078125, -754.6654663085938, 761.6099853515625, -4.423826217651367, -43.566619873046875, -434.1619567871094, 116.70274353027344, 233.98681640625, 798.5748291015625, -526.56884765625, 202.1478271484375, 485.0311279296875, 921.9666748046875, -138.04498291015625, 29.318864822387695, 528.1423950195312, 112.95341491699219, 262.0371398925781, 97.13839721679688, -37.73078155517578, 1563.8389892578125, 540.7808227539062, 127.14558410644531, -116.00173950195312, -985.0519409179688, 1153.5150146484375, 822.7196655273438, -490.3374938964844, -57.09223937988281, 500.73260498046875, -102.12944030761719, 750.818115234375, -92.73681640625, 1756.2271728515625, 348.9589538574219, 726.5955810546875, -57.562835693359375, 341.2574157714844, 287.47711181640625, 1671.7506103515625, 427.3834228515625, 640.08837890625, 138.04690551757812, 550.8245239257812, 115.04332733154297, 762.8152465820312, 1184.4189453125, 844.4937744140625, -81.79620361328125, 640.2294921875, 1109.365478515625, 578.7569580078125, -650.6600341796875, -306.0596923828125, -35.898983001708984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000523.npy"}
{"epoch": 0.7679882525697503, "step": 524, "batch_size": 64, "mean": 345.46832275390625, "std": 514.7507934570312, "min": -1609.3436279296875, "p10": -87.75897903442383, "median": 424.17149353027344, "p90": 905.89140625, "max": 1656.380126953125, "pos_frac": 0.734375, "sample": [32.16096496582031, 956.909912109375, 405.73516845703125, 565.5655517578125, 484.9715270996094, 300.4378662109375, 336.39300537109375, 22.466773986816406, 586.3765258789062, 320.97369384765625, 457.4363098144531, 44.097381591796875, 480.6003723144531, 534.8238525390625, 724.770263671875, 393.9808044433594, -10.7764892578125, 915.2941284179688, 435.8193359375, 606.2221069335938, -1609.3436279296875, 489.502197265625, 535.9906616210938, 1656.380126953125, -4.9061431884765625, -51.49658203125, 788.1688842773438, 432.8133239746094, 478.5745849609375, -29.170902252197266, -7.310214996337891, 412.1790771484375, 552.840087890625, -236.22862243652344, 737.2115478515625, 246.42623901367188, 1252.048828125, 1488.3790283203125, 637.3856201171875, 493.2470703125, -60.71746063232422, -869.51806640625, 0.932159423828125, -69.98601531982422, 726.4758911132812, -87.60385131835938, 379.840087890625, 6.506675720214844, 456.903076171875, 472.4687194824219, 1106.63525390625, 1012.4357299804688, 578.8001098632812, -839.058349609375, -87.8254623413086, -11.632675170898438, 883.9517211914062, 597.953857421875, -47.121055603027344, 375.2866516113281, 600.9302978515625, 415.5296630859375, -195.10382080078125, -93.06060791015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000524.npy"}
{"epoch": 0.7694566813509545, "step": 525, "batch_size": 64, "mean": 441.43646240234375, "std": 546.4650268554688, "min": -812.2141723632812, "p10": -98.51968994140624, "median": 333.86631774902344, "p90": 1304.1084472656253, "max": 1634.548095703125, "pos_frac": 0.828125, "sample": [403.5028076171875, 1244.21435546875, 387.4947509765625, 156.60940551757812, 66.14714813232422, 470.75482177734375, 1551.7322998046875, 799.2320556640625, 398.4622497558594, -262.4564514160156, 1329.77734375, -439.35125732421875, 970.4617309570312, 247.34584045410156, 241.04190063476562, -247.90122985839844, 149.0398406982422, 281.4945983886719, 998.953125, 719.6549682617188, 575.3112182617188, 66.7669677734375, -78.99151611328125, 104.92861938476562, -158.90875244140625, 329.36846923828125, 573.1556396484375, 587.60205078125, 464.4515075683594, 1153.2091064453125, 1099.5928955078125, 11.344688415527344, 1491.0582275390625, 2.0375900268554688, 1634.548095703125, 342.69915771484375, 144.1065673828125, 214.02450561523438, 376.5653076171875, -89.42868041992188, 1627.045166015625, -812.2141723632812, -101.28985595703125, -9.138246536254883, 338.3641662597656, 288.64154052734375, 559.7923583984375, 133.26449584960938, 594.8170166015625, 168.67642211914062, 691.7243041992188, 1395.33544921875, -92.05596923828125, 21.31201171875, 779.1480102539062, 288.41058349609375, 313.5992736816406, 61.19579315185547, -478.8175048828125, 1026.0020751953125, 204.529052734375, 1572.608154296875, 402.6863708496094, 968.6463012695312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000525.npy"}
{"epoch": 0.7709251101321586, "step": 526, "batch_size": 64, "mean": 435.17706298828125, "std": 566.6396484375, "min": -1001.7420654296875, "p10": -207.5873580932617, "median": 496.70684814453125, "p90": 1238.09208984375, "max": 1602.8929443359375, "pos_frac": 0.765625, "sample": [1254.618408203125, 894.3623657226562, 73.18122863769531, 175.9891357421875, 339.27740478515625, 314.7445068359375, -174.55433654785156, 4.437433242797852, 546.0518798828125, -758.4757080078125, 571.9600219726562, 756.6817016601562, 744.51220703125, 253.28500366210938, 1358.06201171875, 1081.2120361328125, -131.13983154296875, 751.6234130859375, 311.6122131347656, 616.9177856445312, 344.7394714355469, 599.9947509765625, 271.9361877441406, 1057.1865234375, 534.538330078125, 512.3097534179688, 1602.8929443359375, 34.95210266113281, 371.3934326171875, -228.7050018310547, -214.5402069091797, 607.580078125, -452.6441345214844, 1321.572021484375, -174.86468505859375, 335.2105407714844, 1002.7852783203125, 883.6043090820312, 216.3837890625, -1001.7420654296875, 848.4780883789062, 607.006103515625, 522.96728515625, 1263.044189453125, -487.8603515625, 739.4434814453125, 769.8825073242188, 1119.654052734375, -71.01399993896484, 509.94580078125, -39.93878173828125, 221.9341583251953, 334.2382507324219, 1224.04833984375, 1036.02197265625, -191.36404418945312, 1244.11083984375, 671.4806518554688, 483.4678955078125, -27.689193725585938, -126.76842498779297, 49.12940216064453, -778.025634765625, 1320.197265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000526.npy"}
{"epoch": 0.7723935389133627, "step": 527, "batch_size": 64, "mean": 295.2120361328125, "std": 439.24688720703125, "min": -645.70947265625, "p10": -250.59631347656247, "median": 265.51319885253906, "p90": 804.2431762695313, "max": 1265.3848876953125, "pos_frac": 0.75, "sample": [-510.78424072265625, 555.091796875, 369.0017395019531, 389.4927062988281, 695.8265380859375, 153.20156860351562, 1265.3848876953125, -63.55870056152344, -262.5423889160156, 759.94482421875, -157.2846221923828, -222.72213745117188, 676.1456298828125, 294.5322570800781, -645.70947265625, -601.4216918945312, 525.4517211914062, -142.12423706054688, 706.8681030273438, 432.31402587890625, -286.04156494140625, 1041.237548828125, 106.49456787109375, 97.03729248046875, 163.32272338867188, 793.37548828125, -13.668960571289062, 123.27021026611328, 1038.0943603515625, 697.452880859375, 587.883544921875, 1069.546630859375, 445.55975341796875, 692.82177734375, 1172.4637451171875, 1066.8443603515625, 808.9007568359375, -21.86774253845215, 52.98075866699219, 274.2480163574219, 610.3076782226562, 165.80999755859375, 309.98052978515625, 247.2788848876953, -131.00106811523438, 64.46328735351562, 256.77838134765625, 336.0519714355469, 154.15087890625, -547.4170532226562, 290.91204833984375, 171.53579711914062, 610.339599609375, 230.0828857421875, -9.157196044921875, 75.38249206542969, 697.4440307617188, 648.2349853515625, 509.1297302246094, 357.68670654296875, 173.21719360351562, -206.79568481445312, 63.41942596435547, -311.32891845703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000527.npy"}
{"epoch": 0.7738619676945668, "step": 528, "batch_size": 64, "mean": 363.74908447265625, "std": 583.2496948242188, "min": -692.963134765625, "p10": -358.2716186523437, "median": 302.126953125, "p90": 1184.62763671875, "max": 1909.074462890625, "pos_frac": 0.71875, "sample": [-48.463356018066406, 1188.419677734375, 339.2878723144531, 331.4342956542969, 204.73382568359375, -428.63580322265625, -7.296882629394531, 709.6046142578125, 602.985107421875, 1277.67822265625, 439.66259765625, -126.39732360839844, 384.235595703125, -372.73248291015625, 145.6415252685547, 544.9058227539062, 316.3570556640625, -130.24392700195312, 1175.779541015625, 1909.074462890625, -6.419288635253906, 913.0875244140625, 469.02520751953125, 721.4736938476562, 456.1871643066406, 920.5894165039062, -624.8665161132812, 1383.6346435546875, -275.97119140625, 1080.4620361328125, -584.1618041992188, 287.8968505859375, 421.7347412109375, 244.75961303710938, 361.23388671875, 182.25221252441406, 528.8449096679688, 188.75648498535156, 491.89288330078125, 36.265541076660156, -109.3074722290039, 901.6594848632812, -163.00436401367188, -692.963134765625, 1505.008544921875, 221.01321411132812, 596.564208984375, 1722.6240234375, 187.7316436767578, 2.9127197265625, -585.0314331054688, -631.6929931640625, 771.3342895507812, 1225.3983154296875, 135.20974731445312, 886.6732788085938, 638.5987548828125, 212.47593688964844, -324.52960205078125, -15.64080810546875, 85.61468505859375, -101.02864074707031, 1059.166259765625, 98.44490814208984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000528.npy"}
{"epoch": 0.775330396475771, "step": 529, "batch_size": 64, "mean": 240.1451873779297, "std": 581.5812377929688, "min": -1487.697509765625, "p10": -428.16297912597656, "median": 208.70394897460938, "p90": 984.9620971679689, "max": 1564.1142578125, "pos_frac": 0.671875, "sample": [155.84278869628906, -172.4997100830078, 1564.1142578125, -80.88397216796875, 166.50888061523438, 658.3615112304688, 417.2757873535156, 643.1240844726562, 423.4666442871094, 1517.341064453125, -353.0295104980469, 541.9236450195312, 97.2258529663086, -435.5675354003906, 553.64208984375, -138.74131774902344, 136.53460693359375, 21.863792419433594, 775.796142578125, -109.1259536743164, -357.0201721191406, 321.22320556640625, 1119.077392578125, 968.9318237304688, 991.8322143554688, -679.31298828125, -103.88822174072266, 334.3382263183594, -557.6708374023438, 703.5693359375, 171.20718383789062, 467.1395568847656, -124.2309341430664, 368.9197998046875, -57.128204345703125, 33.90751647949219, 716.6256103515625, 38.2267951965332, 632.7234497070312, 684.8185424804688, -1487.697509765625, 104.98546600341797, 1035.3106689453125, -548.2931518554688, 590.0117797851562, -1078.331298828125, 106.98592376708984, 621.80810546875, 1018.0313110351562, 98.24271392822266, -770.4498901367188, 428.9959716796875, -50.928260803222656, 564.3158569335938, -410.88568115234375, 486.1827087402344, 246.20071411132812, 890.0554809570312, 581.1947021484375, 1274.545166015625, -305.4620666503906, -112.41423034667969, -270.9192810058594, 301.3453063964844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000529.npy"}
{"epoch": 0.7767988252569751, "step": 530, "batch_size": 64, "mean": 395.99542236328125, "std": 694.19873046875, "min": -2165.776611328125, "p10": -183.7158218383789, "median": 268.0205383300781, "p90": 1427.7277709960938, "max": 2190.6923828125, "pos_frac": 0.71875, "sample": [402.4126281738281, -356.7978210449219, -12.457176208496094, 144.02175903320312, 279.2657470703125, -172.35574340820312, 1792.5850830078125, 569.8798217773438, 340.6494445800781, -585.0272216796875, 123.80860137939453, -88.59770965576172, -39.139404296875, -117.43081665039062, 759.1122436523438, 764.7054443359375, -36.19648742675781, 39.1942138671875, -365.8916015625, 1398.06005859375, 1812.1123046875, 604.2200317382812, 989.6546020507812, 731.5516967773438, -109.57398986816406, 784.6295776367188, -129.8170166015625, 175.42929077148438, -188.5844268798828, 646.4635620117188, -6.497663497924805, 1641.5341796875, 1179.1170654296875, 496.0372314453125, 371.0441589355469, -8.407184600830078, 872.2454833984375, 152.79794311523438, -357.38336181640625, -432.0257568359375, 99.89714050292969, 1722.9930419921875, 701.1446533203125, 2190.6923828125, 40.6507568359375, 387.8943786621094, 392.7510681152344, 256.77532958984375, 1539.919677734375, 186.56448364257812, 373.179443359375, 21.88385581970215, 858.7728881835938, -2165.776611328125, 1332.250244140625, 76.89735412597656, -25.173492431640625, 1440.4425048828125, 535.229736328125, 290.6329345703125, 79.64891052246094, 118.91033935546875, 647.7361450195312, 175.44210815429688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000530.npy"}
{"epoch": 0.7782672540381792, "step": 531, "batch_size": 64, "mean": 339.31494140625, "std": 558.1561279296875, "min": -1148.2510986328125, "p10": -243.23834991455078, "median": 275.8150634765625, "p90": 1010.8175292968754, "max": 1853.74951171875, "pos_frac": 0.765625, "sample": [572.1248779296875, 122.9298324584961, 700.8858642578125, 1053.1773681640625, 771.4121704101562, -483.8731689453125, 199.60693359375, 190.54434204101562, 1766.05078125, -165.62545776367188, -238.37452697753906, 491.5682067871094, 9.672430038452148, 449.4646301269531, 278.73779296875, 191.6575469970703, 406.43927001953125, 284.85809326171875, -1148.2510986328125, 357.67852783203125, 898.8604125976562, 749.7011108398438, 352.95037841796875, 81.85018920898438, 787.0548095703125, 158.68148803710938, 464.7010803222656, 283.17913818359375, 478.4344177246094, -60.330833435058594, 1174.5074462890625, -89.46624755859375, 622.4114990234375, 632.5333251953125, 73.86485290527344, 261.2303466796875, -30.05658721923828, -589.4346923828125, -16.321502685546875, 137.02532958984375, 1853.331787109375, 391.6318359375, 55.548179626464844, -93.19108581542969, 1853.74951171875, -367.49761962890625, 548.067138671875, 1381.38916015625, 272.892333984375, 911.9779052734375, 1173.875732421875, -48.002845764160156, 599.8096923828125, -245.32284545898438, 262.5488586425781, 145.79388427734375, 477.41259765625, 172.5323028564453, 405.7799987792969, 82.60823059082031, 233.23196411132812, 554.1023559570312, -822.2844848632812, -265.88916015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000531.npy"}
{"epoch": 0.7797356828193832, "step": 532, "batch_size": 64, "mean": 362.3275146484375, "std": 672.24951171875, "min": -1388.0382080078125, "p10": -498.3867187499999, "median": 283.77268981933594, "p90": 1258.3185180664068, "max": 1910.8194580078125, "pos_frac": 0.75, "sample": [1107.4822998046875, 445.8349609375, 1599.7427978515625, -280.1801452636719, 1407.5301513671875, 230.30511474609375, 861.87353515625, 865.386962890625, -1388.0382080078125, 299.3670349121094, -315.9056396484375, 1394.041748046875, 512.2537841796875, 745.2840576171875, -548.873291015625, 790.67333984375, 564.3277587890625, -1045.9527587890625, 256.5142822265625, 326.7358093261719, 408.4416198730469, 1712.1044921875, 1910.8194580078125, 879.1585693359375, 585.2832641601562, 713.100830078125, -140.48866271972656, 1146.0904541015625, 1306.416259765625, 180.95989990234375, -87.48529815673828, 343.6285705566406, -152.48324584960938, 1027.95703125, 750.113525390625, 175.41317749023438, 250.02261352539062, 1026.0975341796875, -101.04312896728516, 260.7225036621094, 18.96514129638672, 802.1430053710938, 181.76873779296875, 597.1978759765625, 167.13990783691406, -74.0339126586914, 12.484321594238281, 772.8711547851562, 218.59725952148438, 29.31554412841797, -569.4894409179688, -596.3075561523438, 268.1783447265625, 1722.2930908203125, 266.8360595703125, -92.46002197265625, 310.4883117675781, -1059.2840576171875, 115.50018310546875, 204.28573608398438, -380.584716796875, 617.4561767578125, -698.7664184570312, 331.1343688964844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000532.npy"}
{"epoch": 0.7812041116005873, "step": 533, "batch_size": 64, "mean": 265.591552734375, "std": 579.4927978515625, "min": -1519.04052734375, "p10": -303.9934539794922, "median": 224.12490844726562, "p90": 1002.5224914550782, "max": 2233.74755859375, "pos_frac": 0.6875, "sample": [535.66455078125, -412.07080078125, 745.656005859375, -495.9911804199219, 446.49456787109375, 197.826171875, 1248.322998046875, 613.4439697265625, 278.0306396484375, 322.0848388671875, 706.60205078125, 619.97607421875, 596.561279296875, 760.285888671875, -195.3671875, 418.1038513183594, 1012.8751831054688, 167.06625366210938, 250.3900146484375, -803.84033203125, 1318.161865234375, 224.8280029296875, 12.25677490234375, 573.6800537109375, 44.53520584106445, 595.5488891601562, -44.628211975097656, -59.29957580566406, 223.42181396484375, 2233.74755859375, 458.75933837890625, 417.1475524902344, -93.06600952148438, -14.260704040527344, -8.04831314086914, -1519.04052734375, -342.4850769042969, 357.6632995605469, -304.4922790527344, 398.5833435058594, 124.63047790527344, 166.22633361816406, 1257.580078125, 1293.380126953125, -198.93612670898438, 5.8476409912109375, -284.22503662109375, -4.3527679443359375, 353.0409240722656, 278.7900085449219, 321.6878356933594, -105.16331481933594, 978.3662109375, 84.38121795654297, -302.82952880859375, -293.325927734375, -809.5487060546875, 158.91217041015625, 170.902099609375, 1163.9808349609375, -177.26275634765625, 4.063446044921875, 787.0365600585938, 539.5496215820312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000533.npy"}
{"epoch": 0.7826725403817915, "step": 534, "batch_size": 64, "mean": 354.456787109375, "std": 543.5641479492188, "min": -857.911865234375, "p10": -252.00228881835938, "median": 278.47438049316406, "p90": 1007.9321655273437, "max": 1898.53173828125, "pos_frac": 0.78125, "sample": [386.64337158203125, 1007.998046875, -41.416748046875, 1898.53173828125, 58.31851577758789, 830.8383178710938, -557.5572509765625, 269.7289733886719, 328.64776611328125, -551.0974731445312, 123.27362823486328, 1181.22900390625, -208.43063354492188, 110.78530883789062, 1215.2950439453125, 933.1981811523438, 792.811767578125, -254.41195678710938, 362.4057312011719, 849.9826049804688, 270.59283447265625, -170.71359252929688, 580.8291015625, 471.4840087890625, 286.3559265136719, 369.2567138671875, 1053.9654541015625, -92.52676391601562, 368.4805908203125, 166.76693725585938, 200.11874389648438, 899.8743896484375, 256.68206787109375, 127.89730072021484, 650.9400634765625, -582.6083984375, -113.23324584960938, 1574.9891357421875, 803.8501586914062, 1007.7784423828125, 156.31582641601562, 178.18882751464844, -111.89495849609375, 149.31875610351562, 815.6685180664062, 189.57713317871094, 85.19258117675781, 926.6822509765625, 1476.9031982421875, 893.5477294921875, 653.5062866210938, 330.9313659667969, 644.89697265625, 47.31523513793945, 403.7720642089844, 119.8408203125, 289.7669982910156, 235.58453369140625, 293.4277038574219, 84.42169189453125, -246.37973022460938, -857.911865234375, -279.82916259765625, -661.162841796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000534.npy"}
{"epoch": 0.7841409691629956, "step": 535, "batch_size": 64, "mean": 412.41912841796875, "std": 558.0743408203125, "min": -888.400390625, "p10": -218.32346496582028, "median": 388.7114562988281, "p90": 1057.8584228515626, "max": 2353.594482421875, "pos_frac": 0.796875, "sample": [-381.83001708984375, 2353.594482421875, -355.75689697265625, 496.0716552734375, -228.93812561035156, 224.1131591796875, 98.31805419921875, 944.2677001953125, 122.02099609375, 660.6766967773438, 244.06129455566406, 98.33206939697266, 248.54957580566406, 971.2576904296875, 451.0444030761719, 425.651611328125, 126.2780990600586, 1411.845458984375, 311.8752136230469, 350.5689697265625, 985.01025390625, 560.75390625, 571.4996337890625, 470.46295166015625, 9.991958618164062, 1477.064208984375, 1145.56689453125, 509.3882751464844, -65.94277954101562, 333.6531677246094, 1527.8011474609375, 775.4349975585938, 1060.3433837890625, -377.7213439941406, -362.4046936035156, -888.400390625, 497.0347595214844, 730.9007568359375, 141.44195556640625, 451.3133544921875, 379.79150390625, 1052.0601806640625, 841.0843505859375, 208.71359252929688, 504.1744384765625, -62.29899597167969, -17.08679962158203, 771.2144775390625, -193.55592346191406, -579.96435546875, 604.2424926757812, 50.50646209716797, 501.88409423828125, 78.8399429321289, -139.56163024902344, -90.9290771484375, 948.1348876953125, 174.19070434570312, 1484.4622802734375, 317.2412109375, 29.53411293029785, 539.085205078125, 397.63140869140625, 470.2344055175781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000535.npy"}
{"epoch": 0.7856093979441997, "step": 536, "batch_size": 64, "mean": 493.9730529785156, "std": 694.5618896484375, "min": -1156.2452392578125, "p10": -81.2103809356689, "median": 294.50360107421875, "p90": 1226.6305786132812, "max": 4038.584228515625, "pos_frac": 0.8125, "sample": [135.64271545410156, 736.1466674804688, 291.72467041015625, 996.34326171875, 1228.964111328125, 11.487716674804688, 172.7620849609375, 164.58953857421875, 69.43722534179688, -27.61236572265625, 899.8978271484375, 340.3760681152344, 1221.1856689453125, 894.9749755859375, -207.97755432128906, 534.7900390625, 1278.72216796875, 105.84945678710938, 819.255126953125, 815.7184448242188, 2100.97412109375, 545.603759765625, -141.41311645507812, 844.0142822265625, -100.05606842041016, 1273.231689453125, 26.562286376953125, 653.269287109375, 679.0934448242188, -4.001708984375, 4038.584228515625, 764.264404296875, 297.28253173828125, 199.64004516601562, 487.9483642578125, 723.3970336914062, 105.65263366699219, 1151.397216796875, -315.6992492675781, 105.6749267578125, 155.5172882080078, -1156.2452392578125, 1592.6400146484375, 944.498779296875, 644.5338745117188, -127.62156677246094, 90.18707275390625, 253.7376708984375, 303.82958984375, 145.4813232421875, 288.004638671875, -19.89759063720703, 220.66180419921875, 244.36770629882812, 246.2137451171875, 792.7206420898438, 132.14390563964844, -37.23711013793945, -302.22210693359375, -12.703033447265625, 880.2265625, 755.4982299804688, 368.03912353515625, 1294.203857421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000536.npy"}
{"epoch": 0.7870778267254038, "step": 537, "batch_size": 64, "mean": 336.547119140625, "std": 570.430419921875, "min": -1287.856689453125, "p10": -308.5430969238281, "median": 294.53167724609375, "p90": 1101.289990234375, "max": 1405.43408203125, "pos_frac": 0.78125, "sample": [-1287.856689453125, -701.3470458984375, 25.24774932861328, 116.0058364868164, -433.10491943359375, 351.5430908203125, 25.20521354675293, 934.5762939453125, 11.751190185546875, -60.938175201416016, 493.8050231933594, -8.348373413085938, 526.2413330078125, 355.032958984375, -325.07720947265625, 926.881103515625, 1198.0164794921875, -17.487642288208008, 219.10205078125, 0.33716583251953125, 7.694490432739258, 836.1318969726562, 43.837623596191406, -796.6882934570312, 961.009521484375, -269.9635009765625, 1381.5096435546875, 1210.0960693359375, 336.7694091796875, 544.0089111328125, 1105.349365234375, 409.645263671875, 1018.1609497070312, 318.27911376953125, 785.2227172851562, -16.572410583496094, 219.31748962402344, 572.0779418945312, 270.78424072265625, -513.6068115234375, 233.08102416992188, 440.9522705078125, -91.59056091308594, -923.1226806640625, 54.69559860229492, 1091.818115234375, -83.11325073242188, 160.5536346435547, 321.5455017089844, 876.5037231445312, 151.96676635742188, 1405.43408203125, 150.80276489257812, 1268.8074951171875, 518.6207885742188, 99.1599349975586, 1281.02587890625, 104.19784545898438, 989.9808959960938, 395.6846008300781, 563.8091430664062, 682.942626953125, 96.337158203125, 976.2733154296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000537.npy"}
{"epoch": 0.788546255506608, "step": 538, "batch_size": 64, "mean": 382.1604919433594, "std": 521.6039428710938, "min": -598.4373168945312, "p10": -233.8818130493164, "median": 378.05963134765625, "p90": 1036.4849243164062, "max": 2054.41357421875, "pos_frac": 0.75, "sample": [130.3001251220703, 514.5109252929688, 800.4747314453125, 1201.9716796875, 556.2587890625, -154.45770263671875, 985.0003662109375, 580.02392578125, 611.4021606445312, 451.8576965332031, -97.31591033935547, 1618.9189453125, 571.411376953125, 405.996337890625, 238.09056091308594, 1033.6937255859375, -523.422119140625, -13.172309875488281, 330.37799072265625, 1072.1749267578125, 278.51104736328125, 508.3822326660156, 102.16415405273438, 758.6702880859375, 553.6114501953125, 76.81452941894531, -69.863525390625, 1435.8648681640625, -24.657577514648438, -385.0526428222656, -598.4373168945312, 804.3512573242188, 152.64173889160156, -39.17472839355469, 2054.41357421875, 642.8846435546875, 751.0023803710938, 526.7821044921875, 350.1229248046875, 743.217041015625, 216.39907836914062, -102.82794952392578, 10.90053939819336, 649.6593627929688, 191.51162719726562, 667.9871826171875, -571.311279296875, 636.02294921875, -306.7992248535156, 102.70796203613281, 582.568603515625, 429.36077880859375, 318.16778564453125, 642.1598510742188, -236.62681579589844, -29.487327575683594, 139.7486572265625, 23.415599822998047, 1116.799560546875, 1037.68115234375, -371.214111328125, -227.476806640625, 581.427978515625, 21.152395248413086], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000538.npy"}
{"epoch": 0.7900146842878121, "step": 539, "batch_size": 64, "mean": 361.0832214355469, "std": 486.40185546875, "min": -788.9386596679688, "p10": -155.9830352783203, "median": 302.7950134277344, "p90": 921.3892456054691, "max": 1713.4300537109375, "pos_frac": 0.765625, "sample": [279.0475158691406, 313.39813232421875, 56.611167907714844, 6.863828659057617, 294.38623046875, 426.57464599609375, -188.36680603027344, 45.76813507080078, -32.925262451171875, 1330.110107421875, -298.90936279296875, -23.787744522094727, 1713.4300537109375, 156.0600128173828, 962.0079345703125, 632.130859375, 782.424560546875, 704.6963500976562, 29.73388671875, 826.6123046875, 1566.512939453125, 86.69354248046875, 311.20379638671875, 767.0167236328125, 135.04380798339844, 205.89968872070312, -79.77296447753906, 467.725830078125, -115.94898986816406, 704.0105590820312, -372.6196594238281, 685.5436401367188, -788.9386596679688, 571.2697143554688, 658.210693359375, 108.99200439453125, 79.3429946899414, 535.024658203125, 184.292724609375, -161.86203002929688, -84.93898010253906, 1483.12744140625, -188.00711059570312, 697.3697509765625, -38.3035774230957, -142.265380859375, 999.3551635742188, -226.58445739746094, 97.14424896240234, 13.881385803222656, 610.3405151367188, 250.70095825195312, 826.0667724609375, 391.4115905761719, 450.4615478515625, 334.03680419921875, 816.8549194335938, 560.0382080078125, 462.9404296875, 451.2880859375, -65.45185852050781, 1311.9849853515625, 493.5511474609375, 40.81584167480469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000539.npy"}
{"epoch": 0.7914831130690162, "step": 540, "batch_size": 64, "mean": 485.5020751953125, "std": 671.1478271484375, "min": -844.3829345703125, "p10": -302.49952392578115, "median": 364.8134307861328, "p90": 1497.8418701171877, "max": 2607.18505859375, "pos_frac": 0.765625, "sample": [526.9420776367188, 597.2116088867188, 746.6533203125, -452.12164306640625, 2033.9893798828125, -217.58154296875, 303.8939514160156, 124.7410888671875, -844.3829345703125, -503.455322265625, -28.02494239807129, 631.240966796875, -171.15370178222656, 256.20477294921875, 2607.18505859375, -99.84314727783203, 249.3370361328125, 71.27542114257812, 607.920654296875, 617.121337890625, 34.434173583984375, 178.13226318359375, 419.01507568359375, 830.448486328125, 816.7943115234375, 1417.2828369140625, 569.4229736328125, 1459.7769775390625, 1514.1553955078125, 385.65802001953125, 623.9822387695312, 454.21270751953125, 738.234375, 91.57715606689453, 334.30694580078125, 474.6159362792969, -115.72769165039062, -14.96331787109375, 1582.0635986328125, 280.3430480957031, 1183.365966796875, 1883.4671630859375, 1606.300537109375, 1280.5196533203125, -440.7981872558594, 711.505126953125, 226.67410278320312, 317.9764404296875, 811.0480346679688, 337.17724609375, -338.8929443359375, 343.9688415527344, 715.7555541992188, 849.747314453125, 176.31582641601562, 303.77935791015625, 712.454345703125, 172.2064208984375, 1589.907470703125, -67.92530059814453, -5.918968200683594, 684.08642578125, -712.4448852539062, -399.0629577636719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000540.npy"}
{"epoch": 0.7929515418502202, "step": 541, "batch_size": 64, "mean": 249.83155822753906, "std": 587.30322265625, "min": -1212.1220703125, "p10": -451.40711059570305, "median": 198.1755828857422, "p90": 964.6280578613286, "max": 1973.171875, "pos_frac": 0.734375, "sample": [-847.7862548828125, 188.8802490234375, 700.888916015625, 91.10586547851562, 176.6373291015625, 64.74935913085938, 327.1742858886719, 80.01509094238281, 203.0479736328125, -85.4093017578125, -122.18746948242188, 1365.2862548828125, -872.9452514648438, -52.72227478027344, 1005.191650390625, 51.797401428222656, 201.54022216796875, 1264.969970703125, 102.70211029052734, 793.2236328125, 1556.117431640625, 869.9796752929688, 39.438934326171875, -531.582763671875, 274.0296630859375, -291.62158203125, 431.29205322265625, 324.7894592285156, -473.9791259765625, 672.7252197265625, 618.1307373046875, 63.314292907714844, 771.8740234375, 176.47056579589844, 194.81094360351562, 187.01422119140625, 178.65570068359375, 221.4410858154297, 656.1570434570312, 465.51531982421875, -204.6018524169922, 151.64479064941406, 771.317626953125, 1040.426025390625, 698.845947265625, 313.71673583984375, -1212.1220703125, -616.2047119140625, 429.4014587402344, 675.949951171875, -348.7038879394531, 427.0123596191406, 267.1916809082031, 1973.171875, 1194.5068359375, -398.73907470703125, -913.5093383789062, 513.737060546875, -269.8940734863281, -250.53512573242188, -221.81781005859375, 177.43637084960938, 427.886474609375, 322.369384765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000541.npy"}
{"epoch": 0.7944199706314243, "step": 542, "batch_size": 64, "mean": 472.7724609375, "std": 662.3981323242188, "min": -1275.292236328125, "p10": -279.5805282592773, "median": 393.8968048095703, "p90": 1260.0657226562503, "max": 2246.6455078125, "pos_frac": 0.796875, "sample": [954.1895751953125, 359.65667724609375, 272.91387939453125, 221.20614624023438, 226.62969970703125, 1079.968994140625, -378.08087158203125, 2246.6455078125, 1283.2275390625, 410.64349365234375, 26.898544311523438, 511.341796875, 904.3232421875, -95.02487182617188, -71.67574310302734, 476.716064453125, 2024.072509765625, 319.98687744140625, -66.57915496826172, 1921.6578369140625, 352.3209533691406, 146.78578186035156, 482.43853759765625, 500.839111328125, -301.2774963378906, 218.84410095214844, -140.11517333984375, -430.44805908203125, -228.9542694091797, 1027.166748046875, -352.866455078125, 917.969970703125, 816.3051147460938, 194.70956420898438, 1816.106689453125, -70.01632690429688, 699.859375, 529.88916015625, -579.0667114257812, 690.1240234375, 359.22998046875, 448.96356201171875, 817.4916381835938, 107.02571868896484, 1536.0556640625, 209.18411254882812, 229.83856201171875, -1275.292236328125, 735.8479614257812, 121.8714828491211, 707.5665893554688, 352.56207275390625, 102.30781555175781, 398.4413146972656, 1701.482177734375, 1206.021484375, 389.352294921875, -933.1012573242188, 1114.5345458984375, 246.47927856445312, 448.23065185546875, 657.8438720703125, 724.3447265625, 931.8226318359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000542.npy"}
{"epoch": 0.7958883994126285, "step": 543, "batch_size": 64, "mean": 336.0360107421875, "std": 782.145751953125, "min": -1004.4286499023438, "p10": -371.455517578125, "median": 160.56012725830078, "p90": 1434.3758911132823, "max": 3373.8740234375, "pos_frac": 0.65625, "sample": [21.99127197265625, -14.43206787109375, 717.2849731445312, -39.0784912109375, 13.549673080444336, 2470.1650390625, 3.4383316040039062, -24.265518188476562, -1004.4286499023438, 865.240966796875, -62.100746154785156, -879.4212646484375, 79.52435302734375, 219.3695068359375, 2373.4716796875, 59.12025451660156, 38.896148681640625, 3373.8740234375, -708.1924438476562, 34.98229217529297, 569.6385498046875, -483.09112548828125, -306.53948974609375, 187.6295623779297, -380.66058349609375, -153.34408569335938, -73.25408935546875, 558.9146728515625, -437.51580810546875, 238.6975860595703, 645.998779296875, 217.30564880371094, -362.36260986328125, 1527.3150634765625, -207.58859252929688, -225.3459014892578, 1971.115478515625, 628.98291015625, 1217.517822265625, 90.5053939819336, 1527.345947265625, 180.1327667236328, -212.68724060058594, 389.04974365234375, 850.4513549804688, -17.07361602783203, 289.76483154296875, 1836.6436767578125, -375.35247802734375, 627.9064331054688, 213.02810668945312, -182.772705078125, 418.36846923828125, -319.5780944824219, 453.67364501953125, 586.647216796875, 318.4722595214844, -102.19181060791016, 48.41252136230469, 355.8285827636719, 140.98748779296875, 600.0245361328125, 599.7904663085938, 516.524169921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000543.npy"}
{"epoch": 0.7973568281938326, "step": 544, "batch_size": 64, "mean": 384.9022216796875, "std": 589.7805786132812, "min": -1030.514404296875, "p10": -183.91379089355468, "median": 378.89353942871094, "p90": 1154.4454711914066, "max": 1953.4681396484375, "pos_frac": 0.75, "sample": [1185.1002197265625, 553.7642822265625, 358.28875732421875, 462.2815246582031, 1671.07958984375, -985.1202392578125, 417.2251281738281, -106.87114715576172, 766.9364013671875, 1.4945964813232422, 1306.79296875, -75.14085388183594, 707.8619384765625, 261.6165771484375, -925.961669921875, 376.7192077636719, 632.4596557617188, 201.5740966796875, -49.07928466796875, 417.2354431152344, 381.06787109375, 417.63262939453125, -207.31234741210938, 252.30197143554688, 87.78817749023438, 188.2085418701172, 743.47119140625, 77.18365478515625, -375.4515380859375, -188.97714233398438, 1620.8626708984375, 187.123046875, -79.60710906982422, 663.8754272460938, -1030.514404296875, 1082.917724609375, 535.7132568359375, 441.86376953125, 634.4693603515625, -181.26223754882812, 255.299072265625, 22.534358978271484, 267.24395751953125, 1677.421142578125, 784.8140258789062, -105.12089538574219, 142.35130310058594, 1953.4681396484375, 861.3984985351562, 585.3372802734375, -6.286014556884766, 158.77670288085938, 777.2271728515625, 1486.8712158203125, 662.03125, 441.8464050292969, 391.1131286621094, -151.0341033935547, 511.7468566894531, 119.63401794433594, -74.51292419433594, 821.166748046875, -185.0501708984375, 805.8519287109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000544.npy"}
{"epoch": 0.7988252569750367, "step": 545, "batch_size": 64, "mean": 480.9786376953125, "std": 612.5518798828125, "min": -747.8729248046875, "p10": -99.84180221557617, "median": 338.0866394042969, "p90": 1478.2558715820314, "max": 1898.72216796875, "pos_frac": 0.859375, "sample": [-131.56393432617188, 8.509368896484375, -667.2872314453125, 76.86734008789062, 252.5397186279297, 971.9497680664062, 234.79278564453125, 55.64189147949219, 633.8495483398438, -417.1264343261719, 485.2345886230469, 758.379638671875, 68.22600555419922, 681.1416625976562, 1197.3565673828125, 118.72650909423828, 1692.27001953125, 1431.0330810546875, 1493.0380859375, 350.33636474609375, 951.6661987304688, 64.21316528320312, 279.3165283203125, 252.9197540283203, 55.636680603027344, 359.9371643066406, 66.00653076171875, 461.39068603515625, 902.620361328125, -100.0689926147461, 1565.0731201171875, 533.5339965820312, 132.0595245361328, -99.31169128417969, 1898.72216796875, 1143.2554931640625, 18.297622680664062, 1614.833740234375, -633.1051635742188, 506.6282958984375, -29.718711853027344, 459.1432189941406, 110.746826171875, 585.9744873046875, 442.3326110839844, 681.0018920898438, 1007.210693359375, 1443.7640380859375, 271.0269470214844, 267.41583251953125, 167.6581573486328, 82.50093078613281, 345.8825988769531, 411.13507080078125, 136.85415649414062, 1724.6141357421875, 1436.0220947265625, -274.2776184082031, 330.2906799316406, 218.482421875, 716.7559814453125, -747.8729248046875, 191.3631134033203, 1536.7841796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000545.npy"}
{"epoch": 0.8002936857562408, "step": 546, "batch_size": 64, "mean": 300.89483642578125, "std": 506.4837646484375, "min": -1585.2960205078125, "p10": -255.3810089111328, "median": 268.6678924560547, "p90": 855.9471801757813, "max": 1332.51171875, "pos_frac": 0.75, "sample": [-234.00106811523438, 108.0320816040039, 357.67333984375, 50.645050048828125, 221.8082275390625, 505.6353759765625, 260.8885192871094, 483.92303466796875, 148.1793670654297, 220.00897216796875, -201.12832641601562, -243.87362670898438, -235.80162048339844, -135.04473876953125, 779.7366333007812, 129.23594665527344, 20.16504669189453, 264.0627136230469, 1085.627685546875, 281.45623779296875, -87.01714324951172, 789.8646850585938, -317.6789245605469, 1255.0054931640625, 629.2355346679688, 817.1951904296875, 807.6300048828125, 512.814453125, 838.93408203125, 391.8432312011719, 1332.51171875, -303.08660888671875, 15.348091125488281, 465.0153503417969, 529.556396484375, -751.1568603515625, 677.59033203125, 442.94769287109375, 738.5198974609375, 903.5968627929688, -1585.2960205078125, 783.442138671875, 228.38690185546875, 304.91815185546875, 466.5563659667969, 859.9371948242188, -0.11138916015625, 814.3994140625, -33.484901428222656, 179.14163208007812, 34.40338897705078, 68.89263153076172, 846.6371459960938, 273.2730712890625, -260.312744140625, 556.0927734375, 147.26272583007812, 1172.497802734375, -289.1869812011719, 72.6142807006836, -109.34974670410156, 429.31292724609375, -359.11407470703125, 1100.460205078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000546.npy"}
{"epoch": 0.801762114537445, "step": 547, "batch_size": 64, "mean": 404.60955810546875, "std": 576.495361328125, "min": -1284.840087890625, "p10": -124.51261291503906, "median": 361.9092712402344, "p90": 1048.303271484375, "max": 2129.8388671875, "pos_frac": 0.71875, "sample": [579.01513671875, 68.03668975830078, -1284.840087890625, 1018.738525390625, -8.694408416748047, 41.112884521484375, 295.65814208984375, 755.7949829101562, 183.62835693359375, 598.0170288085938, 164.0086669921875, 1862.134033203125, -28.751338958740234, 1847.269775390625, -115.88066101074219, -87.18506622314453, 527.4783325195312, 643.9176025390625, -148.29429626464844, 958.7216796875, -37.66753387451172, 106.60779571533203, 502.5037841796875, 296.3215637207031, 655.5550537109375, -173.33160400390625, 583.093994140625, 236.3207244873047, 215.50755310058594, 409.6197204589844, 1060.973876953125, -128.21202087402344, 605.170166015625, 680.031982421875, 624.5593872070312, 370.86431884765625, -111.10350036621094, 971.2886352539062, 380.08990478515625, -10.55157470703125, 731.4859619140625, 2129.8388671875, 588.8560791015625, -604.34423828125, -250.93692016601562, -22.012367248535156, -341.1171875, 1142.1151123046875, 117.60899353027344, 613.4613037109375, 1533.699951171875, 239.20274353027344, 582.4268798828125, 435.962890625, 215.78858947753906, 272.3179931640625, 798.8880615234375, -89.644287109375, 416.087890625, -72.08770751953125, 724.5181884765625, 1371.78466796875, 352.9542236328125, -99.37226104736328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000547.npy"}
{"epoch": 0.8032305433186491, "step": 548, "batch_size": 64, "mean": 355.14385986328125, "std": 617.1107788085938, "min": -977.9791259765625, "p10": -223.32734374999998, "median": 287.87596130371094, "p90": 907.8351013183594, "max": 3009.965087890625, "pos_frac": 0.734375, "sample": [-15.620956420898438, 776.2684936523438, 278.2801513671875, 333.1211853027344, -197.1953125, 315.2782897949219, -977.9791259765625, 1396.421142578125, 414.0939636230469, 374.9655456542969, 432.3671875, 499.85467529296875, 1073.6751708984375, 309.22711181640625, 698.63427734375, -298.0814208984375, -73.76368713378906, 360.780029296875, 912.910400390625, 188.2947998046875, 895.9927368164062, 241.78921508789062, 199.92230224609375, -542.4231567382812, 463.6707458496094, 64.1357650756836, 182.49879455566406, 310.4065856933594, 297.4717712402344, 132.51930236816406, -259.3854675292969, 3009.965087890625, 860.7880859375, 215.90805053710938, 137.40928649902344, -462.632568359375, -71.11167907714844, 696.701171875, -199.95480346679688, 2232.1728515625, -54.313358306884766, -120.34323120117188, -244.90870666503906, 524.923828125, -94.36428833007812, 57.231781005859375, 424.70281982421875, 136.23309326171875, 399.3581237792969, -233.34414672851562, 467.85516357421875, 370.5422058105469, -60.26527404785156, 1998.772705078125, 445.55169677734375, 588.8944091796875, 102.27662658691406, -53.955108642578125, 227.9818572998047, 260.9407653808594, 19.799400329589844, 638.1107177734375, 961.7691040039062, 758.3804321289062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000548.npy"}
{"epoch": 0.8046989720998532, "step": 549, "batch_size": 64, "mean": 274.687255859375, "std": 579.717041015625, "min": -1188.4671630859375, "p10": -326.2873596191406, "median": 197.86541748046875, "p90": 965.8433776855469, "max": 1791.3455810546875, "pos_frac": 0.65625, "sample": [292.1175842285156, 921.2083740234375, 86.46467590332031, -25.356422424316406, 112.05198669433594, 99.2734375, -289.01898193359375, 533.011474609375, 756.589111328125, 967.9451904296875, -151.9417266845703, 1467.0526123046875, 66.36380767822266, 156.68283081054688, -128.84127807617188, -354.308837890625, 960.9391479492188, -342.259521484375, -175.50198364257812, -817.637451171875, 597.4254760742188, 1091.8455810546875, -401.5071105957031, 272.01275634765625, 347.98126220703125, 287.58392333984375, 510.64569091796875, -67.45635223388672, 782.5679931640625, 917.431884765625, -47.64520263671875, 243.58583068847656, 276.49481201171875, -191.09255981445312, 368.8189392089844, -1188.4671630859375, 181.81619262695312, 677.29443359375, 67.6400375366211, 61.01210403442383, -125.83255004882812, -79.17879486083984, 337.798095703125, 665.6171264648438, 1456.4400634765625, -102.61006164550781, -164.51206970214844, 255.93202209472656, -371.5164794921875, 1713.2969970703125, 1405.815673828125, -852.3919677734375, -70.39839172363281, 747.2781982421875, 453.2664794921875, -205.81588745117188, 213.91464233398438, 166.98056030273438, 666.9488525390625, 1791.3455810546875, 444.37060546875, 340.64544677734375, 137.93968200683594, -168.17190551757812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000549.npy"}
{"epoch": 0.8061674008810573, "step": 550, "batch_size": 64, "mean": 334.016845703125, "std": 529.0935668945312, "min": -1016.1893310546875, "p10": -301.31885986328115, "median": 342.7616729736328, "p90": 894.9503906250002, "max": 2032.751708984375, "pos_frac": 0.8125, "sample": [378.14398193359375, 352.9645690917969, 1280.449951171875, 374.4503479003906, 336.6086730957031, 754.609130859375, 1067.9844970703125, 267.62261962890625, -150.9414520263672, 296.4736328125, 563.2415161132812, 387.1690673828125, 538.4708251953125, 174.3277587890625, 348.9146728515625, 1834.7625732421875, 47.68291473388672, -348.0276184082031, 80.54209899902344, 91.04754638671875, -1016.1893310546875, -9.134490966796875, 483.20751953125, 858.1651000976562, -53.66267395019531, 910.7155151367188, 851.2433471679688, 133.11492919921875, 252.39373779296875, 100.21250915527344, 37.06074523925781, -738.96240234375, 303.83642578125, -116.36385345458984, 422.6084289550781, 793.7840576171875, 75.92311096191406, 437.56884765625, 2032.751708984375, 453.7557678222656, 255.19444274902344, 706.0801391601562, 112.94075012207031, 33.461273193359375, 1336.2972412109375, 412.68585205078125, 1101.4222412109375, 95.01499938964844, 73.69930267333984, 636.7313842773438, 476.1531066894531, 629.5115356445312, -614.94921875, 536.8757934570312, -192.33175659179688, 388.33929443359375, 248.9421844482422, 516.1925048828125, 372.7128601074219, -365.0286865234375, 520.2972412109375, -432.86749267578125, -474.96923828125, 116.14842224121094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000550.npy"}
{"epoch": 0.8076358296622613, "step": 551, "batch_size": 64, "mean": 553.640625, "std": 556.6996459960938, "min": -484.0754699707031, "p10": -111.1850448608398, "median": 494.8992156982422, "p90": 1455.9597290039062, "max": 1962.78955078125, "pos_frac": 0.828125, "sample": [39.92074966430664, -30.71490478515625, 451.0223083496094, 931.5975952148438, 673.0030517578125, 573.3369750976562, -484.0754699707031, -20.366954803466797, 1345.8953857421875, -325.004638671875, -131.451171875, 347.87225341796875, -63.89741516113281, 577.5770263671875, 578.6089477539062, 1645.7579345703125, 497.2231140136719, 409.32470703125, 574.2720336914062, 1719.725830078125, 1006.5458984375, 369.52484130859375, 667.5350341796875, 652.3521728515625, 277.58612060546875, 302.72222900390625, 986.6947631835938, 425.546142578125, 613.6433715820312, 375.8777770996094, 400.7711181640625, -24.62851333618164, 1491.512939453125, 170.30224609375, 492.5753173828125, 594.1953735351562, 859.089599609375, 690.678466796875, 536.3914794921875, 99.56565856933594, -245.78231811523438, 1249.0819091796875, 1456.1817626953125, 460.2202453613281, 111.64924621582031, 1623.29052734375, -416.2486267089844, -295.89410400390625, 1505.1558837890625, 827.9912719726562, -312.0002746582031, 137.57083129882812, 1455.441650390625, 951.22705078125, 433.5561828613281, 585.2755126953125, 763.2884521484375, 336.047607421875, 453.3674621582031, 1081.0167236328125, 122.62696838378906, 1962.78955078125, 552.0515747070312, 336.98883056640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000551.npy"}
{"epoch": 0.8091042584434655, "step": 552, "batch_size": 64, "mean": 611.0692749023438, "std": 725.7998046875, "min": -724.1978759765625, "p10": -112.9920143127441, "median": 539.0704650878906, "p90": 1469.8100341796876, "max": 2758.57666015625, "pos_frac": 0.84375, "sample": [821.8900146484375, 93.63077545166016, 1104.8861083984375, 2758.57666015625, 852.5582275390625, 1288.720458984375, 630.7552490234375, 1070.294921875, 36.70489501953125, 541.6686401367188, 667.7239379882812, 551.425048828125, 141.8567352294922, 481.3603515625, 239.19859313964844, 705.379150390625, 207.51486206054688, 656.3970336914062, 1372.6148681640625, -413.5984191894531, 52.371368408203125, 586.8970947265625, 512.9549560546875, 366.34283447265625, -724.1978759765625, 14.036903381347656, 36.68950653076172, 1483.4302978515625, 243.57322692871094, -225.72586059570312, 1417.435791015625, -588.7910766601562, 823.8472290039062, -74.2521743774414, 539.8106079101562, 132.89317321777344, 511.00634765625, 2465.576416015625, 1438.0294189453125, 1988.1683349609375, 538.330322265625, -189.69338989257812, 530.56591796875, 981.5711669921875, 639.1331787109375, 2646.85205078125, 255.6307373046875, 407.96295166015625, 1488.5457763671875, 2381.615234375, 542.0740966796875, 153.69998168945312, 436.60626220703125, -345.8099060058594, -69.7020263671875, 548.7578125, -129.5948028564453, 143.9926300048828, 422.01690673828125, 591.9302978515625, 618.7468872070312, 993.564697265625, 721.217529296875, -9.234321594238281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000552.npy"}
{"epoch": 0.8105726872246696, "step": 553, "batch_size": 64, "mean": 314.45751953125, "std": 589.4143676757812, "min": -801.7569580078125, "p10": -216.28503417968747, "median": 192.7423095703125, "p90": 999.8455688476563, "max": 2759.284423828125, "pos_frac": 0.734375, "sample": [2759.284423828125, 951.119873046875, -183.53704833984375, 501.7938537597656, 444.15032958984375, -114.5177993774414, 53.09861755371094, 42.28667449951172, 157.82057189941406, 315.98992919921875, 187.69131469726562, 276.84075927734375, 115.11454010009766, 82.34249877929688, -80.46295928955078, 230.10545349121094, -568.2596435546875, 13.91876220703125, 323.2777099609375, -180.2319793701172, 196.95797729492188, 169.21116638183594, 315.4935607910156, -465.2093505859375, 530.0089111328125, -83.25721740722656, 1187.064453125, 92.20111083984375, -2.5496749877929688, 282.490966796875, -161.5863800048828, 480.6319274902344, -230.31988525390625, -150.47132873535156, 714.3751220703125, 345.2750244140625, 349.023193359375, 133.66943359375, 942.5059814453125, -126.8468246459961, 424.19769287109375, -801.7569580078125, 633.6895141601562, 584.7074584960938, 188.52664184570312, 118.76171875, -712.3673095703125, 28.3780517578125, 504.1368408203125, 22.61492156982422, 1172.907958984375, -8.979713439941406, 1349.5675048828125, 514.6692504882812, 904.33837890625, 1152.957275390625, 267.42547607421875, 1002.1179809570312, -487.34564208984375, 1948.12939453125, -230.8154296875, 994.5432739257812, 35.59754180908203, 672.7849731445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000553.npy"}
{"epoch": 0.8120411160058737, "step": 554, "batch_size": 64, "mean": 430.4720153808594, "std": 719.0418701171875, "min": -957.6266479492188, "p10": -470.5794860839843, "median": 382.66773986816406, "p90": 1476.1581665039068, "max": 2335.486328125, "pos_frac": 0.765625, "sample": [164.41981506347656, -666.4711303710938, 887.9465942382812, 469.3446350097656, 219.68167114257812, 931.3486328125, 358.3284606933594, 433.9839172363281, -409.8787536621094, 351.5589599609375, 2335.486328125, 412.9105224609375, 418.21038818359375, 364.1973571777344, -207.3188018798828, 469.26751708984375, 610.079345703125, 575.6331787109375, 609.84228515625, 3.7271499633789062, -821.65869140625, -689.13232421875, -496.5940856933594, 5.912849426269531, 2025.9425048828125, -220.3211669921875, 249.33462524414062, 401.8624267578125, 451.8143310546875, 854.2120361328125, 568.0933837890625, 1531.803955078125, 1677.2808837890625, 176.21011352539062, 993.4906005859375, 347.1918640136719, 916.055908203125, -16.19803237915039, 1712.031005859375, -189.57359313964844, -84.18472290039062, 77.26091766357422, 1139.742919921875, 612.156494140625, 994.5809326171875, 122.14006042480469, 322.48443603515625, 2.615386962890625, 928.624267578125, 854.5491943359375, -957.6266479492188, 782.885498046875, 305.4807434082031, -664.0053100585938, 1100.8404541015625, -933.877197265625, -340.1078186035156, 33.92322540283203, 401.13812255859375, 1346.3179931640625, 1873.7335205078125, 1837.5130615234375, 105.5241928100586, -121.55792236328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000554.npy"}
{"epoch": 0.8135095447870778, "step": 555, "batch_size": 64, "mean": 504.0443420410156, "std": 777.2073364257812, "min": -1214.7677001953125, "p10": -233.6717803955078, "median": 419.50035095214844, "p90": 1623.0852416992188, "max": 2530.083984375, "pos_frac": 0.796875, "sample": [500.82147216796875, 407.24945068359375, 1458.481201171875, 295.9839172363281, 598.6405029296875, 1320.7567138671875, 486.86761474609375, 168.11312866210938, -171.31585693359375, 205.64840698242188, -223.15806579589844, 900.5160522460938, 771.8079833984375, 496.26593017578125, 12.31365966796875, 71.84994506835938, 189.68576049804688, -285.1597595214844, 2530.083984375, 1311.9766845703125, 117.66851806640625, 2137.29052734375, 44.04120635986328, 454.548828125, 511.9046630859375, 128.31240844726562, 912.5953369140625, 35.49739074707031, 498.0588684082031, 85.75291442871094, 763.369873046875, 1260.50048828125, 638.7825927734375, 519.0714111328125, 19.368865966796875, 246.1211395263672, 749.2457275390625, -618.77880859375, 172.64096069335938, -191.6689453125, -1041.2889404296875, 1098.0382080078125, 1809.0396728515625, 2268.15771484375, -587.6224365234375, 490.0961608886719, 2213.9443359375, 111.53501892089844, 431.7512512207031, 82.1392822265625, 1601.9964599609375, -494.319091796875, 1632.123291015625, -1214.7677001953125, -135.06707763671875, -117.54915618896484, 1933.068603515625, 564.333251953125, -156.66696166992188, 142.641357421875, 1295.8355712890625, 339.7868957519531, -238.1776580810547, 698.05810546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000555.npy"}
{"epoch": 0.8149779735682819, "step": 556, "batch_size": 64, "mean": 298.2765808105469, "std": 454.6336975097656, "min": -488.6456298828125, "p10": -244.83353881835933, "median": 251.74628448486328, "p90": 904.4056396484375, "max": 1468.285888671875, "pos_frac": 0.703125, "sample": [1375.5775146484375, 206.6085205078125, 87.77395629882812, -124.598876953125, 276.3497619628906, 140.45675659179688, 604.482421875, 416.8486633300781, -212.98464965820312, 337.19683837890625, 92.55928802490234, 732.3974609375, 275.85919189453125, -304.8255920410156, 218.25628662109375, 562.3358764648438, 1202.3929443359375, -3.19049072265625, 696.4498901367188, -188.85992431640625, -488.6456298828125, -82.59102630615234, -326.1022644042969, 149.5463409423828, -382.307373046875, -154.68389892578125, 423.0707702636719, -66.60444641113281, 907.5036010742188, -287.276611328125, 431.98480224609375, 380.39776611328125, 254.70359802246094, 96.37329864501953, 853.7781982421875, -51.23601531982422, 431.3431396484375, 121.90849304199219, 219.92941284179688, -57.908729553222656, -113.7564697265625, 740.537841796875, 176.5027618408203, 120.01697540283203, -258.4830627441406, 574.0432739257812, 992.6845703125, 456.5574951171875, 1005.4976196289062, 45.996620178222656, 248.78897094726562, -149.0029296875, 897.1770629882812, 284.05926513671875, 1468.285888671875, -84.03478240966797, 890.115478515625, 1422.932861328125, 467.2787780761719, 508.431884765625, 418.1942138671875, -355.7344970703125, 313.38470458984375, 255.957275390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000556.npy"}
{"epoch": 0.8164464023494861, "step": 557, "batch_size": 64, "mean": 453.0914306640625, "std": 739.7955932617188, "min": -1553.614501953125, "p10": -343.3324737548828, "median": 394.72509765625, "p90": 1322.1692260742193, "max": 2820.03955078125, "pos_frac": 0.75, "sample": [-1553.614501953125, 120.21824645996094, -1036.33154296875, 862.9737548828125, 386.925537109375, 229.4793701171875, 160.6613006591797, 532.5223388671875, 694.71337890625, 96.0230712890625, 1026.482177734375, 1370.4991455078125, 1013.7598266601562, -7.049072265625, 145.79718017578125, 341.14288330078125, 461.96392822265625, 974.6533203125, -10.817012786865234, 111.37744903564453, 80.36448669433594, 1209.3994140625, 1832.3446044921875, 698.5501708984375, -25.01007080078125, 530.3843994140625, 12.37603759765625, -442.95794677734375, -339.3760681152344, 817.2784423828125, 171.4415283203125, 618.5635986328125, 1193.9866943359375, 582.4010009765625, 237.4526824951172, 203.26234436035156, 450.2917175292969, -570.0850219726562, 931.1505737304688, 568.1021728515625, 869.0078735351562, -277.4707946777344, 543.5962524414062, 136.23110961914062, 178.2930450439453, -532.3497314453125, 755.10986328125, 1767.3475341796875, -423.1295471191406, 2820.03955078125, -38.45305252075195, 2368.55810546875, 1520.74853515625, 170.54122924804688, 1068.909912109375, -128.1240997314453, -345.028076171875, 1562.8409423828125, -276.007080078125, 1156.94140625, -19.12541961669922, 509.2838439941406, 402.524658203125, 526.2645263671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000557.npy"}
{"epoch": 0.8179148311306902, "step": 558, "batch_size": 64, "mean": 610.5439453125, "std": 648.1548461914062, "min": -1697.0460205078125, "p10": -98.62938537597654, "median": 496.77410888671875, "p90": 1414.8775390625, "max": 2034.571044921875, "pos_frac": 0.8125, "sample": [437.4400634765625, 1051.28466796875, 225.1404266357422, 1386.26025390625, 174.91815185546875, 309.5317687988281, 564.579833984375, 91.10670471191406, 951.7332763671875, -360.4360046386719, -160.04043579101562, 1273.470458984375, 1338.8587646484375, 1157.4749755859375, 449.2630310058594, 1159.035400390625, 655.3182983398438, 355.2321472167969, 1715.131591796875, 1917.70751953125, -273.7369079589844, 376.44219970703125, 275.4136962890625, 1202.0667724609375, -20.04779815673828, 780.7271728515625, 1427.14208984375, 443.17803955078125, 1451.134033203125, 542.74658203125, 415.3393859863281, 604.0494384765625, 899.5602416992188, 1252.7918701171875, 1442.2608642578125, 1056.1197509765625, -1697.0460205078125, -73.035400390625, 499.5582580566406, 190.3260040283203, 373.11676025390625, -5.412199020385742, 125.78465270996094, 396.04559326171875, 469.72100830078125, -44.13108825683594, -123.11872863769531, 2034.571044921875, 1113.84375, -0.5271186828613281, 788.8231201171875, 1153.850341796875, 614.2150268554688, 493.9899597167969, 1142.2156982421875, -563.060546875, 839.2767333984375, 390.0314636230469, 431.078369140625, -109.59823608398438, 224.75515747070312, 1481.6810302734375, 1255.765869140625, 1103.896728515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000558.npy"}
{"epoch": 0.8193832599118943, "step": 559, "batch_size": 64, "mean": 505.05621337890625, "std": 664.85205078125, "min": -905.0733642578125, "p10": -121.57596588134764, "median": 308.7906494140625, "p90": 1270.2769042968753, "max": 2409.185302734375, "pos_frac": 0.796875, "sample": [1025.23779296875, 1393.1832275390625, 714.5654296875, 715.4814453125, 119.27906036376953, 46.68098449707031, 103.49263000488281, 149.61073303222656, 1291.018798828125, 2409.185302734375, 1172.09423828125, 393.0223388671875, 1178.761474609375, -1.6250324249267578, 91.55911254882812, 754.373046875, 1112.715576171875, 1408.2476806640625, -905.0733642578125, -496.15673828125, 1183.6126708984375, 293.50445556640625, 214.70755004882812, 446.18896484375, 248.6428985595703, 1157.26611328125, -163.76687622070312, -106.76959228515625, 1221.879150390625, 194.34381103515625, -0.26982879638671875, 267.372802734375, 118.01020050048828, 59.65662384033203, 98.39482116699219, -127.92155456542969, 31.31387710571289, -74.48986053466797, 213.0779571533203, 208.073974609375, 2128.83984375, 540.613037109375, 961.566162109375, 122.9715347290039, -807.4738159179688, 813.4453735351562, 657.6910400390625, 655.6077270507812, 1145.21435546875, -70.97164154052734, 533.0274658203125, 224.47589111328125, 324.07684326171875, 1156.79296875, 1606.4490966796875, 1978.1474609375, -388.9490966796875, 785.3983764648438, 1034.8841552734375, 505.1214599609375, -15.934011459350586, 70.90623474121094, -505.59429931640625, 708.8079833984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000559.npy"}
{"epoch": 0.8208516886930984, "step": 560, "batch_size": 64, "mean": 373.83026123046875, "std": 598.6519775390625, "min": -982.2417602539062, "p10": -266.4108215332031, "median": 323.6508331298828, "p90": 1269.5274047851565, "max": 2129.708251953125, "pos_frac": 0.71875, "sample": [2129.708251953125, -367.14617919921875, -101.3096694946289, 1160.088623046875, -451.8928527832031, 659.8009033203125, 100.2563705444336, -129.93780517578125, 102.5452880859375, 484.7088317871094, 239.8980712890625, 153.84024047851562, 242.94326782226562, 539.8818359375, 1565.08203125, -247.96759033203125, 716.0604858398438, 1309.6312255859375, 519.84716796875, 1025.98095703125, 711.36572265625, -120.44073486328125, 193.02902221679688, 1388.5938720703125, 1019.7023315429688, -649.5044555664062, -8.103572845458984, 1228.0789794921875, 422.9608154296875, -165.31939697265625, 921.7905883789062, -180.47242736816406, 26.034215927124023, 550.1781005859375, 476.73602294921875, -513.4573364257812, 123.97010040283203, 1452.9912109375, 251.17626953125, 523.9534301757812, 532.3650512695312, -144.15374755859375, 407.18341064453125, 190.97555541992188, 769.6255493164062, 1371.3211669921875, -109.93052673339844, -50.39086151123047, -205.11036682128906, 1287.291015625, 556.1717529296875, 265.0051574707031, 580.8082275390625, 215.50950622558594, 382.2965087890625, 386.4845275878906, -274.3150634765625, 494.56201171875, 824.0554809570312, 639.7408447265625, -708.8717041015625, 97.11502838134766, -982.2417602539062, 94.35748291015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000560.npy"}
{"epoch": 0.8223201174743024, "step": 561, "batch_size": 64, "mean": 297.10052490234375, "std": 682.1762084960938, "min": -2537.79248046875, "p10": -302.2575805664062, "median": 241.31307983398438, "p90": 1123.187377929688, "max": 1976.7691650390625, "pos_frac": 0.71875, "sample": [614.8836059570312, 447.51373291015625, 124.67855072021484, 69.5562973022461, -1152.125, 1421.05859375, 743.7572021484375, 83.53996276855469, 707.2421875, 1160.951416015625, 154.12744140625, 538.027099609375, 1976.7691650390625, -148.89454650878906, 193.86624145507812, 190.38455200195312, 176.038818359375, -243.97952270507812, 383.24981689453125, 461.1363525390625, 1326.0687255859375, 160.94363403320312, 579.0324096679688, 312.0821838378906, -259.4786682128906, 88.43748474121094, 878.271728515625, 319.8695373535156, -53.82556915283203, 1630.1861572265625, 723.8350830078125, 1267.574462890625, -0.080780029296875, 276.1146545410156, 975.254638671875, -619.3108520507812, -320.5914001464844, 527.529296875, 20.80814552307129, 653.8576049804688, 120.75647735595703, -421.5010681152344, -68.89264678955078, 850.6455688476562, 514.7523193359375, -122.98162078857422, 97.55084228515625, 772.0585327148438, 1035.0712890625, 225.63113403320312, -2537.79248046875, 1719.51171875, 75.62605285644531, -196.04058837890625, 376.5426025390625, -29.53973388671875, 477.0926513671875, -85.91569519042969, 668.5960693359375, 314.90655517578125, -668.0772705078125, 256.9950256347656, -530.3172607421875, -218.60391235351562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000561.npy"}
{"epoch": 0.8237885462555066, "step": 562, "batch_size": 64, "mean": 433.372802734375, "std": 657.5842895507812, "min": -1256.5343017578125, "p10": -271.4836791992187, "median": 380.0679626464844, "p90": 1050.35205078125, "max": 2948.965576171875, "pos_frac": 0.765625, "sample": [615.156982421875, 2948.965576171875, 1425.374267578125, 545.206298828125, 155.62850952148438, 416.1089172363281, 746.7314453125, 300.57208251953125, 1106.4197998046875, 783.36474609375, 882.2236328125, 931.6927490234375, 270.15020751953125, 528.5037231445312, 1007.7985229492188, -241.56846618652344, 245.66802978515625, 923.0977783203125, 138.69764709472656, 410.6380615234375, 349.49786376953125, 627.9041137695312, 517.2271728515625, -300.5362243652344, 35.24571990966797, 814.6203002929688, 1052.30908203125, -48.189697265625, 1006.143798828125, 233.8214111328125, -284.32965087890625, -244.6048583984375, -14.295305252075195, 755.754638671875, -14.631477355957031, 2421.3525390625, -147.18316650390625, 453.85528564453125, 600.7864990234375, 999.891357421875, 729.1099853515625, 274.32757568359375, 662.7353515625, 280.7889404296875, -1256.5343017578125, 263.76861572265625, -283.003173828125, 913.4450073242188, 456.90185546875, 1045.78564453125, -20.138484954833984, -126.95854187011719, 84.8454360961914, 45.05741500854492, 720.8292236328125, 268.60382080078125, -747.5447387695312, 64.95355987548828, 162.95359802246094, -622.9660034179688, 98.60984802246094, -541.8917236328125, 1204.5496826171875, 1102.5589599609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000562.npy"}
{"epoch": 0.8252569750367107, "step": 563, "batch_size": 64, "mean": 316.32666015625, "std": 795.9945068359375, "min": -3204.349609375, "p10": -564.0203552246094, "median": 389.9095764160156, "p90": 1040.1227416992188, "max": 1859.3193359375, "pos_frac": 0.765625, "sample": [207.80322265625, -3204.349609375, -1058.66650390625, 409.49072265625, 812.0031127929688, 133.3038330078125, 1452.681884765625, -36.820343017578125, 324.44097900390625, 654.84228515625, 903.148681640625, 207.14727783203125, -633.10888671875, 1025.1165771484375, 799.7896118164062, 795.2847900390625, 105.96788024902344, 1859.3193359375, 717.8923950195312, -218.81394958496094, 614.8914794921875, 152.48196411132812, -179.39889526367188, 353.324951171875, 977.0519409179688, -27.000988006591797, 766.8839721679688, 321.141845703125, 1143.3148193359375, 989.1341552734375, 139.391357421875, 770.1881103515625, -1654.327392578125, 567.9535522460938, -170.5060577392578, 140.59271240234375, 4.749546051025391, 840.814208984375, 587.9815673828125, 374.21124267578125, 892.39501953125, 631.9253540039062, 1309.7266845703125, 266.11395263671875, 1046.553955078125, -186.4530029296875, -1389.8062744140625, 1794.4696044921875, -569.2149658203125, 1402.7708740234375, 33.21396255493164, 758.7520141601562, 74.93701934814453, -697.42529296875, 810.856689453125, -551.8995971679688, 446.29107666015625, -249.95880126953125, 99.40589904785156, 581.0908813476562, 405.60791015625, 416.8927917480469, 352.076904296875, 597.236572265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000563.npy"}
{"epoch": 0.8267254038179148, "step": 564, "batch_size": 64, "mean": 413.9423828125, "std": 545.6494750976562, "min": -565.8392333984375, "p10": -254.36305541992184, "median": 342.28297424316406, "p90": 1167.9204223632821, "max": 2016.4022216796875, "pos_frac": 0.796875, "sample": [356.0384521484375, 450.7850036621094, 751.8013916015625, 139.9794158935547, 716.2030639648438, 565.1835327148438, 705.8507080078125, -324.4419250488281, 385.8814392089844, -565.8392333984375, 652.351806640625, 758.8104248046875, -379.29803466796875, 516.535400390625, 980.1104736328125, 32.226593017578125, -194.5458526611328, 1452.3880615234375, 1248.410400390625, 480.71026611328125, 328.5274963378906, 283.0655517578125, 196.88584899902344, -267.0928039550781, -376.53253173828125, 319.9413146972656, -224.66030883789062, 230.41983032226562, 18.754058837890625, -399.861083984375, 357.640625, 1817.658447265625, 712.9768676757812, -103.33440399169922, 40.029579162597656, 419.25347900390625, 161.8772735595703, 86.7904281616211, 446.1201171875, -1.4001579284667969, 619.1512451171875, 2016.4022216796875, -43.3857421875, 624.8338623046875, 768.2474975585938, 832.2034912109375, 1498.55615234375, 801.4663696289062, 1437.8876953125, 381.9957275390625, 36.29161834716797, 671.0221557617188, 7.096717834472656, 77.47393798828125, 115.50611877441406, 288.6767578125, 197.53419494628906, 908.81689453125, 309.0909729003906, -123.94203186035156, 1387.671142578125, -273.69940185546875, 959.0762939453125, 218.13778686523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000564.npy"}
{"epoch": 0.8281938325991189, "step": 565, "batch_size": 64, "mean": 443.851806640625, "std": 782.0408325195312, "min": -1350.3958740234375, "p10": -380.18912048339837, "median": 277.72511291503906, "p90": 1302.3808837890626, "max": 2725.807373046875, "pos_frac": 0.65625, "sample": [-12.507099151611328, 1146.3343505859375, 1537.511474609375, -1350.3958740234375, 236.37548828125, 948.3760986328125, 2193.13916015625, -145.01564025878906, -163.4427032470703, -318.06640625, 566.8494873046875, 1145.76806640625, -252.0709228515625, -1112.1444091796875, 269.33294677734375, -487.96844482421875, -406.8131408691406, -35.36407470703125, -100.57901000976562, 515.4405517578125, 1232.6116943359375, -27.472068786621094, 236.95843505859375, 1691.5010986328125, -522.6870727539062, -17.44725799560547, 183.68426513671875, 346.8381652832031, -660.3975830078125, 1115.142333984375, 659.4734497070312, 235.29331970214844, 661.3097534179688, 864.92578125, 918.2789916992188, -2.9950408935546875, 903.0046997070312, 656.0504760742188, -288.22357177734375, -120.15349578857422, -71.7593994140625, 960.9109497070312, 777.3993530273438, 1759.9573974609375, 1017.3781127929688, 2725.807373046875, 567.7443237304688, 2375.5830078125, 1249.8128662109375, -583.9713134765625, -170.9884033203125, 15.009231567382812, 1015.6890869140625, 503.06817626953125, 771.432373046875, 263.2593688964844, 1119.156494140625, 286.1172790527344, 17.860286712646484, 26.66956329345703, -275.27294921875, 315.8607177734375, 174.4274139404297, 1324.9100341796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000565.npy"}
{"epoch": 0.8296622613803231, "step": 566, "batch_size": 64, "mean": 488.22674560546875, "std": 785.5648193359375, "min": -1459.34326171875, "p10": -424.5784271240234, "median": 550.1907348632812, "p90": 1488.8078125000002, "max": 2356.8896484375, "pos_frac": 0.71875, "sample": [-171.95782470703125, -752.8847045898438, 698.1480712890625, 2356.8896484375, 507.8945617675781, -178.32046508789062, 662.5811767578125, 142.80667114257812, -954.860107421875, 541.6007080078125, 618.3170166015625, -270.5606689453125, -25.56591796875, 690.8594360351562, 825.8959350585938, 413.805908203125, 760.4400634765625, 135.49790954589844, 28.04159927368164, -1459.34326171875, 1068.395263671875, 652.8931884765625, 558.78076171875, 47.99176025390625, 1449.990966796875, -519.9365234375, 588.8248901367188, 1117.0399169921875, -122.80650329589844, 1182.1639404296875, 918.134521484375, 963.7220458984375, 758.3123168945312, 102.89643859863281, 1012.810546875, -383.4032897949219, -96.2328872680664, 1971.8609619140625, 1088.12890625, -260.5928955078125, -442.22491455078125, 844.3876953125, 527.4124755859375, 1505.443603515625, 1430.5623779296875, 1882.9212646484375, 2206.97607421875, 589.3837890625, -898.5662231445312, 290.776611328125, -652.8295288085938, 1769.087646484375, 777.431884765625, 1650.4840087890625, -28.153493881225586, -381.87835693359375, 1139.4769287109375, 373.25494384765625, 1135.273193359375, -357.2320556640625, 50.63074493408203, 42.8001708984375, 677.4224243164062, 445.4090270996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000566.npy"}
{"epoch": 0.8311306901615272, "step": 567, "batch_size": 64, "mean": 369.55865478515625, "std": 499.2571716308594, "min": -1154.848388671875, "p10": -118.44011688232418, "median": 286.6072998046875, "p90": 990.5265075683595, "max": 1857.0963134765625, "pos_frac": 0.859375, "sample": [142.42230224609375, 8.759359359741211, 1337.8790283203125, 661.06884765625, 380.4715576171875, 671.0885009765625, 164.78887939453125, -168.89016723632812, 1054.754638671875, 180.7745361328125, 1007.8424072265625, 43.70124053955078, 1165.80908203125, 268.62261962890625, 182.44607543945312, 271.82452392578125, 1245.6204833984375, 644.70263671875, 639.487060546875, 47.7202033996582, 572.368408203125, 191.13778686523438, -308.2669982910156, 574.7398071289062, 344.8468017578125, 663.5238647460938, 690.626220703125, 1555.8642578125, -82.26609802246094, 702.2620849609375, 23.845388412475586, 301.39007568359375, 374.88232421875, -1154.848388671875, 243.4427490234375, -32.00593948364258, 118.37820434570312, 739.1722412109375, 1857.0963134765625, 833.5447387695312, 225.20431518554688, -844.5355834960938, 228.8251953125, 100.05677032470703, 207.00233459472656, 950.1227416992188, 335.95404052734375, 210.29644775390625, 494.10040283203125, 408.029052734375, 229.60447692871094, 936.60302734375, 5.790214538574219, 467.77557373046875, -135.37745666503906, 317.49334716796875, 147.10812377929688, 268.88214111328125, -133.94326782226562, 128.7852783203125, 415.957275390625, 541.5792846679688, -474.40655517578125, 460.21923828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000567.npy"}
{"epoch": 0.8325991189427313, "step": 568, "batch_size": 64, "mean": 504.41290283203125, "std": 578.3226318359375, "min": -812.0108032226562, "p10": -168.02238922119133, "median": 412.47467041015625, "p90": 1185.023425292969, "max": 2041.5107421875, "pos_frac": 0.859375, "sample": [755.1221313476562, -812.0108032226562, 1290.1727294921875, 984.33349609375, 498.637451171875, 426.1957092285156, 758.6397094726562, 796.1929931640625, 398.7536315917969, -805.4494018554688, -390.87713623046875, -198.99801635742188, 666.4249267578125, 1969.647705078125, 243.30767822265625, 518.980224609375, 229.708251953125, 190.18902587890625, 229.9127960205078, -277.4935302734375, 621.9708862304688, 383.6717834472656, 966.3316650390625, 1088.9674072265625, 616.4227294921875, 273.8141174316406, 121.17391204833984, 2041.5107421875, 242.54129028320312, 448.5699768066406, 929.2298583984375, 811.885986328125, 1127.4189453125, 621.561279296875, 1647.8695068359375, 374.8484802246094, 175.00315856933594, 689.6133422851562, 278.0769348144531, 970.5525512695312, 1031.244873046875, 26.65127944946289, 804.6077880859375, 910.2531127929688, -1.6413726806640625, 352.34246826171875, 153.45236206054688, -95.74592590332031, 336.4160461425781, 1209.7110595703125, 89.44173431396484, 1354.1754150390625, 370.06182861328125, -676.7625122070312, 232.01507568359375, -410.89013671875, 1363.250244140625, 1102.30322265625, 270.0842590332031, 633.737548828125, 194.906982421875, 789.984619140625, 195.8907470703125, 144.5127410888672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000568.npy"}
{"epoch": 0.8340675477239354, "step": 569, "batch_size": 64, "mean": 397.50946044921875, "std": 765.1259155273438, "min": -2427.48046875, "p10": -336.70230407714837, "median": 322.66209411621094, "p90": 1378.3633911132813, "max": 2235.118896484375, "pos_frac": 0.78125, "sample": [-706.6355590820312, 142.53115844726562, 2000.727294921875, 182.26760864257812, 443.071044921875, -219.83934020996094, 697.04931640625, 113.88931274414062, -603.8681030273438, 179.87620544433594, 789.5076293945312, 1396.2452392578125, 512.91064453125, 583.5173950195312, 383.8172607421875, -594.9176025390625, 267.127685546875, 930.2063598632812, -275.31060791015625, -135.99856567382812, -1333.231689453125, 1749.05712890625, 89.85940551757812, -234.52337646484375, 5.508150100708008, 703.26953125, 1047.1656494140625, 24.973434448242188, 36.666351318359375, 107.72247314453125, 1392.6689453125, 962.79638671875, 737.4940185546875, -48.012939453125, 1498.1580810546875, 398.44281005859375, 122.95068359375, 352.6907653808594, 776.0863037109375, -2427.48046875, 584.6375732421875, 217.86013793945312, -78.72052001953125, 1206.0513916015625, 934.637939453125, 129.53282165527344, 857.7034301757812, 187.34788513183594, -363.0130310058594, 836.706787109375, -506.7860107421875, 1981.290771484375, 12.061454772949219, 548.4763793945312, 1074.20703125, -246.174560546875, 131.11737060546875, 750.4053955078125, 292.6334228515625, 76.99652099609375, 582.7365112304688, 602.3583984375, 1344.9837646484375, 2235.118896484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000569.npy"}
{"epoch": 0.8355359765051396, "step": 570, "batch_size": 64, "mean": 382.88421630859375, "std": 648.6878051757812, "min": -1059.5238037109375, "p10": -299.8995819091797, "median": 295.6323699951172, "p90": 1306.945727539063, "max": 2897.11669921875, "pos_frac": 0.75, "sample": [282.41168212890625, 1106.266357421875, 440.63134765625, 1698.9498291015625, 2897.11669921875, 171.3791046142578, 760.3261108398438, 1429.985595703125, 1624.24658203125, 305.84375, 377.00494384765625, 964.0786743164062, 285.4209899902344, 533.2833251953125, 125.3399887084961, 399.78070068359375, 921.883056640625, 489.69390869140625, 709.832275390625, -287.576416015625, 804.5797729492188, -305.1809387207031, -599.8438720703125, -773.7577514648438, 129.48643493652344, 850.0039672851562, -663.1452026367188, -318.2352600097656, 250.90093994140625, 384.05010986328125, 501.9766540527344, 279.7181396484375, -200.33892822265625, -532.0502319335938, 226.38858032226562, 317.47662353515625, 244.7818603515625, 455.87518310546875, 106.66155242919922, 1354.56787109375, 475.805419921875, -201.900390625, 323.8370056152344, 171.16986083984375, 1420.673828125, 306.8878173828125, 169.7719268798828, 746.5575561523438, 1199.93408203125, 714.76171875, -7.291095733642578, 215.38131713867188, 565.7052001953125, 393.6519775390625, -80.45240020751953, -1059.5238037109375, 209.60009765625, -20.627365112304688, 228.2654571533203, -122.88601684570312, -211.54690551757812, 1352.807861328125, 162.66726684570312, -198.4726104736328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000570.npy"}
{"epoch": 0.8370044052863436, "step": 571, "batch_size": 64, "mean": 448.14007568359375, "std": 725.8352661132812, "min": -1490.63427734375, "p10": -460.2388610839844, "median": 424.2345886230469, "p90": 1425.7176879882816, "max": 2242.4765625, "pos_frac": 0.765625, "sample": [569.5404663085938, 880.2353515625, 79.93794250488281, 880.9571533203125, -253.67141723632812, 610.148681640625, 69.49629211425781, 1049.98486328125, 321.642333984375, 1184.1077880859375, 2014.96484375, -462.5851745605469, -299.1931457519531, 1824.595458984375, 218.2018280029297, 554.1615600585938, 820.592041015625, 795.3182983398438, 509.8592224121094, 879.5662231445312, 2242.4765625, 1834.55712890625, 725.6552124023438, 1272.9736328125, -454.7641296386719, -31.999513626098633, 753.9453125, 364.90350341796875, 1508.3099365234375, 1183.8116455078125, 162.67396545410156, -99.41265869140625, 310.0647277832031, 1745.168212890625, 857.309814453125, -52.5690803527832, -669.15966796875, 148.97225952148438, 1350.5147705078125, 604.4927978515625, 928.7197875976562, -537.0160522460938, 1457.947509765625, 597.9905395507812, 483.565673828125, 76.42993927001953, 545.681640625, 146.44801330566406, 660.1968383789062, 49.43559265136719, -565.84326171875, -1490.63427734375, 295.95758056640625, 59.5478515625, -729.8971557617188, 363.673095703125, 249.8347625732422, 527.8758544921875, -901.4287109375, 21.598003387451172, -274.1914367675781, 610.2000122070312, -34.442352294921875, 133.529541015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000571.npy"}
{"epoch": 0.8384728340675477, "step": 572, "batch_size": 64, "mean": 471.08837890625, "std": 817.552490234375, "min": -1480.3902587890625, "p10": -350.2122406005859, "median": 399.5254669189453, "p90": 1503.9408081054694, "max": 3280.940673828125, "pos_frac": 0.6875, "sample": [-368.1257629394531, -90.33741760253906, -397.54156494140625, 542.017333984375, 1975.5714111328125, -140.91119384765625, 25.549224853515625, 30.51184844970703, -15.95071029663086, 154.37142944335938, -85.9798355102539, 1358.699951171875, 973.62255859375, 403.4893798828125, 689.9518432617188, 1080.2672119140625, 1043.58154296875, 1236.1214599609375, 1566.1868896484375, 641.3079833984375, 273.90234375, 727.7666015625, -44.9660530090332, 1141.8310546875, -1112.5194091796875, 64.7640609741211, 427.3343505859375, -329.78192138671875, -274.4638977050781, 847.630126953125, -648.896240234375, 565.5751342773438, 468.2835693359375, -70.52963256835938, 251.94375610351562, 3280.940673828125, 75.06700897216797, 395.5615539550781, -333.54534912109375, -663.6537475585938, 1727.0631103515625, 35.50437545776367, -326.5196533203125, 637.1554565429688, -289.18511962890625, -357.3551940917969, 1131.5374755859375, 883.99560546875, 657.2610473632812, -12.829658508300781, 947.9970703125, 340.52825927734375, 389.6197204589844, 2207.548095703125, 302.80340576171875, 497.6719055175781, 698.0194091796875, 2207.918212890625, -25.47906494140625, -1480.3902587890625, 667.7810668945312, 1273.697998046875, 1728.3150634765625, 642.3505249023438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000572.npy"}
{"epoch": 0.8399412628487518, "step": 573, "batch_size": 64, "mean": 415.822021484375, "std": 599.5905151367188, "min": -598.7072143554688, "p10": -189.5795715332031, "median": 298.1593475341797, "p90": 1034.731298828125, "max": 2606.61328125, "pos_frac": 0.828125, "sample": [660.2565307617188, -124.13973999023438, 69.87752532958984, 9.32061767578125, 17.366439819335938, 2606.61328125, 928.2606811523438, -162.392578125, 102.22754669189453, 29.436080932617188, 468.8681335449219, 230.42738342285156, -598.7072143554688, 2185.6943359375, 412.5545654296875, 498.1776123046875, 1914.7237548828125, -529.6261596679688, 17.45899200439453, -201.23114013671875, -589.3811645507812, 1282.948486328125, 781.5880126953125, 599.3650512695312, 69.70270538330078, 308.28851318359375, 892.7360229492188, 1147.97314453125, 135.805419921875, 1207.1820068359375, -278.3477783203125, 976.8920288085938, 288.0301818847656, 585.6357421875, 659.9937744140625, -355.1209411621094, 332.2418212890625, 63.1512451171875, 793.550048828125, 29.45514678955078, 907.2352294921875, 367.3260803222656, 156.25848388671875, -98.65823364257812, 458.9739074707031, 69.0064697265625, 280.77691650390625, 61.99968338012695, 896.9473876953125, -296.07635498046875, 458.8468017578125, 227.77828979492188, 231.26153564453125, 739.8345336914062, 394.54290771484375, 91.05950164794922, -109.4793930053711, 998.5816650390625, 565.856689453125, 591.4617919921875, 167.533203125, 810.0217895507812, 1050.2239990234375, 124.43940734863281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000573.npy"}
{"epoch": 0.8414096916299559, "step": 574, "batch_size": 64, "mean": 454.5906982421875, "std": 620.6673583984375, "min": -1040.6273193359375, "p10": -338.6295257568359, "median": 431.1173553466797, "p90": 1389.715942382813, "max": 2017.048583984375, "pos_frac": 0.796875, "sample": [284.3877258300781, 737.0169067382812, -134.05160522460938, -287.3277587890625, 1286.755615234375, 126.16399383544922, 127.77606201171875, 1433.841796875, 1042.621826171875, 803.6371459960938, 52.34734344482422, 1523.0718994140625, 1039.497314453125, 868.1448974609375, 113.42599487304688, 723.3200073242188, 42.57122039794922, 430.90625, -648.7389526367188, 2017.048583984375, 559.2005004882812, 770.249267578125, 539.9662475585938, 674.2271118164062, 236.0748291015625, 740.9920043945312, 431.3284606933594, -411.71832275390625, 147.44993591308594, -292.221435546875, 164.49148559570312, -544.3040161132812, 209.65203857421875, 1632.794189453125, -60.90216827392578, 794.5595703125, -481.6232604980469, 885.9781494140625, 229.85752868652344, 536.9263916015625, 1006.6292114257812, -381.1444091796875, 726.6768798828125, 814.9371337890625, 743.1978759765625, 476.6266174316406, 511.28533935546875, 14.828964233398438, -1040.6273193359375, -358.5187072753906, 327.2074890136719, 95.61315155029297, 68.17414855957031, 1574.3466796875, -37.129669189453125, 888.4241943359375, 360.5162048339844, 1554.3858642578125, 254.6190643310547, 683.9881591796875, 143.18934631347656, 1781.87646484375, 543.6589965820312, -4.350942611694336], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000574.npy"}
{"epoch": 0.8428781204111601, "step": 575, "batch_size": 64, "mean": 417.9283447265625, "std": 741.8092041015625, "min": -1368.513671875, "p10": -374.722476196289, "median": 408.4753723144531, "p90": 1372.3492431640632, "max": 2464.44091796875, "pos_frac": 0.75, "sample": [1447.9796142578125, 229.8468780517578, 928.2656860351562, 378.79229736328125, 582.6873779296875, -1105.9317626953125, 554.326171875, 531.2344970703125, 215.17123413085938, 148.30711364746094, 85.45783233642578, -46.465065002441406, 635.502197265625, 1195.7664794921875, 680.5100708007812, 336.5994567871094, -133.05612182617188, 781.4992065429688, 302.6936950683594, 322.95843505859375, 395.7004699707031, -39.379150390625, 1481.339111328125, 675.2271728515625, 600.0661010742188, 330.5903015136719, -1368.513671875, 315.3862609863281, -767.0869750976562, 613.4768676757812, -1003.6361083984375, 1188.1494140625, 912.9484252929688, 1504.225341796875, 256.25567626953125, -242.55491638183594, -292.7073974609375, -125.93779754638672, 1436.0985107421875, 1049.458251953125, -275.630126953125, 228.29693603515625, -409.8717956542969, 1044.5465087890625, 1223.6009521484375, -876.6025390625, 501.7238464355469, 498.53680419921875, 827.5390625, 2464.44091796875, 522.7715454101562, 952.7694091796875, 421.2502746582031, -11.3623046875, 1608.4781494140625, 151.82833862304688, 580.9606323242188, -1321.3267822265625, 2197.779052734375, 86.39070129394531, -88.43578338623047, 655.67138671875, 141.45555114746094, 631.351318359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000575.npy"}
{"epoch": 0.8443465491923642, "step": 576, "batch_size": 64, "mean": 312.60577392578125, "std": 585.5676879882812, "min": -941.4209594726562, "p10": -381.8668823242187, "median": 229.1067886352539, "p90": 1000.8891540527344, "max": 1718.741943359375, "pos_frac": 0.75, "sample": [186.9087677001953, 974.8387451171875, -152.12281799316406, 35.391788482666016, -348.99371337890625, -150.34461975097656, -183.37339782714844, 1681.2620849609375, 941.3547973632812, 162.86944580078125, 288.2629699707031, 1666.620361328125, 111.35430908203125, 93.49275207519531, 791.5184936523438, 381.56414794921875, 501.5284118652344, -831.1246948242188, 275.3824462890625, 608.3364868164062, 529.1174926757812, 1074.2882080078125, 604.573486328125, 823.5572509765625, 7.343303680419922, 1441.0712890625, -97.92399597167969, -401.4534912109375, -343.0337219238281, 801.4371948242188, -153.3014373779297, 247.1863555908203, 37.356964111328125, 37.454551696777344, 1012.2481689453125, 593.3079833984375, 1718.741943359375, 349.66436767578125, 211.0272216796875, 760.6074829101562, 61.51233673095703, 840.9996337890625, 201.13534545898438, 307.2689208984375, 188.67410278320312, -897.7244262695312, -58.58198547363281, 686.96435546875, -676.5076904296875, 663.119140625, 396.4241943359375, 1005.4310302734375, 990.2914428710938, 897.1602172851562, -941.4209594726562, -404.7921447753906, 160.87078857421875, 252.20510864257812, 308.0601501464844, 194.392333984375, -395.95538330078125, 123.34164428710938, -322.51385498046875, 138.41799926757812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000576.npy"}
{"epoch": 0.8458149779735683, "step": 577, "batch_size": 64, "mean": 408.823486328125, "std": 662.3851318359375, "min": -1499.9072265625, "p10": -124.45094223022457, "median": 341.04017639160156, "p90": 1363.1252197265626, "max": 2462.689208984375, "pos_frac": 0.828125, "sample": [-1499.9072265625, 2462.689208984375, 1356.7520751953125, 1559.880615234375, 179.648681640625, 29.347213745117188, 340.6792297363281, 132.5297088623047, 160.446533203125, -65.44027709960938, 698.431396484375, 352.1156921386719, 587.1713256835938, 437.33221435546875, 445.475341796875, 765.396728515625, 1027.617919921875, 1365.8565673828125, -586.9407958984375, 624.9016723632812, 173.26051330566406, 85.99259185791016, -1034.5810546875, 229.9910888671875, 287.30413818359375, 793.3765869140625, 141.2322235107422, 233.82798767089844, 469.2237243652344, 408.5535888671875, 168.04281616210938, 479.2745361328125, 264.10052490234375, -92.78289031982422, 1814.8912353515625, 49.69488525390625, 431.77545166015625, -743.08056640625, 242.4704132080078, -395.5321044921875, 343.8830871582031, 183.57992553710938, 341.401123046875, 538.9924926757812, -16.06720733642578, 592.44921875, 62.62693786621094, 640.529052734375, 834.3929443359375, 1119.589599609375, 78.15556335449219, 327.3849182128906, 1467.4295654296875, -138.02296447753906, 1773.39501953125, 501.24151611328125, 477.42193603515625, -707.2640380859375, 98.20204162597656, 1599.4375, -39.19392395019531, 983.7018432617188, 387.1145935058594, 333.3045654296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000577.npy"}
{"epoch": 0.8472834067547724, "step": 578, "batch_size": 64, "mean": 450.94769287109375, "std": 584.2476196289062, "min": -898.1187744140625, "p10": -156.15890197753905, "median": 373.345703125, "p90": 1169.9358398437505, "max": 2203.907470703125, "pos_frac": 0.78125, "sample": [2203.907470703125, 224.67822265625, 788.593017578125, 101.55029296875, 552.2866821289062, 234.11822509765625, 1346.124267578125, 470.1529235839844, 215.86593627929688, -138.0391082763672, 139.40354919433594, 908.2766723632812, 488.7769775390625, 241.83987426757812, 93.82962036132812, -576.1928100585938, 109.28306579589844, -0.44077301025390625, 1908.08544921875, 834.89013671875, 690.3517456054688, 386.7955322265625, 1017.8804321289062, -141.1174774169922, -9.010719299316406, 880.1886596679688, 818.5576782226562, -110.07453155517578, -343.1359558105469, 126.83881378173828, -24.947982788085938, -380.3780517578125, 359.8958740234375, 416.1894226074219, 105.40516662597656, 540.2588500976562, -183.64404296875, 311.328369140625, -898.1187744140625, -149.0643310546875, 528.782958984375, 728.927978515625, 566.3228149414062, 454.4316711425781, 1341.2762451171875, 1015.5230102539062, 346.93975830078125, 73.72419738769531, 847.6297607421875, 185.64646911621094, 890.748779296875, 784.2390747070312, 59.67250442504883, 1226.5572509765625, 933.2139892578125, 1744.664306640625, -159.19943237304688, 248.90916442871094, 1492.7152099609375, 1037.8192138671875, 578.49267578125, -322.36346435546875, 128.9239044189453, 565.8656616210938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000578.npy"}
{"epoch": 0.8487518355359766, "step": 579, "batch_size": 64, "mean": 263.0865173339844, "std": 618.6154174804688, "min": -1730.3099365234375, "p10": -439.40496826171864, "median": 305.1524658203125, "p90": 1003.3020446777346, "max": 2289.741455078125, "pos_frac": 0.71875, "sample": [-61.601356506347656, -200.99229431152344, -1730.3099365234375, 564.20849609375, -71.4853515625, 117.84762573242188, 868.9215698242188, 364.8151550292969, 685.4552612304688, 406.6587219238281, 1.3073654174804688, -148.48240661621094, -751.1778564453125, 327.6142578125, 231.15347290039062, 662.558349609375, 211.6730194091797, 530.39404296875, 1130.892822265625, 212.16575622558594, 1027.2747802734375, 93.63800811767578, 322.37939453125, 507.95941162109375, 7.916738510131836, 467.7269287109375, 345.85888671875, 45.24414825439453, 576.9569091796875, 248.5844268798828, 562.0447998046875, -486.85748291015625, 609.7721557617188, 659.9560546875, 2289.741455078125, 1446.671630859375, 395.6932678222656, -789.060302734375, -50.48060989379883, 537.3689575195312, 1111.0233154296875, -270.3540954589844, 727.6241455078125, -328.68243408203125, 1231.8380126953125, 33.66960144042969, -38.17305374145508, -161.20753479003906, -567.6207275390625, 316.4254150390625, 947.3656616210938, 482.81463623046875, -92.65547180175781, 616.8956909179688, 354.6131896972656, -979.2010498046875, 1255.57568359375, 28.241331100463867, 293.8795166015625, -900.5361938476562, 173.7957000732422, -194.9984130859375, 283.93499755859375, 343.26336669921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000579.npy"}
{"epoch": 0.8502202643171806, "step": 580, "batch_size": 64, "mean": 508.994140625, "std": 712.4046020507812, "min": -1241.568359375, "p10": -97.60138778686523, "median": 360.2447052001953, "p90": 1314.2037231445313, "max": 3031.452392578125, "pos_frac": 0.796875, "sample": [-194.0856170654297, 1016.2464599609375, 246.75376892089844, 591.149169921875, -140.21246337890625, 96.85736846923828, 185.3096160888672, 2104.3583984375, 459.1296691894531, 301.8375549316406, 420.8721618652344, -48.41650390625, 3031.452392578125, 243.54226684570312, 65.29202270507812, 693.54833984375, 393.94287109375, 93.59941101074219, 856.256591796875, 501.5244140625, 282.4189453125, 1005.7210693359375, 1112.4453125, 496.5337829589844, 554.9393310546875, 11.128242492675781, 1200.0426025390625, 1297.8729248046875, 551.682373046875, 144.3387451171875, -1.5579605102539062, 150.3757781982422, 102.7138900756836, 134.57630920410156, 807.9853515625, 1321.20263671875, 34.735076904296875, -104.56339263916016, 837.35791015625, 805.8763427734375, 1958.7908935546875, 9.697675704956055, -1241.568359375, 1071.5869140625, -56.59552764892578, 769.5264282226562, -430.9908752441406, -95.3839111328125, 621.835205078125, 2805.66015625, 48.95439147949219, -66.60235595703125, 1487.9071044921875, 132.26382446289062, 422.915771484375, 391.3563232421875, -98.5517349243164, 329.1330871582031, -6.433874130249023, -294.5851135253906, 459.9810791015625, 984.9277954101562, 1396.93603515625, 310.08209228515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000580.npy"}
{"epoch": 0.8516886930983847, "step": 581, "batch_size": 64, "mean": 357.78289794921875, "std": 637.6695556640625, "min": -875.061279296875, "p10": -495.1543975830078, "median": 370.118896484375, "p90": 1024.33544921875, "max": 2957.865234375, "pos_frac": 0.734375, "sample": [144.10861206054688, 112.58351135253906, 165.84768676757812, -113.98545837402344, 389.7462158203125, 430.6314392089844, -527.4578247070312, 142.52471923828125, 1017.5311279296875, -621.5623779296875, 772.1749877929688, 800.8275146484375, 573.5271606445312, 363.26129150390625, 655.4034423828125, -461.9369201660156, 1445.89013671875, 902.6315307617188, 508.12762451171875, 1364.5706787109375, -204.1852264404297, 639.0792846679688, 83.1114501953125, 610.8177490234375, 618.5638427734375, 492.0337829589844, 330.25518798828125, 225.60577392578125, -25.623046875, 1027.2515869140625, -291.29461669921875, 2957.865234375, -55.550540924072266, 1549.6943359375, 261.02947998046875, 545.0087890625, 899.1407470703125, -115.87577819824219, 87.76459503173828, 452.195068359375, 377.884765625, 84.44656372070312, -51.11187744140625, -296.47479248046875, 1204.6810302734375, 269.23846435546875, 614.5096435546875, 890.5457153320312, 1386.0692138671875, 582.288818359375, -637.025634765625, -875.061279296875, 55.8504753112793, -500.6708068847656, 696.978271484375, 394.22943115234375, -557.9682006835938, -547.3848876953125, 45.13201904296875, -482.28277587890625, 307.9767150878906, 896.4732666015625, 376.97650146484375, 511.4715270996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000581.npy"}
{"epoch": 0.8531571218795888, "step": 582, "batch_size": 64, "mean": 402.342041015625, "std": 586.1719970703125, "min": -1099.01513671875, "p10": -236.53863067626952, "median": 305.73118591308594, "p90": 1183.6563720703125, "max": 1782.0697021484375, "pos_frac": 0.75, "sample": [-1099.01513671875, 205.26031494140625, 1282.9835205078125, 305.2232666015625, 562.2835693359375, -225.17633056640625, 285.48583984375, 462.66375732421875, 500.0703125, -71.91061401367188, 360.57781982421875, 85.25090789794922, -32.71086120605469, 768.04345703125, 924.1218872070312, 1507.737548828125, -60.64100646972656, -13.208480834960938, -302.31927490234375, 1250.587646484375, 409.37738037109375, 82.90248107910156, 621.5330200195312, -627.046875, -292.0535888671875, 945.9688720703125, 174.26150512695312, 1156.0068359375, 1557.019775390625, 1782.0697021484375, 882.975341796875, 351.0236511230469, 1156.5484619140625, 2.6747055053710938, -420.25244140625, 133.32553100585938, 22.488197326660156, 181.49488830566406, 669.4573974609375, 649.048828125, 362.3592834472656, -219.2238006591797, 948.7335815429688, 1056.8702392578125, -0.6829681396484375, 1110.462646484375, 2.7192001342773438, 766.8748168945312, -241.40818786621094, 254.4235382080078, -19.537303924560547, 772.654541015625, 1195.2740478515625, 300.83270263671875, -40.527130126953125, -977.2299194335938, 250.13272094726562, 306.2391052246094, 823.0663452148438, 655.8043823242188, 1260.57568359375, 221.49037170410156, 754.5632934570312, 71.29068756103516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000582.npy"}
{"epoch": 0.8546255506607929, "step": 583, "batch_size": 64, "mean": 243.46913146972656, "std": 683.2799072265625, "min": -1408.2015380859375, "p10": -679.816845703125, "median": 244.92453002929688, "p90": 979.7735351562501, "max": 2107.381103515625, "pos_frac": 0.734375, "sample": [77.47109985351562, 270.42901611328125, 265.1294250488281, -711.7911987304688, 949.075927734375, 22.340469360351562, 962.4983520507812, 267.5736389160156, 2107.381103515625, 147.34506225585938, -215.17144775390625, 37.77760314941406, -907.6898803710938, 331.1378173828125, 177.7437744140625, -251.85342407226562, 938.9865112304688, 939.4381103515625, 391.3907165527344, 1078.2335205078125, 510.82568359375, 66.3387222290039, -605.2100219726562, -398.21923828125, -131.01153564453125, -992.4427490234375, 371.8388671875, 204.4229736328125, -564.7503662109375, -819.5359497070312, -127.30577087402344, -183.97799682617188, 581.38671875, 595.8406982421875, 349.7041931152344, 2038.7728271484375, 408.1026611328125, 12.372749328613281, 987.1771850585938, 242.95651245117188, 5.71160888671875, 358.5417175292969, 375.1145935058594, -96.84085083007812, -1066.908447265625, -1408.2015380859375, 1641.59375, 441.95867919921875, -416.8911437988281, 775.46435546875, 943.0341796875, 1345.8773193359375, 143.18426513671875, 518.021728515625, 899.48828125, -713.9583129882812, 47.47846603393555, 483.0345153808594, 55.00946044921875, 246.89254760742188, 1253.7086181640625, 31.25933837890625, 36.61219024658203, 258.1060485839844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000583.npy"}
{"epoch": 0.856093979441997, "step": 584, "batch_size": 64, "mean": 499.4644775390625, "std": 611.7685546875, "min": -666.3199462890625, "p10": -150.92644424438473, "median": 343.244873046875, "p90": 1293.231433105469, "max": 2544.496337890625, "pos_frac": 0.8125, "sample": [563.158203125, 235.62718200683594, -172.04197692871094, 256.9716491699219, -5.535888671875, -372.85198974609375, 598.8975219726562, 512.101806640625, 1174.642333984375, 71.75172424316406, 25.39776611328125, -101.65686798095703, 114.27435302734375, 203.5418701171875, 398.58502197265625, 1124.7149658203125, -282.62884521484375, 1244.81103515625, 969.21728515625, -514.92724609375, 289.5560302734375, 592.6493530273438, -29.726272583007812, 265.56787109375, 375.09423828125, 282.1013488769531, 1341.23876953125, 214.23477172851562, 205.23861694335938, 250.334228515625, 1193.1351318359375, 534.8570556640625, 334.0116882324219, -666.3199462890625, 694.7509765625, 309.6708984375, 626.5694580078125, 515.5972900390625, 228.96676635742188, -98.86776733398438, 498.31829833984375, -235.25860595703125, 352.4780578613281, 1719.6922607421875, 884.162841796875, 308.24884033203125, 1119.63671875, 1220.21142578125, -252.32861328125, 2544.496337890625, 1937.7447509765625, 32.37528991699219, 1591.490478515625, 1313.9830322265625, 438.7679748535156, 1637.0509033203125, 251.0028533935547, 676.846435546875, -25.927452087402344, 284.9672546386719, 522.8021240234375, 375.49456787109375, 1128.91845703125, 137.8398895263672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000584.npy"}
{"epoch": 0.8575624082232012, "step": 585, "batch_size": 64, "mean": 450.8072204589844, "std": 753.0850830078125, "min": -1992.7523193359375, "p10": -357.9877807617187, "median": 445.75559997558594, "p90": 1356.4398681640625, "max": 2754.621826171875, "pos_frac": 0.75, "sample": [213.11740112304688, -136.3931121826172, 686.352294921875, 328.8426513671875, 302.143798828125, 2754.621826171875, 980.2073364257812, -620.2405395507812, 418.8590393066406, 287.466796875, 1099.0565185546875, -1.1809539794921875, 622.0863037109375, 666.1651611328125, 3.1285629272460938, 1567.4322509765625, 1504.949951171875, 613.27197265625, 1535.5020751953125, 540.9465942382812, 497.9163513183594, 1350.287353515625, 309.897705078125, 474.0883483886719, 1114.984619140625, -98.29493713378906, 785.5389404296875, 1069.155517578125, 672.9800415039062, 160.8409423828125, 68.23579406738281, 780.0758666992188, 1359.07666015625, 80.68115234375, 1104.5787353515625, 226.18634033203125, 968.0484619140625, -313.7217102050781, -44.08500671386719, 924.5231323242188, 398.26513671875, -1992.7523193359375, 920.1571655273438, -821.896484375, 1021.0156860351562, 491.75164794921875, 478.9012451171875, -500.4137878417969, 874.1608276367188, -229.13412475585938, -78.20574951171875, -1138.1396484375, -91.13499450683594, -376.9589538574219, 99.11122131347656, 1642.942626953125, 472.65216064453125, -551.6158447265625, 865.8162841796875, -32.870513916015625, 158.88487243652344, 152.46409606933594, 149.63307189941406, 2081.69873046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000585.npy"}
{"epoch": 0.8590308370044053, "step": 586, "batch_size": 64, "mean": 304.63677978515625, "std": 602.399658203125, "min": -1237.7825927734375, "p10": -380.4061401367187, "median": 349.5049133300781, "p90": 1015.1013610839847, "max": 1641.10986328125, "pos_frac": 0.703125, "sample": [1181.952392578125, -221.9962158203125, -305.142578125, 533.248046875, 590.166259765625, 423.43267822265625, -184.202880859375, 346.2283935546875, 278.5252380371094, -8.683082580566406, -621.9930419921875, 359.7737121582031, 815.7147827148438, 1472.900146484375, -67.2400131225586, -141.0023956298828, 646.9573364257812, -816.455078125, 271.0582275390625, 226.67063903808594, -150.3094024658203, 150.56192016601562, 1072.0009765625, 756.5150146484375, 938.5375366210938, 16.738174438476562, 661.7586059570312, -257.35675048828125, 77.50408172607422, 381.7283630371094, 731.654296875, -168.38961791992188, 345.32415771484375, -1237.7825927734375, 728.653564453125, 793.160400390625, 1641.10986328125, -399.540771484375, 581.6145629882812, -154.1529541015625, 924.828369140625, -442.170166015625, 479.4656982421875, 422.764404296875, -1108.6553955078125, -831.3367919921875, 386.6792297363281, 409.75543212890625, 142.64488220214844, 579.3419799804688, -335.7586669921875, 383.63787841796875, 1047.9144287109375, 1510.977294921875, 867.0413208007812, 247.1974334716797, 424.1094970703125, 1625.746337890625, 110.81392669677734, -108.80884552001953, 230.162353515625, 740.7211303710938, 352.78143310546875, 147.65963745117188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000586.npy"}
{"epoch": 0.8604992657856094, "step": 587, "batch_size": 64, "mean": 406.54833984375, "std": 578.6703491210938, "min": -1460.68017578125, "p10": -183.46811523437498, "median": 372.3928527832031, "p90": 1300.7094238281256, "max": 2010.009765625, "pos_frac": 0.765625, "sample": [567.5638427734375, 950.0826416015625, 425.83062744140625, 81.2901611328125, 328.1465148925781, 459.1860656738281, 593.2048950195312, -77.91825866699219, -0.8299636840820312, 1501.7686767578125, -386.6271057128906, -111.43856811523438, 2010.009765625, 340.6881408691406, 240.87411499023438, 486.789306640625, -30.185546875, 394.60638427734375, 417.8821105957031, 394.449462890625, -155.11988830566406, 588.54638671875, 1461.383056640625, 19.06804084777832, 350.33624267578125, 1169.643310546875, -195.6173553466797, 996.8670043945312, 686.443603515625, -10.329360961914062, 128.7100830078125, 60.07640838623047, -561.4851684570312, 55.119293212890625, 707.9661254882812, 119.65458679199219, 245.97552490234375, -1460.68017578125, 919.851318359375, 135.76539611816406, -329.0933837890625, 827.9232177734375, 149.7838134765625, 756.9219970703125, -47.95127868652344, 1369.42431640625, 652.7261962890625, -246.13414001464844, 214.38613891601562, 328.51690673828125, 462.63531494140625, 1558.060791015625, 442.2733154296875, 655.113525390625, 518.769775390625, 590.7099609375, 948.235595703125, -200.99234008789062, 324.44403076171875, 20.553325653076172, 412.4192810058594, -107.846435546875, 1513.7862548828125, 1356.880615234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000587.npy"}
{"epoch": 0.8619676945668135, "step": 588, "batch_size": 64, "mean": 351.92852783203125, "std": 518.3449096679688, "min": -914.514404296875, "p10": -258.3384338378906, "median": 315.6816864013672, "p90": 1065.5239013671876, "max": 1454.796142578125, "pos_frac": 0.765625, "sample": [138.97315979003906, 553.7627563476562, -212.28634643554688, 146.01580810546875, 43.98252868652344, 662.8480224609375, 717.4118041992188, 27.108030319213867, 758.360107421875, 743.812255859375, 128.60003662109375, -409.5875244140625, -33.87554931640625, 554.7478637695312, -278.0750427246094, -419.2943420410156, 827.7308349609375, 534.465087890625, 226.09097290039062, 259.1846618652344, 659.0474853515625, -152.03005981445312, -173.4718475341797, 1218.134765625, 132.32728576660156, 530.5897827148438, 1351.6451416015625, -105.2264404296875, -484.28662109375, 34.941307067871094, 288.4974670410156, 90.85227966308594, 289.51043701171875, 1076.666748046875, 951.8074340820312, 500.15631103515625, 171.06361389160156, -201.70913696289062, 1294.9337158203125, 1039.52392578125, 1279.7083740234375, 680.1704711914062, 40.572662353515625, -570.7753295898438, 671.4804077148438, 427.0176086425781, 178.7655487060547, 986.5147705078125, -914.514404296875, 376.6046142578125, -17.45046615600586, 1454.796142578125, 1190.750244140625, 416.513427734375, 464.578857421875, 599.627685546875, 646.204345703125, 374.209716796875, -176.6820526123047, 182.62945556640625, 9.196006774902344, 341.8529357910156, 906.736328125, -508.03179931640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000588.npy"}
{"epoch": 0.8634361233480177, "step": 589, "batch_size": 64, "mean": 469.1645202636719, "std": 575.0010986328125, "min": -784.8447265625, "p10": -229.50870056152337, "median": 413.39173889160156, "p90": 1344.613232421875, "max": 2031.087646484375, "pos_frac": 0.84375, "sample": [1587.1064453125, 342.0052795410156, 394.5111083984375, 56.10856246948242, 686.4464111328125, 148.59767150878906, -83.53669738769531, 630.5589599609375, 409.49969482421875, -364.36395263671875, -22.198867797851562, 417.2837829589844, 84.2511215209961, 277.05035400390625, 692.458251953125, 1338.8165283203125, 2031.087646484375, 127.70600128173828, 563.5803833007812, 199.97702026367188, 465.8485107421875, 229.51806640625, 167.58534240722656, 1478.5291748046875, 56.65826416015625, 561.2425537109375, 391.1791076660156, 723.9389038085938, 1796.57666015625, -166.51397705078125, 589.7932739257812, 645.163330078125, 849.9027709960938, 522.343505859375, 1161.83984375, -256.5064392089844, -318.94317626953125, 127.43280792236328, 763.8359985351562, 142.98495483398438, 526.4652709960938, -592.1400756835938, 88.49636840820312, 781.8953857421875, 619.3592529296875, 549.1571044921875, 1118.78369140625, 260.03973388671875, 595.863525390625, 1459.7198486328125, 589.838623046875, 237.07601928710938, 372.2135009765625, -784.8447265625, 346.552490234375, -648.7750854492188, 603.7786865234375, 1403.12744140625, 32.58363342285156, 303.4878234863281, 1347.0975341796875, 703.9378051757812, -442.7946472167969, 1106.25341796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000589.npy"}
{"epoch": 0.8649045521292217, "step": 590, "batch_size": 64, "mean": 457.81378173828125, "std": 602.9258422851562, "min": -484.70989990234375, "p10": -263.5567901611328, "median": 347.11749267578125, "p90": 1251.3738769531253, "max": 2191.18359375, "pos_frac": 0.75, "sample": [579.984130859375, 1629.9434814453125, 801.1719970703125, 271.22021484375, 1022.7948608398438, -363.55670166015625, 155.24151611328125, 2191.18359375, 982.7383422851562, -255.37167358398438, 608.1804809570312, -305.82611083984375, 368.9388427734375, 1464.4859619140625, -140.08074951171875, -74.48664855957031, -246.17971801757812, 125.82959747314453, 818.6785888671875, 229.25015258789062, 74.6275634765625, -440.31915283203125, -137.34188842773438, 307.54852294921875, -105.87652587890625, 180.84783935546875, 823.414306640625, -316.1607666015625, 1350.0955810546875, -125.98865509033203, 1086.7169189453125, 483.2808837890625, 37.853355407714844, 1452.11181640625, 206.42977905273438, 394.7126159667969, 337.18414306640625, 678.828857421875, 1190.860107421875, 571.0563354492188, 550.0891723632812, -267.064697265625, -126.57736206054688, -484.70989990234375, 357.05084228515625, 67.20826721191406, 178.0884552001953, 957.2777709960938, 268.90887451171875, -404.40802001953125, 440.6595764160156, 1277.308349609375, 289.32012939453125, -112.83019256591797, 622.7159423828125, 699.1708374023438, 907.859375, 2017.36083984375, 127.8267593383789, 1034.9212646484375, 1088.6885986328125, 924.1041259765625, 243.00465393066406, 730.0874633789062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000590.npy"}
{"epoch": 0.8663729809104258, "step": 591, "batch_size": 64, "mean": 415.48028564453125, "std": 583.6268310546875, "min": -1075.7181396484375, "p10": -363.5605529785156, "median": 402.725830078125, "p90": 1173.308435058594, "max": 1701.44580078125, "pos_frac": 0.828125, "sample": [-16.49425506591797, 1188.302734375, 1433.83154296875, 649.0939331054688, 1000.1446533203125, -81.32273864746094, 20.467878341674805, -1075.7181396484375, 499.56634521484375, 1701.44580078125, 990.9373779296875, 147.7728271484375, 618.699462890625, 382.2509765625, 54.01971435546875, 1700.1197509765625, 801.9869995117188, 631.1743774414062, 125.5343017578125, 284.82537841796875, -108.87001037597656, 981.4696655273438, 189.30775451660156, 79.1557846069336, 571.7203979492188, 157.87814331054688, -705.1505737304688, 589.111572265625, 617.73291015625, 395.33380126953125, -316.45269775390625, 147.7841796875, 410.11785888671875, -639.0439453125, 1147.236083984375, 216.7899627685547, 178.79043579101562, 244.5757598876953, 690.4317626953125, -383.7496337890625, -387.9815673828125, 815.1867065429688, 186.1210174560547, 410.2183532714844, -596.975830078125, 1024.5177001953125, 854.092529296875, 95.07806396484375, 657.1354370117188, 1053.0689697265625, 13.256431579589844, 1184.4822998046875, 1013.4503784179688, 114.59112548828125, 1298.674560546875, 474.4648132324219, 539.7994384765625, 1325.061279296875, -803.3141479492188, 655.3806762695312, 142.5496063232422, 443.63262939453125, 318.32147216796875, 239.1168212890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000591.npy"}
{"epoch": 0.8678414096916299, "step": 592, "batch_size": 64, "mean": 380.2485656738281, "std": 752.1707153320312, "min": -1387.4378662109375, "p10": -557.9162475585937, "median": 378.18873596191406, "p90": 1309.4586547851563, "max": 2099.388427734375, "pos_frac": 0.734375, "sample": [382.5086669921875, 1266.1505126953125, -103.38174438476562, 433.04327392578125, -910.9908447265625, 169.2821807861328, 534.9540405273438, 1078.5299072265625, 1721.553466796875, 1656.3973388671875, -928.4586791992188, 2099.388427734375, 258.3094177246094, 822.1322021484375, -1032.359619140625, -500.4353942871094, -171.061767578125, 338.4832458496094, 151.8822021484375, 1339.5516357421875, 107.07062530517578, 1160.201904296875, -26.036949157714844, 1184.53173828125, 373.8688049316406, 114.75247192382812, 163.32666015625, 1035.6337890625, 1581.03955078125, 602.8519897460938, 69.61062622070312, -330.2681884765625, 175.23748779296875, 812.211669921875, 1226.2874755859375, 303.2969055175781, -580.1508178710938, 363.9496154785156, -506.03558349609375, 636.846435546875, 488.58929443359375, -245.86495971679688, 735.3248901367188, 1722.44189453125, 362.18035888671875, -330.1060485839844, 485.1437072753906, -1387.4378662109375, 912.9346923828125, 412.8334655761719, 723.65380859375, 75.08088684082031, 829.8455200195312, 583.4725341796875, -1035.4212646484375, 927.5072631835938, 668.71533203125, 134.86514282226562, 1328.019287109375, -307.6230163574219, -1289.490478515625, 788.5136108398438, 701.3567504882812, -22.33172035217285], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000592.npy"}
{"epoch": 0.869309838472834, "step": 593, "batch_size": 64, "mean": 297.4552917480469, "std": 664.4849243164062, "min": -1103.30029296875, "p10": -497.43423461914057, "median": 196.69361877441406, "p90": 1316.5947387695314, "max": 1865.866943359375, "pos_frac": 0.625, "sample": [-20.138214111328125, 727.4019775390625, -265.80767822265625, 104.26922607421875, -739.5697631835938, -461.13519287109375, 1516.1279296875, -709.7437133789062, 1274.203125, 609.12744140625, -192.2010498046875, 1634.489501953125, 1417.1824951171875, 1093.631591796875, -3.9741287231445312, -826.2723388671875, 1334.7625732421875, 1634.6600341796875, 697.2091674804688, 130.9490203857422, -512.990966796875, 178.9853515625, 376.40771484375, 78.62076568603516, 70.95799255371094, 1865.866943359375, 517.8385009765625, 111.0916519165039, 418.6234130859375, -883.310546875, -2.6647891998291016, 482.7345886230469, -360.16534423828125, 214.40188598632812, 710.1287841796875, 826.3355102539062, -125.27365112304688, 1338.669189453125, -270.3389892578125, 443.0506896972656, -831.3797607421875, 611.3961791992188, -326.2820129394531, -1103.30029296875, -27.161741256713867, -101.9794921875, -34.171180725097656, 9.581993103027344, 468.3648986816406, 859.6824340820312, 504.33685302734375, 293.0105285644531, 933.7244873046875, 481.6983337402344, 334.31658935546875, 948.9910278320312, -21.772430419921875, 43.72895050048828, -236.8037109375, 679.403076171875, -83.07427978515625, 558.03759765625, -179.50259399414062, 822.1521606445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000593.npy"}
{"epoch": 0.8707782672540382, "step": 594, "batch_size": 64, "mean": 371.8956298828125, "std": 623.448486328125, "min": -974.0704345703125, "p10": -385.85698547363273, "median": 313.3977355957031, "p90": 1221.362060546875, "max": 1812.449951171875, "pos_frac": 0.765625, "sample": [1002.8441162109375, 4.014984130859375, 164.70155334472656, 17.198640823364258, -53.909019470214844, -723.9934692382812, 617.9157104492188, 419.98504638671875, 60.983985900878906, 773.695556640625, 545.1692504882812, 370.99517822265625, 1812.449951171875, 721.9539184570312, 135.73397827148438, 938.2003173828125, 767.44580078125, 169.4950714111328, 1187.7459716796875, -455.1393127441406, -557.39599609375, 1438.1549072265625, -974.0704345703125, 1656.0015869140625, -292.9410400390625, 572.1614379882812, -124.6108627319336, 341.9762268066406, 450.1231994628906, -194.80313110351562, 74.39039611816406, 462.094970703125, 1371.0206298828125, 565.2611694335938, 291.78680419921875, 623.9727783203125, 665.0636596679688, 28.187015533447266, 271.7283630371094, -689.9246826171875, 9.64272689819336, 992.7852172851562, 163.7098846435547, -174.69094848632812, 1227.8524169921875, 971.5731201171875, 894.9848022460938, 554.630126953125, -412.6347961425781, 450.7922668457031, -908.5673828125, 39.68914794921875, 290.84307861328125, -196.278564453125, 335.0086669921875, -44.37247848510742, 1206.2178955078125, 951.745361328125, 1609.0576171875, 258.3054504394531, 1310.49169921875, -323.37542724609375, 83.23272705078125, 55.01158905029297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000594.npy"}
{"epoch": 0.8722466960352423, "step": 595, "batch_size": 64, "mean": 430.8812255859375, "std": 606.3203125, "min": -480.0907897949219, "p10": -175.6789123535156, "median": 301.1611022949219, "p90": 1058.609069824219, "max": 2760.617919921875, "pos_frac": 0.78125, "sample": [913.6915283203125, 298.28033447265625, 662.6008911132812, 32.405616760253906, -86.63907623291016, 1073.4254150390625, 364.74200439453125, 1004.5958862304688, -295.99981689453125, 416.44879150390625, 1591.318359375, 89.45323181152344, 1213.175048828125, 990.69189453125, -165.6244659423828, 343.492919921875, 358.1253356933594, 2017.94970703125, 192.23243713378906, -110.81887817382812, 93.68704223632812, -186.11672973632812, 641.5166015625, 378.7160949707031, -260.70086669921875, -283.4208679199219, 266.4686584472656, 106.63495635986328, 280.0841064453125, 304.0418701171875, 163.3475341796875, 249.24420166015625, 296.52484130859375, 293.65277099609375, 980.4697265625, 6.899370193481445, 326.5123596191406, 652.3740844726562, 1606.3985595703125, 327.1707763671875, -42.07594299316406, -480.0907897949219, 484.3351135253906, -363.6684875488281, 421.0334167480469, 407.7482604980469, 285.7259521484375, -179.9879608154297, 760.5025024414062, 688.32958984375, -18.79633331298828, 775.7764282226562, 130.19003295898438, -128.95848083496094, 413.99462890625, 98.13871765136719, 1024.03759765625, 99.26129150390625, -156.47576904296875, 64.14163208007812, 2003.48388671875, 536.4407958984375, 845.6439208984375, 2760.617919921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000595.npy"}
{"epoch": 0.8737151248164464, "step": 596, "batch_size": 64, "mean": 296.531982421875, "std": 639.2484741210938, "min": -1199.8231201171875, "p10": -509.6370330810546, "median": 197.4710235595703, "p90": 1241.371496582032, "max": 2122.455810546875, "pos_frac": 0.75, "sample": [-389.6156005859375, 54.44451904296875, 742.429443359375, 55.449005126953125, 918.7175903320312, 599.2418823242188, 225.28146362304688, 535.8988647460938, 36.308441162109375, 547.4609375, 2122.455810546875, -156.03225708007812, -595.012939453125, 468.5503234863281, 1035.5928955078125, 1486.5318603515625, 191.21913146972656, 154.1024627685547, 498.8258972167969, 647.95849609375, -105.04449462890625, 542.84423828125, 97.88078308105469, 236.87379455566406, -553.8137817382812, 162.0457305908203, 486.2066955566406, 425.4256591796875, 286.4503173828125, 134.525634765625, -366.056884765625, 111.4446029663086, -556.5528564453125, 1575.8951416015625, 184.2645721435547, 1063.444091796875, 1572.4534912109375, 528.3950805664062, 26.600095748901367, -113.8269271850586, 192.92706298828125, 506.81829833984375, 137.8470458984375, 390.6556396484375, -225.82208251953125, 218.00743103027344, 1317.6260986328125, 227.92283630371094, -337.3137512207031, -204.31971740722656, 1808.992431640625, 46.67890167236328, -1199.8231201171875, 202.01498413085938, 330.9480285644531, -723.5433959960938, 145.04811096191406, -611.1227416992188, 710.4508666992188, 151.30831909179688, 1581.246826171875, -406.5579528808594, 409.0942077636719, -610.3011474609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000596.npy"}
{"epoch": 0.8751835535976505, "step": 597, "batch_size": 64, "mean": 525.302001953125, "std": 724.3052978515625, "min": -1204.923095703125, "p10": -172.5231124877929, "median": 414.04791259765625, "p90": 1512.2902587890626, "max": 2501.442626953125, "pos_frac": 0.796875, "sample": [966.4365234375, 573.4845581054688, 1265.0224609375, 373.8397216796875, 508.27056884765625, 1559.1986083984375, -196.1330108642578, 1032.41259765625, 1561.6256103515625, -389.39581298828125, -111.51802825927734, -117.433349609375, 414.9586486816406, 78.41775512695312, 780.2568359375, 80.35184478759766, 1133.048095703125, 125.04414367675781, 378.4199523925781, 1822.4185791015625, 457.2953796386719, 509.7615966796875, 4.390289306640625, 1142.47021484375, 168.8879852294922, 318.2503356933594, 34.30111312866211, 285.90338134765625, 117.74589538574219, -965.0467529296875, -1204.923095703125, -9.364805221557617, 1308.19384765625, 511.566650390625, 575.55322265625, 2412.745849609375, 1516.830322265625, -52.32494354248047, -463.3232421875, 96.56536102294922, 791.739013671875, 1332.74853515625, 57.054847717285156, 272.7529296875, 413.1371765136719, -109.05435943603516, 607.9625854492188, 976.7196044921875, 64.68247985839844, 1496.6680908203125, 155.90997314453125, -329.695556640625, -233.63726806640625, 2501.442626953125, 1983.2513427734375, 122.05961608886719, 608.85009765625, 481.54071044921875, 766.5855712890625, 705.9178466796875, 103.07585144042969, 1501.69677734375, -60.013755798339844, 803.7318725585938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000597.npy"}
{"epoch": 0.8766519823788547, "step": 598, "batch_size": 64, "mean": 409.35821533203125, "std": 617.6261596679688, "min": -1175.1072998046875, "p10": -182.05966033935545, "median": 414.1699981689453, "p90": 1222.2988525390626, "max": 2262.76611328125, "pos_frac": 0.78125, "sample": [434.648193359375, 730.37841796875, 351.42059326171875, 1088.439453125, -1175.1072998046875, 393.6918029785156, 1281.2110595703125, 705.3099365234375, 203.2728271484375, 1233.086181640625, 34.919342041015625, -689.5211181640625, -842.1827392578125, 201.52027893066406, 1406.989501953125, -188.9965057373047, 737.696533203125, 730.09716796875, 1352.497802734375, 1817.219482421875, 260.61474609375, 1197.12841796875, 746.419677734375, -376.88079833984375, 314.056396484375, 469.1202087402344, 596.45947265625, 350.18994140625, -1085.27099609375, 171.0281524658203, 813.39404296875, 600.3900146484375, 486.649658203125, -123.97632598876953, -495.84442138671875, 533.2462158203125, 591.789306640625, 1268.5751953125, 195.8501434326172, 311.64141845703125, 151.37240600585938, 203.53875732421875, -110.88167572021484, 490.0589294433594, -86.70106506347656, 709.79150390625, 445.4171142578125, 480.3994140625, 19.99317741394043, 143.05703735351562, 347.3411560058594, 2262.76611328125, 856.5125732421875, 818.0038452148438, 554.8267822265625, 285.3620910644531, -34.224609375, 1005.1473388671875, 586.847900390625, -165.87368774414062, 16.936080932617188, -158.1639404296875, 772.19873046875, -25.971904754638672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000598.npy"}
{"epoch": 0.8781204111600588, "step": 599, "batch_size": 64, "mean": 336.27593994140625, "std": 532.787109375, "min": -2081.221435546875, "p10": -142.37838287353514, "median": 302.56146240234375, "p90": 965.0511779785157, "max": 1603.183837890625, "pos_frac": 0.828125, "sample": [417.4681701660156, 919.774658203125, 316.16412353515625, 440.19775390625, 773.0431518554688, 85.16119384765625, 1603.183837890625, 273.67730712890625, 204.75347900390625, 384.1563720703125, 454.93328857421875, -676.8206787109375, 6.187583923339844, 66.906982421875, -10.355875015258789, 435.73046875, 947.519775390625, 972.5646362304688, 597.316650390625, 1286.0948486328125, 191.85638427734375, 611.3746337890625, 743.8101806640625, 435.6235656738281, 305.66363525390625, 35.60454559326172, 157.01043701171875, 275.81219482421875, 240.06349182128906, -2081.221435546875, 592.2234497070312, 210.11376953125, 457.5487060546875, 175.24375915527344, 1143.5592041015625, 709.9571533203125, 585.9273071289062, -94.6710205078125, 1182.6639404296875, 277.4142761230469, 476.08306884765625, 170.95468139648438, 902.3406982421875, -313.6812744140625, -149.36793518066406, 393.6284484863281, 599.6883544921875, 251.45693969726562, 349.14154052734375, 145.88052368164062, 1200.29736328125, 255.05995178222656, 73.51624298095703, 13.379402160644531, -279.8493347167969, -17.908695220947266, 299.45928955078125, 395.85986328125, -392.92010498046875, 1224.30615234375, -126.06942749023438, -395.76434326171875, 262.731689453125, 530.2007446289062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000599.npy"}
{"epoch": 0.8795888399412628, "step": 600, "batch_size": 64, "mean": 322.5917053222656, "std": 639.6307983398438, "min": -1193.86962890625, "p10": -549.4893432617187, "median": 215.4433135986328, "p90": 1126.8391967773441, "max": 2400.23388671875, "pos_frac": 0.734375, "sample": [135.47434997558594, 687.0761108398438, 541.3002319335938, -1193.86962890625, 833.4791870117188, 595.2230224609375, -66.5247802734375, 572.556396484375, -609.2149047851562, 521.9600830078125, 44.459373474121094, 15.51229476928711, 991.3297119140625, -218.87063598632812, 303.6201477050781, 643.6861572265625, -730.3926391601562, 1451.62939453125, 74.1117172241211, 226.05316162109375, 885.121826171875, 1000.822265625, -125.93379211425781, 244.7935791015625, -128.5579071044922, -371.2462158203125, 740.9886474609375, 1188.47021484375, 1203.314453125, 837.5615844726562, 202.9136199951172, -39.844696044921875, 940.8614501953125, 116.05481719970703, 1029.9576416015625, 662.437744140625, 75.16893005371094, 250.10911560058594, -491.07232666015625, 1510.45068359375, 2400.23388671875, 220.4351043701172, -104.40977478027344, 41.11510467529297, 1499.272216796875, 163.66189575195312, 854.8316650390625, 66.79798889160156, -783.5137329101562, 580.1322021484375, 38.96467590332031, 1168.35986328125, 263.92706298828125, -53.68152618408203, 210.45152282714844, 785.3490600585938, -586.739013671875, 149.99606323242188, 53.40373992919922, -574.5252075195312, -615.1173706054688, 433.7799072265625, -232.23297119140625, 114.40550994873047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000600.npy"}
{"epoch": 0.8810572687224669, "step": 601, "batch_size": 64, "mean": 418.25567626953125, "std": 568.9058837890625, "min": -1191.1351318359375, "p10": -221.99286193847655, "median": 374.5825653076172, "p90": 1139.1808715820312, "max": 1765.8509521484375, "pos_frac": 0.78125, "sample": [671.59375, 1281.527587890625, 1765.8509521484375, 741.2603149414062, -96.02263641357422, 1052.073974609375, 358.1443176269531, 78.30150604248047, -229.464599609375, 277.40826416015625, -232.7823486328125, -164.4888153076172, 1446.2017822265625, 448.49041748046875, 307.8819580078125, 707.6680297851562, 288.0916748046875, 640.750244140625, -204.55880737304688, 1321.76318359375, 819.6793823242188, 624.5112915039062, 903.9767456054688, -73.83409118652344, 560.70361328125, 236.8575897216797, -64.23040771484375, 623.3846435546875, 779.7518310546875, 258.2381896972656, 401.5075988769531, 380.81976318359375, -987.8260498046875, 561.3034057617188, -437.9187316894531, 510.66033935546875, 154.630859375, -104.48533630371094, 164.61135864257812, 276.0287170410156, 692.3489379882812, 788.6314086914062, 161.3798065185547, -1191.1351318359375, 1138.2972412109375, -569.83984375, 45.139923095703125, 1535.1544189453125, 617.4120483398438, -333.0291748046875, 934.7467651367188, 995.0650634765625, 147.43260192871094, 368.3453674316406, 199.9873809814453, 982.8124389648438, 250.9314727783203, 1416.8741455078125, 1139.5595703125, 442.133544921875, -73.61075592041016, 244.54940795898438, 499.3411865234375, 287.77313232421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000601.npy"}
{"epoch": 0.882525697503671, "step": 602, "batch_size": 64, "mean": 520.9512329101562, "std": 591.3538818359375, "min": -488.1795349121094, "p10": -92.27585296630858, "median": 463.5677032470703, "p90": 1179.5487915039064, "max": 2620.80517578125, "pos_frac": 0.84375, "sample": [500.32220458984375, 140.75335693359375, -139.8981475830078, 362.07904052734375, 26.953222274780273, 165.08251953125, 1130.280029296875, 775.9503173828125, 139.6809844970703, 265.3568115234375, 459.68670654296875, 24.232070922851562, 704.32861328125, 1042.2142333984375, 1184.515380859375, 67.46477508544922, 576.8162841796875, 471.7528076171875, 467.4486999511719, 1381.40087890625, -18.45416259765625, 219.21499633789062, -35.67913818359375, -488.1795349121094, 1205.9500732421875, 719.87744140625, 215.0755615234375, 474.1361999511719, 89.75079345703125, -101.17353820800781, 68.96898651123047, 1050.34765625, 830.5051879882812, 135.9702606201172, 909.7427978515625, 1261.6199951171875, 200.43911743164062, 558.8171997070312, -462.2250671386719, -372.5536804199219, 874.8013916015625, 1135.5042724609375, 722.2955932617188, 977.192626953125, 800.2081298828125, 911.166015625, 2347.884521484375, -121.49922180175781, 1167.9600830078125, -446.570068359375, 975.455078125, 826.3601684570312, 218.53604125976562, 148.12057495117188, 1217.0396728515625, -71.51458740234375, 355.9849853515625, 842.103515625, 66.29122924804688, 109.156982421875, 856.2572631835938, 2620.80517578125, 442.3966369628906, 156.36949157714844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000602.npy"}
{"epoch": 0.8839941262848752, "step": 603, "batch_size": 64, "mean": 476.6319580078125, "std": 713.6810302734375, "min": -933.7374267578125, "p10": -358.6540313720703, "median": 347.86158752441406, "p90": 1336.114685058594, "max": 2939.37353515625, "pos_frac": 0.71875, "sample": [-372.59442138671875, -368.4373474121094, 290.03253173828125, 43.57881164550781, 1360.765625, -18.93170166015625, 1244.279541015625, 409.71588134765625, 845.6983642578125, 1223.220947265625, -47.82269287109375, 13.721275329589844, -649.5523071289062, 840.3675537109375, 48.72671127319336, 471.9910583496094, 43.772735595703125, 1526.980224609375, 2097.894287109375, 548.2803344726562, 1065.858642578125, 881.859619140625, 727.8919677734375, 54.8470344543457, 310.12188720703125, 293.6404724121094, 504.63873291015625, 137.30206298828125, -62.98586654663086, 249.8387451171875, 1668.75341796875, -933.7374267578125, 1813.3056640625, -35.211578369140625, -7.3138275146484375, 376.6043701171875, 1042.037353515625, -260.375732421875, 690.9315795898438, 560.8919067382812, 940.9564208984375, -476.13177490234375, -96.93754577636719, -23.33310317993164, -111.75582122802734, 562.8208618164062, 741.6082153320312, 147.3205108642578, -404.64068603515625, 435.5464172363281, 524.0803833007812, 1278.5958251953125, 909.5142822265625, -335.8262939453125, 173.0848388671875, -480.2884521484375, 2939.37353515625, 319.1188049316406, 1832.298095703125, 65.2618408203125, 866.77001953125, 1198.6912841796875, 907.7311401367188, -40.001094818115234], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000603.npy"}
{"epoch": 0.8854625550660793, "step": 604, "batch_size": 64, "mean": 520.3671264648438, "std": 720.8078002929688, "min": -930.552490234375, "p10": -384.58124694824215, "median": 485.39451599121094, "p90": 1659.5297119140625, "max": 2363.778076171875, "pos_frac": 0.765625, "sample": [-427.1708068847656, 176.5966796875, -262.69549560546875, -127.27989196777344, 664.6011962890625, 1216.7825927734375, 995.2723999023438, 1166.2142333984375, 1013.9371948242188, 2270.4091796875, -930.552490234375, 972.4999389648438, 20.633499145507812, -365.8623352050781, 919.9478759765625, 1112.69091796875, 1002.40185546875, 72.47340393066406, 382.4281311035156, 1081.344482421875, 127.3431396484375, -570.8677978515625, 89.64051818847656, 1659.61181640625, 578.9154052734375, -583.5293579101562, 1844.6319580078125, -260.220703125, 162.02084350585938, 213.7130126953125, 4.445960998535156, 429.7565002441406, 548.0020751953125, 486.5464782714844, -173.42312622070312, 1693.695556640625, 700.2042846679688, 9.203559875488281, -392.6036376953125, 880.887939453125, 611.5413208007812, 484.2425537109375, 525.5892944335938, 327.2900390625, 453.3553771972656, 2363.778076171875, -558.2999267578125, 1659.338134765625, 496.49395751953125, -8.754257202148438, 633.3161010742188, 1082.1954345703125, 863.9539794921875, -103.38154602050781, 1998.4991455078125, 577.2406616210938, -426.8048095703125, 68.81759643554688, 406.9909973144531, 821.49609375, -25.2694091796875, 560.9685668945312, 1689.152099609375, 399.1003112792969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000604.npy"}
{"epoch": 0.8869309838472834, "step": 605, "batch_size": 64, "mean": 367.9199523925781, "std": 634.3158569335938, "min": -1187.9111328125, "p10": -259.9810256958008, "median": 275.68202209472656, "p90": 1064.5187744140628, "max": 2790.83837890625, "pos_frac": 0.734375, "sample": [-55.58750915527344, -330.15826416015625, -23.21026611328125, 237.161376953125, -22.861061096191406, 117.20832824707031, 893.0145874023438, 698.1514282226562, 863.3004760742188, 218.8819580078125, 636.1070556640625, -13.881439208984375, 134.3330078125, -271.7872314453125, 451.5123291015625, 452.7802429199219, 76.23851776123047, 828.7680053710938, 1641.60400390625, -208.9877471923828, 2790.83837890625, 81.06735229492188, 625.178955078125, -42.281524658203125, 247.9619598388672, 913.6767578125, -207.02484130859375, -279.9072570800781, 338.63946533203125, 287.6654357910156, 72.2292709350586, -232.43321228027344, 973.0360717773438, -525.236083984375, -202.61956787109375, 429.6639099121094, 1220.927001953125, 88.92892456054688, 251.17333984375, 750.0302734375, 117.17658996582031, 2254.11181640625, -761.3442993164062, -1187.9111328125, 482.2379150390625, 429.61663818359375, -349.4312744140625, 1169.8153076171875, 97.29004669189453, 26.375749588012695, 567.1300659179688, 432.21063232421875, 1237.7237548828125, 307.1230773925781, 263.6986083984375, 413.0634765625, 426.7955017089844, 1098.111083984375, 402.30670166015625, 207.6807403564453, 986.13671875, 398.3114318847656, -179.6864013671875, 804.231689453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000605.npy"}
{"epoch": 0.8883994126284875, "step": 606, "batch_size": 64, "mean": 404.90985107421875, "std": 586.7932739257812, "min": -600.3906860351562, "p10": -353.4496917724609, "median": 279.02503967285156, "p90": 1099.4193359375004, "max": 2182.249267578125, "pos_frac": 0.765625, "sample": [-484.748779296875, 95.58297729492188, 88.36006927490234, -376.7647399902344, 372.8034362792969, 947.4661865234375, 173.8441162109375, 207.38653564453125, 1199.50048828125, 870.6892700195312, 172.21571350097656, 2182.249267578125, 916.0228271484375, -35.29766845703125, -539.974365234375, 233.74847412109375, 483.2320556640625, 228.1494140625, 229.74148559570312, 1257.449951171875, 801.2794189453125, -452.05364990234375, 1650.3551025390625, 20.10742950439453, 959.6241455078125, 687.7584228515625, 634.972412109375, 148.17091369628906, -43.41413116455078, -299.04791259765625, 602.1585083007812, 699.5557861328125, -471.3493347167969, 273.6064758300781, -583.7025146484375, 334.94989013671875, 212.6385498046875, 700.3101196289062, -90.66397857666016, -111.1376953125, -600.3906860351562, -208.073974609375, 2003.71142578125, 1149.2744140625, 272.5089416503906, 632.7400512695312, 109.74201202392578, 603.6397705078125, 825.8863525390625, 321.887939453125, -44.61613464355469, 910.7755737304688, -43.33625793457031, 247.73780822753906, 983.0908203125, 1506.8621826171875, 284.443603515625, 744.2421264648438, 39.34215545654297, 235.43324279785156, 615.75048828125, 542.7113647460938, 558.76220703125, 326.3302307128906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000606.npy"}
{"epoch": 0.8898678414096917, "step": 607, "batch_size": 64, "mean": 338.9346923828125, "std": 596.3048706054688, "min": -1236.029052734375, "p10": -251.68978576660152, "median": 266.0603942871094, "p90": 1032.819451904297, "max": 2500.816650390625, "pos_frac": 0.796875, "sample": [428.60687255859375, -134.45631408691406, 546.4468994140625, 220.6238250732422, 141.2738037109375, 168.2749481201172, 1139.3720703125, 363.66400146484375, 3.4769287109375, 1294.715576171875, -332.20263671875, 704.3478393554688, 448.5653381347656, -273.4363708496094, 119.42459106445312, 1045.46484375, -679.3264770507812, 220.874755859375, 407.7979736328125, 56.58856964111328, -171.52377319335938, 62.09870910644531, 255.32363891601562, 508.04522705078125, -474.56103515625, 622.3477172851562, -177.17047119140625, 1537.1552734375, 745.78662109375, 114.18177795410156, 635.947265625, 105.74530792236328, 744.2297973632812, 266.4591979980469, 500.28753662109375, 241.3419189453125, 1003.3135375976562, 100.3448486328125, 1568.3248291015625, 375.8935852050781, -1183.3292236328125, 577.7437133789062, -200.94775390625, 519.6885375976562, 666.0999755859375, 89.6719970703125, -21.691307067871094, 132.28314208984375, -1236.029052734375, 226.47064208984375, 559.6724853515625, 330.2271423339844, -125.15756225585938, 607.30615234375, 248.10964965820312, 82.53321838378906, 296.2589111328125, 2500.816650390625, 1470.5286865234375, 750.2803955078125, 265.6615905761719, -388.4674072265625, 591.8068237304688, 478.6148986816406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000607.npy"}
{"epoch": 0.8913362701908958, "step": 608, "batch_size": 64, "mean": 402.94073486328125, "std": 499.6820983886719, "min": -477.8456726074219, "p10": -269.561116027832, "median": 407.9303436279297, "p90": 944.007470703125, "max": 1744.6102294921875, "pos_frac": 0.796875, "sample": [819.776611328125, 890.191650390625, 362.9637756347656, 541.9396362304688, -423.0966491699219, 1535.814697265625, -394.9998474121094, 141.4844512939453, -128.67237854003906, 581.030517578125, 779.8092651367188, 26.857038497924805, 35.648643493652344, 669.6912841796875, -149.55752563476562, 185.37741088867188, -477.8456726074219, 440.8563232421875, -113.38922882080078, 402.9112243652344, 463.259765625, 645.9862670898438, 87.14883422851562, 795.0262451171875, 30.577613830566406, 412.949462890625, 634.8687744140625, 464.3271789550781, 950.3967895507812, -206.9696807861328, 88.01519775390625, -43.38185119628906, 248.5540313720703, 284.3572082519531, 765.4428100585938, 288.46588134765625, 459.26739501953125, 421.7127685546875, 1281.5859375, -319.808837890625, 1744.6102294921875, 212.99676513671875, 642.6412963867188, 977.8585205078125, 1173.7689208984375, 287.60418701171875, 81.68205261230469, 772.8355712890625, 33.685020446777344, -296.3860168457031, 660.9269409179688, -44.34050750732422, 556.57177734375, 837.3677368164062, -302.9256286621094, 929.0990600585938, -407.3540344238281, 188.75735473632812, 1711.618408203125, 815.99755859375, 740.15869140625, 502.02655029296875, 287.2670593261719, 203.16619873046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000608.npy"}
{"epoch": 0.8928046989720999, "step": 609, "batch_size": 64, "mean": 521.3295288085938, "std": 809.88720703125, "min": -1169.81103515625, "p10": -280.30283813476547, "median": 432.9218292236328, "p90": 1239.2265136718754, "max": 3999.588623046875, "pos_frac": 0.78125, "sample": [93.02261352539062, 19.72583770751953, 104.27204132080078, -675.450927734375, 220.23960876464844, 430.49932861328125, 1276.546630859375, 501.53021240234375, 524.9671020507812, -416.76812744140625, 810.1005249023438, 1793.1171875, 101.42762756347656, 435.3443298339844, 846.0361938476562, -1169.81103515625, -90.90322875976562, 585.501708984375, 1014.2498168945312, 395.1195983886719, 653.6143798828125, -129.15167236328125, 802.2449340820312, 737.5819702148438, -345.0819091796875, 596.6757202148438, 1358.213623046875, 221.79067993164062, -453.1690979003906, 732.794189453125, 1680.8662109375, 677.9880981445312, 2487.624267578125, 551.8040161132812, 2966.319580078125, 1071.374755859375, -8.658515930175781, 737.2392578125, -117.03325653076172, 327.0299987792969, 285.3126525878906, 189.1880645751953, 924.8106689453125, 3999.588623046875, 785.5396728515625, 86.94097900390625, 473.8468322753906, -73.10186767578125, -712.7416381835938, 167.62789916992188, 331.6024169921875, 267.2863464355469, 1019.1045532226562, -3.0261077880859375, -5.661460876464844, 1086.7308349609375, 326.7247619628906, 668.006103515625, 646.409912109375, 1152.146240234375, 267.02667236328125, 484.44305419921875, -378.8048095703125, 27.257160186767578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000609.npy"}
{"epoch": 0.8942731277533039, "step": 610, "batch_size": 64, "mean": 333.0104064941406, "std": 600.1758422851562, "min": -1849.3968505859375, "p10": -394.11669006347654, "median": 281.0553741455078, "p90": 983.702728271485, "max": 1722.3153076171875, "pos_frac": 0.78125, "sample": [714.070556640625, 688.9822998046875, 157.93385314941406, 222.7867889404297, -435.1560974121094, 665.67138671875, 78.85118103027344, 592.9786376953125, 651.5932006835938, -537.7868041992188, 47.98188018798828, 592.16650390625, 12.273384094238281, 1436.1549072265625, 697.680908203125, 396.58197021484375, 245.79965209960938, 184.42405700683594, -581.6665649414062, -1849.3968505859375, 455.73834228515625, 1722.3153076171875, 156.73648071289062, 1050.7489013671875, -209.9169921875, 373.06793212890625, 186.17626953125, -407.0516357421875, 780.5504760742188, 775.458984375, 1616.119873046875, -124.26689147949219, 4.7980804443359375, 725.9033813476562, 274.85760498046875, 1593.674560546875, -363.9351501464844, 203.86227416992188, 316.53131103515625, 747.9182739257812, 739.6847534179688, 403.20098876953125, -58.33723068237305, 835.1178588867188, 163.03622436523438, 242.73797607421875, 1047.3819580078125, 182.17745971679688, -113.59048461914062, 347.47265625, 38.43028259277344, 628.2402954101562, 235.83140563964844, -113.35258483886719, 709.5830688476562, 145.99923706054688, 297.5500793457031, 343.12139892578125, -242.6074981689453, 287.2531433105469, -486.5267639160156, -602.178466796875, 793.8942260742188, 1627.3330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000610.npy"}
{"epoch": 0.895741556534508, "step": 611, "batch_size": 64, "mean": 554.140625, "std": 763.8386840820312, "min": -786.7107543945312, "p10": -239.65504150390623, "median": 399.22869873046875, "p90": 1523.6409423828125, "max": 2668.87060546875, "pos_frac": 0.734375, "sample": [1525.4188232421875, 390.7445983886719, 2581.93115234375, 1642.5289306640625, 1158.6343994140625, 594.5086059570312, -141.6663818359375, 1391.4298095703125, 34.601829528808594, 229.68826293945312, 290.0304260253906, 761.3945922851562, 1.6521110534667969, -33.46720504760742, -701.956787109375, -216.09205627441406, -364.19049072265625, -786.7107543945312, 300.8815002441406, 1473.6083984375, -714.2831420898438, 1261.1005859375, 2207.80615234375, -463.9226379394531, 1442.1722412109375, 639.1682739257812, 23.361183166503906, 1002.5064086914062, 626.7261352539062, 175.79649353027344, -98.80377197265625, 1248.7327880859375, 403.98291015625, -67.9283218383789, 449.0753173828125, 1179.6004638671875, 90.23444366455078, 190.3165283203125, -186.19778442382812, 322.5921325683594, 988.0368041992188, 356.7293701171875, -12.376571655273438, 1871.0550537109375, 419.87078857421875, 549.6290893554688, -295.2957458496094, 514.1040649414062, 1088.9730224609375, -167.80361938476562, 638.363525390625, -249.7534637451172, 229.092529296875, 394.4744873046875, 774.131591796875, 508.92791748046875, 365.0600891113281, 1751.953125, 462.90576171875, 1519.4925537109375, 1253.2861328125, -7.432960510253906, 2668.87060546875, -22.30167007446289], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000611.npy"}
{"epoch": 0.8972099853157122, "step": 612, "batch_size": 64, "mean": 431.320556640625, "std": 594.1006469726562, "min": -868.7947387695312, "p10": -318.5953247070312, "median": 328.0908203125, "p90": 1112.9020507812502, "max": 2518.395751953125, "pos_frac": 0.8125, "sample": [911.7256469726562, 267.8548889160156, 327.2645568847656, -353.50799560546875, -110.12548828125, 31.310022354125977, 1130.79833984375, 457.8030700683594, 495.17840576171875, 468.30047607421875, -571.054443359375, 759.784423828125, 163.81491088867188, 1071.14404296875, 904.8618774414062, 1043.764404296875, 580.512451171875, 223.001953125, 545.763671875, 1256.61376953125, 817.3521728515625, 118.66609954833984, -868.7947387695312, 1328.170166015625, 673.2094116210938, -355.331787109375, 814.773193359375, -337.3909912109375, 750.4061889648438, 746.045654296875, 516.5617065429688, -797.8690185546875, 126.05513000488281, 326.0648498535156, 85.22760009765625, -356.4324951171875, 656.941162109375, 328.9170837402344, 13.8677978515625, -182.2503662109375, 141.3498992919922, 98.32820129394531, 129.7471923828125, 213.2491455078125, 2518.395751953125, 24.580265045166016, 786.3544921875, 1063.3408203125, 1248.11572265625, 308.11669921875, 186.7034454345703, 755.27734375, 1558.15625, 136.25469970703125, 218.7201690673828, -250.0572967529297, 536.751953125, -0.5015716552734375, 999.68505859375, -274.73876953125, 1302.0333251953125, 752.7196655273438, 1049.7313232421875, 93.203857421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000612.npy"}
{"epoch": 0.8986784140969163, "step": 613, "batch_size": 64, "mean": 499.3277587890625, "std": 623.697509765625, "min": -1011.3561401367188, "p10": -81.68381195068355, "median": 486.0186462402344, "p90": 1344.8997436523437, "max": 2462.74658203125, "pos_frac": 0.828125, "sample": [595.8714599609375, 1372.7147216796875, 1344.6817626953125, -113.37712097167969, 1064.345703125, 1095.8016357421875, 1.2447090148925781, 1155.51318359375, 925.3953857421875, 25.545236587524414, 1378.9954833984375, 195.51284790039062, 639.5338745117188, 225.48699951171875, 1642.5797119140625, -383.100830078125, 19.693161010742188, 428.73681640625, 544.5381469726562, -102.53425598144531, 386.83880615234375, 77.50096893310547, 718.5016479492188, 675.1475830078125, 1344.9931640625, 296.319091796875, 1118.869873046875, 999.5044555664062, -33.03277587890625, 582.5667724609375, 759.7566528320312, -14.870269775390625, -29.05694007873535, 570.0272827148438, 418.2054443359375, 1846.83544921875, 595.36767578125, 706.3955688476562, 2462.74658203125, 2.6581764221191406, -1011.3561401367188, 1073.348388671875, 679.1871337890625, 797.068115234375, 625.5228271484375, -922.1611328125, 252.6424102783203, 1495.6400146484375, -4.149433135986328, 236.6533203125, 732.8839111328125, -262.0196533203125, -449.1506042480469, 154.3463134765625, 1000.2081298828125, 547.627685546875, 123.91232299804688, 381.4653625488281, 50.10527801513672, 543.3004760742188, 6.6096649169921875, 101.30426788330078, 168.11935424804688, 93.413818359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000613.npy"}
{"epoch": 0.9001468428781204, "step": 614, "batch_size": 64, "mean": 501.2961120605469, "std": 695.879638671875, "min": -796.3143920898438, "p10": -256.1655822753906, "median": 457.98634338378906, "p90": 1403.3476684570314, "max": 2801.981201171875, "pos_frac": 0.765625, "sample": [216.21546936035156, 443.17376708984375, -567.7572021484375, 941.951416015625, 17.380813598632812, 1345.6256103515625, 11.762699127197266, 439.7187805175781, 500.2539978027344, 27.98886489868164, 1286.1949462890625, 376.14825439453125, 472.7989196777344, 670.2913208007812, 284.68792724609375, -168.74398803710938, 1433.611328125, 739.2037353515625, 234.09584045410156, -85.17994689941406, 1097.9444580078125, -19.32647705078125, 735.0130004882812, 995.0682373046875, -612.0902709960938, -109.30736541748047, 489.4674072265625, -48.979034423828125, 598.70703125, -276.7989501953125, 676.6826171875, 1727.843017578125, -208.02105712890625, 754.7717895507812, -615.3294677734375, -198.8167724609375, 576.6434936523438, -716.9462890625, -9.53260612487793, 760.9938354492188, 1554.1031494140625, 390.0250549316406, 1650.0819091796875, 305.3302307128906, 1075.1629638671875, 1119.7366943359375, 1341.8038330078125, 255.09051513671875, 637.2894287109375, 721.0975341796875, 47.376243591308594, 298.05303955078125, 1428.085693359375, 102.62356567382812, 675.698974609375, 2801.981201171875, 1968.8291015625, 135.42572021484375, -620.0708618164062, -796.3143920898438, 1201.4600830078125, 176.18780517578125, 579.16796875, 817.3173828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000614.npy"}
{"epoch": 0.9016152716593245, "step": 615, "batch_size": 64, "mean": 418.7528076171875, "std": 674.4854736328125, "min": -1963.6441650390625, "p10": -213.19897918701167, "median": 277.1340560913086, "p90": 1150.6625732421876, "max": 2699.863037109375, "pos_frac": 0.828125, "sample": [188.10446166992188, 772.4423828125, 142.48483276367188, 1550.8291015625, 214.3922576904297, 1206.440673828125, 3.2144317626953125, 63.82294464111328, 38.8819580078125, 99.22909545898438, 932.135986328125, 230.66786193847656, 249.17994689941406, -387.70599365234375, 746.0855102539062, -462.1055603027344, 976.6962280273438, -1963.6441650390625, 305.0881652832031, 403.9862060546875, 309.61431884765625, 160.26568603515625, 1969.8819580078125, 170.4088134765625, 72.7427978515625, 1069.2220458984375, -160.21737670898438, 404.344482421875, 554.2149047851562, 122.04757690429688, 379.2308044433594, 949.4727172851562, 665.2960205078125, 1017.5142822265625, 1305.8355712890625, 91.6279067993164, 137.3979949951172, 127.5146484375, -402.4844970703125, -142.31854248046875, -781.340087890625, 1153.473388671875, 1020.5928344726562, 60.61479187011719, 1602.116455078125, 637.0205078125, 600.5809936523438, 1144.10400390625, 153.81109619140625, 488.253173828125, 938.2761840820312, -235.90538024902344, 206.3754425048828, -48.357879638671875, 783.692626953125, -413.4339904785156, 241.72557067871094, 413.0291748046875, 527.7166137695312, 801.501953125, -155.4610137939453, 2699.863037109375, 713.821044921875, 136.26986694335938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000615.npy"}
{"epoch": 0.9030837004405287, "step": 616, "batch_size": 64, "mean": 337.53411865234375, "std": 579.8196411132812, "min": -770.0923461914062, "p10": -366.8234558105469, "median": 274.6616973876953, "p90": 1159.575134277344, "max": 1769.67724609375, "pos_frac": 0.75, "sample": [925.9771728515625, 97.7803955078125, 142.26055908203125, 715.3290405273438, 429.6864318847656, 74.71878814697266, 194.4796142578125, -241.59725952148438, 371.8368225097656, 1181.5498046875, 1379.8482666015625, 568.56494140625, 137.91513061523438, -24.949050903320312, -739.0947875976562, 1230.8114013671875, 443.0518493652344, 276.9259338378906, 151.4061279296875, -90.25594329833984, 1468.3145751953125, 552.0303955078125, 412.8052673339844, 419.7682800292969, -352.0876159667969, -682.6547241210938, 524.7672729492188, 235.9271240234375, -33.746604919433594, 1185.80810546875, 1769.67724609375, 181.34483337402344, -465.49298095703125, 7.568931579589844, 106.68162536621094, -723.2190551757812, -164.76165771484375, 233.43435668945312, 907.1620483398438, 706.389404296875, 361.39422607421875, 440.196533203125, 496.69085693359375, 683.5886840820312, -770.0923461914062, 1057.6409912109375, 185.3123321533203, -371.32464599609375, 154.04949951171875, 777.646484375, 281.1212158203125, -335.6106262207031, 1687.8543701171875, 387.5260009765625, 733.5042724609375, 153.38116455078125, 272.3974609375, 186.40699768066406, -269.912841796875, 582.7266235351562, 1108.3009033203125, -356.3206787109375, 1073.7581787109375, -434.0137634277344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000616.npy"}
{"epoch": 0.9045521292217328, "step": 617, "batch_size": 64, "mean": 239.94285583496094, "std": 593.8347778320312, "min": -1188.8873291015625, "p10": -428.7472015380859, "median": 270.5278625488281, "p90": 811.3543212890626, "max": 2192.191162109375, "pos_frac": 0.75, "sample": [293.32940673828125, 139.21722412109375, -182.9322509765625, 326.70892333984375, 473.8049011230469, -301.91668701171875, 23.48626136779785, -754.785400390625, 751.2687377929688, 135.29164123535156, -377.5600280761719, 342.9927062988281, 388.6640930175781, 396.6442565917969, 167.9300994873047, 233.41500854492188, 438.91510009765625, 277.81256103515625, 62.94081115722656, 1452.4696044921875, 800.927978515625, 613.2978515625, 122.97557830810547, 10.970693588256836, 509.2720031738281, 429.4144592285156, 79.4405746459961, -220.3648223876953, 349.8083190917969, -1186.23974609375, 14.106172561645508, 366.87396240234375, -347.73712158203125, -388.8343200683594, 667.9911499023438, 300.6581115722656, 1090.1978759765625, 815.82275390625, 651.0496826171875, 435.0594787597656, 544.6835327148438, -467.89910888671875, 879.01953125, 627.5187377929688, 1888.8565673828125, 489.76885986328125, 730.9656982421875, -612.470947265625, 193.17538452148438, 537.5767211914062, -1188.8873291015625, 138.00221252441406, 39.66002655029297, 263.2431640625, 229.94577026367188, -222.5355224609375, 909.55908203125, -166.774658203125, -61.00670623779297, -445.85272216796875, 2192.191162109375, 293.43878173828125, -866.829345703125, 28.606109619140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000617.npy"}
{"epoch": 0.9060205580029369, "step": 618, "batch_size": 64, "mean": 522.0931396484375, "std": 859.965576171875, "min": -1865.09423828125, "p10": -467.8816772460937, "median": 438.67327880859375, "p90": 1315.1710571289063, "max": 3081.5859375, "pos_frac": 0.78125, "sample": [-1865.09423828125, 847.1588134765625, 196.14300537109375, 234.25820922851562, 84.9234390258789, 285.325439453125, 119.01495361328125, 63.3082389831543, 2354.255859375, 914.8034057617188, 324.01824951171875, -929.33544921875, 814.7453002929688, 203.9821319580078, 1298.3829345703125, 319.0231018066406, -384.8946533203125, 1288.48681640625, 759.9171752929688, -225.72396850585938, -140.525390625, -485.7867736816406, -543.7452392578125, 94.26139831542969, -851.721435546875, -738.249267578125, 158.41647338867188, 1360.5914306640625, 1041.022216796875, 164.3868408203125, 630.728759765625, 459.15728759765625, 706.7562255859375, 1023.3305053710938, 1049.00341796875, -890.5966186523438, -426.1031188964844, 1292.5936279296875, 344.1512756347656, 443.62188720703125, 791.5653686523438, 1322.365966796875, 1110.1822509765625, 2603.708740234375, 912.5101928710938, -153.6243896484375, 407.17010498046875, 639.312744140625, 360.7909851074219, 3081.5859375, 778.900146484375, -58.58252716064453, 1498.3802490234375, 1282.0123291015625, 896.7545166015625, 516.5953979492188, -197.8241424560547, 1145.679443359375, 2627.537353515625, 20.359954833984375, 433.72467041015625, 1066.6004638671875, 599.49267578125, 334.7686767578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000618.npy"}
{"epoch": 0.9074889867841409, "step": 619, "batch_size": 64, "mean": 398.9811706542969, "std": 834.60595703125, "min": -1095.140625, "p10": -556.2883239746093, "median": 290.05577087402344, "p90": 1469.076916503907, "max": 3282.93603515625, "pos_frac": 0.671875, "sample": [-1095.140625, -663.901123046875, 22.66162109375, 1178.6705322265625, 1527.2100830078125, -199.81576538085938, 321.2942810058594, 180.07827758789062, 361.7569580078125, -490.0054016113281, 625.0774536132812, 746.763427734375, 478.9906005859375, 1893.166748046875, -787.457275390625, 227.0975341796875, 1219.500732421875, 415.960205078125, 1157.687255859375, 1154.5982666015625, -789.8546142578125, -870.0192260742188, 1297.9991455078125, 1333.432861328125, -1.3442535400390625, -504.872802734375, -117.79195404052734, 601.06689453125, 686.6219482421875, -92.90411376953125, 64.63296508789062, 535.643310546875, -444.25201416015625, 81.0118408203125, 1543.576416015625, -370.1181945800781, -45.6982421875, -301.6939697265625, 791.3712768554688, 646.9489135742188, -41.54561996459961, 1884.970458984375, 538.5584106445312, 575.0850219726562, 974.7704467773438, 76.28180694580078, -173.72689819335938, 2553.746337890625, 1602.05712890625, 302.12567138671875, 49.09345245361328, 227.82113647460938, 488.3317565917969, 3282.93603515625, -1077.0941162109375, -578.3235473632812, 632.4259643554688, -55.272212982177734, 1212.667236328125, -26.84079360961914, 325.1689758300781, 148.5106201171875, 17.11311912536621, 277.9858703613281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000619.npy"}
{"epoch": 0.908957415565345, "step": 620, "batch_size": 64, "mean": 384.1690979003906, "std": 627.1543579101562, "min": -1122.696044921875, "p10": -285.59993591308586, "median": 359.23028564453125, "p90": 1386.3005859375, "max": 1967.8665771484375, "pos_frac": 0.734375, "sample": [477.3862609863281, -385.8997802734375, 440.29620361328125, 621.9178466796875, 493.71173095703125, 820.3494873046875, 349.38653564453125, 287.51287841796875, -77.4636001586914, 1455.893798828125, -309.4596862792969, 369.07403564453125, -229.92718505859375, 1373.0933837890625, 443.4527587890625, 138.14149475097656, 1087.7156982421875, 475.9966735839844, -80.56423950195312, 1391.9608154296875, 1967.8665771484375, 248.85214233398438, 427.17889404296875, 1072.7626953125, -506.5599365234375, 223.58181762695312, 644.8219604492188, 297.6244812011719, 169.1242218017578, 78.82807922363281, -44.21843719482422, 1808.453369140625, 428.31109619140625, 1740.4892578125, -50.20166015625, 411.05499267578125, -84.6379165649414, 585.6414184570312, -10.453617095947266, 440.25079345703125, 373.01715087890625, 471.793212890625, -174.78228759765625, -107.25108337402344, 1392.2977294921875, 764.8474731445312, 649.4126586914062, -1122.696044921875, 117.2663345336914, 226.95932006835938, -203.18853759765625, -1038.9647216796875, 1787.5755615234375, 327.1000061035156, 238.42637634277344, 282.7973327636719, 449.9628601074219, 146.82791137695312, -551.0612182617188, 428.40380859375, 249.0794219970703, 954.16796875, -501.7701110839844, 435.25567626953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000620.npy"}
{"epoch": 0.9104258443465492, "step": 621, "batch_size": 64, "mean": 572.7069091796875, "std": 756.7515258789062, "min": -1748.376220703125, "p10": -269.45154571533203, "median": 563.6435852050781, "p90": 1477.1267578125005, "max": 3164.935302734375, "pos_frac": 0.828125, "sample": [623.1110229492188, -426.0539245605469, -801.473876953125, -242.6554718017578, 1792.7908935546875, -2.8609848022460938, 114.52112579345703, 677.7642211914062, 665.8230590820312, 1064.7994384765625, 904.5460815429688, 369.6446533203125, 311.15264892578125, 895.49072265625, 291.53033447265625, -454.28997802734375, -333.6242370605469, 773.9581298828125, -1748.376220703125, 198.86566162109375, 1572.5716552734375, 212.0455322265625, 1059.9310302734375, 542.7478637695312, 1234.7691650390625, 3164.935302734375, 52.823829650878906, 1158.7489013671875, 1168.78125, -124.35153198242188, 1350.6361083984375, 816.8148193359375, 947.3701782226562, 240.915771484375, 922.5971069335938, 1631.2918701171875, 176.26248168945312, 772.067626953125, 446.0304260253906, 666.97607421875, -125.94126892089844, 1322.7373046875, -280.9355773925781, 584.539306640625, 246.01992797851562, 174.59832763671875, 1531.3370361328125, 398.78948974609375, 1287.455810546875, 1182.5814208984375, -1009.870361328125, 364.046630859375, 90.0514144897461, 218.80096435546875, 300.15203857421875, 689.0447998046875, 923.4025268554688, 1579.202880859375, 250.37045288085938, 306.39910888671875, 1988.59326171875, 1016.70654296875, 772.030517578125, 154.5006561279297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000621.npy"}
{"epoch": 0.9118942731277533, "step": 622, "batch_size": 64, "mean": 399.3359375, "std": 663.733642578125, "min": -1246.098876953125, "p10": -267.0106506347655, "median": 362.2281799316406, "p90": 911.7734497070313, "max": 2924.44384765625, "pos_frac": 0.828125, "sample": [675.3327026367188, 346.96051025390625, -302.2549743652344, 409.87933349609375, 546.2750244140625, 416.5594482421875, -707.7006225585938, 363.3353576660156, 2559.319091796875, 571.0640258789062, 49.932289123535156, -407.1533203125, 71.96321105957031, -342.23455810546875, -33.703155517578125, 569.2879638671875, 275.22918701171875, 406.70953369140625, 26.874847412109375, 269.1766357421875, 1212.3487548828125, -83.7718505859375, 487.6043701171875, 2924.44384765625, 836.5841674804688, 25.270172119140625, 619.8302001953125, 737.4373779296875, 194.31068420410156, -1073.88720703125, 203.66525268554688, 9.8953857421875, 915.5039672851562, 94.81300354003906, 284.3341064453125, 903.0689086914062, 493.14404296875, -530.3544311523438, 1107.2642822265625, 574.6143798828125, 738.003173828125, 874.6792602539062, 40.336097717285156, 722.6273193359375, 135.83572387695312, -84.8055419921875, 361.1210021972656, -184.77389526367188, 54.72825622558594, 316.9295959472656, 1594.9239501953125, 525.8355712890625, 863.6937255859375, 664.0606079101562, 596.1787109375, 751.1932983398438, -1246.098876953125, 9.710960388183594, 118.35353088378906, 1511.1446533203125, 654.9046630859375, 529.0932006835938, 12.166858673095703, 296.6885986328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000622.npy"}
{"epoch": 0.9133627019089574, "step": 623, "batch_size": 64, "mean": 311.66925048828125, "std": 640.7202758789062, "min": -1172.4656982421875, "p10": -280.43679809570307, "median": 237.2112274169922, "p90": 1100.873547363282, "max": 2775.971923828125, "pos_frac": 0.703125, "sample": [25.43157958984375, 261.75, -819.1581420898438, 1336.2108154296875, -153.33319091796875, -26.072826385498047, 713.6436157226562, 49.266265869140625, 845.1464233398438, 57.883544921875, 1418.178955078125, 357.61920166015625, 645.8124389648438, 155.2283172607422, 675.7540283203125, -420.98394775390625, 785.0005493164062, 369.03045654296875, -296.19915771484375, 29.22412109375, -185.17100524902344, 451.58575439453125, 1281.3470458984375, -243.657958984375, -1172.4656982421875, 162.4383544921875, 367.49530029296875, -740.9794921875, 476.77520751953125, 659.4801635742188, -214.25973510742188, 460.0461120605469, -151.73150634765625, -114.28763580322266, -200.00326538085938, 121.48786926269531, 961.5341796875, 715.343505859375, 962.3336181640625, 227.77676391601562, 67.74000549316406, -795.44775390625, 377.6593322753906, 1443.4512939453125, -111.10787963867188, -66.77485656738281, 2775.971923828125, 501.73846435546875, -48.26810836791992, -444.62799072265625, 1160.247802734375, 574.4215087890625, 376.51275634765625, 343.658203125, 257.9915466308594, 1787.955810546875, 65.079833984375, 62.472015380859375, 244.10092163085938, 53.5365104675293, 534.0621337890625, 230.321533203125, 747.9805297851562, -26.36358642578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000623.npy"}
{"epoch": 0.9148311306901615, "step": 624, "batch_size": 64, "mean": 483.58160400390625, "std": 579.7620239257812, "min": -1172.9410400390625, "p10": -198.94970092773437, "median": 510.4460906982422, "p90": 1187.2959228515624, "max": 1482.1240234375, "pos_frac": 0.78125, "sample": [672.7286987304688, -387.46990966796875, 728.7674560546875, 459.5064697265625, 515.615478515625, 1093.67529296875, 1230.4114990234375, 1482.1240234375, 1436.434326171875, 235.6290283203125, 187.48898315429688, -25.75653076171875, 115.0950698852539, 1189.6171875, 872.8785400390625, -163.3377685546875, 600.0139770507812, 759.6087036132812, 1181.879638671875, -996.1476440429688, 49.78889465332031, 261.967529296875, 63.94336700439453, 852.7775268554688, 415.2632751464844, 239.97988891601562, 949.7926025390625, 863.8801879882812, 912.63232421875, -197.96823120117188, 196.6807403564453, 1100.623291015625, -199.37033081054688, 302.4217834472656, -388.764404296875, 983.4791870117188, 502.51318359375, -74.55816650390625, 1136.1839599609375, 343.6361083984375, -1172.9410400390625, 78.99236297607422, -135.6595458984375, 725.7664794921875, 951.58837890625, 1087.000244140625, 249.2366485595703, 846.9041137695312, 1051.2274169921875, 505.2767028808594, 223.2837677001953, -390.89697265625, -7.34547233581543, 1211.1976318359375, 570.182373046875, 708.1384887695312, -299.4093933105469, 649.3170776367188, -108.36347961425781, 707.3765869140625, 1322.47119140625, 212.51422119140625, 1388.7724609375, 1070.9288330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000624.npy"}
{"epoch": 0.9162995594713657, "step": 625, "batch_size": 64, "mean": 539.6091918945312, "std": 577.7627563476562, "min": -434.39691162109375, "p10": -16.041033935546857, "median": 465.8817443847656, "p90": 1251.9223388671876, "max": 2341.5224609375, "pos_frac": 0.890625, "sample": [71.11055755615234, 140.59298706054688, -297.9781799316406, 989.3743286132812, 319.00750732421875, 37.750244140625, 806.90625, 1089.44970703125, 1054.592041015625, 994.6070556640625, 54.6202392578125, 651.8072509765625, 393.30877685546875, 32.58256530761719, 2054.17919921875, 589.4769897460938, 265.5694580078125, 87.71363830566406, 2341.5224609375, 496.65521240234375, 2039.4598388671875, 231.7821807861328, 78.15128326416016, 711.9287719726562, 247.43460083007812, 870.2069091796875, 1875.54052734375, -23.225021362304688, 209.8772735595703, 321.2113037109375, 461.5960693359375, 717.5985107421875, 498.8661804199219, -434.39691162109375, 511.57183837890625, -128.55960083007812, 85.42662811279297, 622.7261962890625, 317.7729187011719, 727.5388793945312, 1263.4097900390625, 465.21478271484375, 1371.3480224609375, 473.59234619140625, 119.81733703613281, 466.5487060546875, -65.77546691894531, 472.21734619140625, 688.5493774414062, 820.4675903320312, 45.15558624267578, 0.7216033935546875, 944.6011962890625, 354.6180419921875, 142.75453186035156, 174.47406005859375, 164.1864776611328, 534.0769653320312, 1225.1182861328125, -380.3801574707031, 775.606689453125, -79.85647583007812, 1423.286376953125, 1019.8782348632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000625.npy"}
{"epoch": 0.9177679882525698, "step": 626, "batch_size": 64, "mean": 298.3277282714844, "std": 678.9322509765625, "min": -2664.941650390625, "p10": -397.04429931640607, "median": 242.66095733642578, "p90": 976.7689941406252, "max": 2073.150390625, "pos_frac": 0.71875, "sample": [627.7662353515625, 25.53814697265625, 485.04736328125, 391.73681640625, 110.56057739257812, 250.5478973388672, 704.2348022460938, 234.77401733398438, 994.0769653320312, -2664.941650390625, 1637.600830078125, 358.3998718261719, 1525.8275146484375, 2073.150390625, -646.5287475585938, 127.9399185180664, 229.69561767578125, -467.5592956542969, -9.066558837890625, -177.3040771484375, 45.31951904296875, 214.9246063232422, 630.1045532226562, 119.63738250732422, 23.537981033325195, -0.8350982666015625, 153.35791015625, -613.5183715820312, 529.112548828125, 680.2452392578125, -107.87757873535156, 651.0907592773438, -563.1884765625, -606.0101928710938, 558.3601684570312, 1318.60693359375, 834.9629516601562, 402.8717346191406, 394.4072265625, 746.6980590820312, 575.6775512695312, -232.50930786132812, 936.3837280273438, 496.91748046875, 1729.4339599609375, 775.7609252929688, -125.94493865966797, 60.061302185058594, 136.0506591796875, 429.34014892578125, -46.97344970703125, 835.19482421875, 296.325439453125, 1306.5394287109375, -16.401718139648438, 200.85606384277344, 704.5594482421875, 360.418212890625, -750.93359375, -112.07621765136719, 120.19566345214844, -98.8725814819336, 301.76959228515625, -12.103069305419922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000626.npy"}
{"epoch": 0.9192364170337739, "step": 627, "batch_size": 64, "mean": 590.864013671875, "std": 884.6433715820312, "min": -766.2708129882812, "p10": -192.88841247558594, "median": 410.2700958251953, "p90": 1515.036596679688, "max": 4423.763671875, "pos_frac": 0.75, "sample": [631.8800048828125, 250.62615966796875, 1151.755859375, 426.1650085449219, -184.30899047851562, -113.71784973144531, 122.96558380126953, 636.04345703125, 507.48101806640625, 411.0092468261719, 1313.3798828125, -505.9135437011719, 178.62042236328125, 504.1211242675781, -109.84078216552734, 1059.1962890625, -196.5653076171875, 766.0376586914062, 1006.9061279296875, 1122.8192138671875, -699.8193359375, -37.73434066772461, 676.4445190429688, -20.622587203979492, 1745.135986328125, 199.07452392578125, 409.53094482421875, 2540.12451171875, 1080.707275390625, 428.3176574707031, 1884.77392578125, 73.1041259765625, 510.186767578125, 1205.7203369140625, 557.1222534179688, 1753.0137939453125, 1392.1038818359375, 173.39663696289062, 500.5384521484375, -205.6448516845703, 178.6743927001953, -59.05543518066406, 3536.0390625, 1567.7220458984375, 769.814208984375, -164.97689819335938, 4423.763671875, -223.591064453125, 441.9781188964844, -43.446746826171875, 1016.2669067382812, 318.5437316894531, 906.1384887695312, -766.2708129882812, 296.772216796875, 284.5945739746094, 280.429931640625, 173.90187072753906, -385.6585388183594, 1299.2147216796875, -43.906349182128906, 251.33433532714844, 295.9355163574219, 316.94342041015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000627.npy"}
{"epoch": 0.920704845814978, "step": 628, "batch_size": 64, "mean": 449.6760559082031, "std": 688.9083251953125, "min": -865.91748046875, "p10": -390.0245544433593, "median": 329.44081115722656, "p90": 1404.7720092773438, "max": 2371.91845703125, "pos_frac": 0.6875, "sample": [313.3423156738281, -30.01392364501953, 345.539306640625, 1425.5684814453125, 1635.1962890625, 163.69873046875, 721.9738159179688, 153.58795166015625, -204.88304138183594, -74.82733154296875, 147.25027465820312, 168.66888427734375, -865.91748046875, 1154.61181640625, 1103.5748291015625, 1384.174560546875, -4.272798538208008, -343.9345703125, 1664.198486328125, 514.855712890625, 2371.91845703125, 544.9842529296875, 246.90097045898438, -304.8167724609375, -64.76058197021484, -456.1317443847656, 513.895751953125, 857.7835693359375, -506.59124755859375, -504.6153259277344, 991.8637084960938, 272.0420837402344, 782.4990234375, 1437.579345703125, 1367.886474609375, 889.744140625, 467.4611511230469, 733.050048828125, 138.1956024169922, 184.04751586914062, -189.70755004882812, 614.4221801757812, 511.97833251953125, 87.65180206298828, 1645.0439453125, 1314.7103271484375, 1378.160888671875, 226.50372314453125, -409.77740478515625, -728.4314575195312, -107.97805786132812, -217.2569122314453, 1413.5994873046875, 831.467041015625, -701.2843627929688, -133.5481719970703, 1036.3316650390625, 469.02862548828125, 496.4349670410156, 150.44134521484375, 1162.6151123046875, 634.3330688476562, -18.788589477539062, -22.011486053466797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000628.npy"}
{"epoch": 0.922173274596182, "step": 629, "batch_size": 64, "mean": 419.21453857421875, "std": 560.6600341796875, "min": -1158.7171630859375, "p10": -242.93312988281247, "median": 449.0620422363281, "p90": 1192.8162353515627, "max": 1816.3236083984375, "pos_frac": 0.796875, "sample": [184.9371337890625, 1285.75244140625, -329.86767578125, -455.9183044433594, 446.48486328125, 451.63922119140625, 396.07403564453125, 797.0789794921875, 1353.9610595703125, 452.06597900390625, 557.3816528320312, 893.1707763671875, -104.4738540649414, 542.52392578125, 1288.4212646484375, -203.22735595703125, 149.73452758789062, 24.436344146728516, -15.423208236694336, 590.8345947265625, 25.936141967773438, 1123.3017578125, 392.4743347167969, -165.37086486816406, 115.99263000488281, 491.3037414550781, -806.0809936523438, -122.56543731689453, 1074.3499755859375, 753.4868774414062, 72.8335952758789, 827.8295288085938, 1491.176513671875, 1816.3236083984375, 460.8622741699219, 164.91160583496094, -1158.7171630859375, 1380.13330078125, 1017.996826171875, -259.94989013671875, -366.62957763671875, -548.5492553710938, 346.06646728515625, 473.25933837890625, 552.3414306640625, 1222.608154296875, 292.33489990234375, 445.85906982421875, 767.8583374023438, 35.193992614746094, 898.1255493164062, 376.45733642578125, 139.68548583984375, 690.5042724609375, 231.3104248046875, 506.28643798828125, 76.84516143798828, 657.9078979492188, 996.490966796875, 559.5988159179688, 439.47589111328125, 564.5105590820312, 531.2093505859375, -60.835235595703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000629.npy"}
{"epoch": 0.9236417033773862, "step": 630, "batch_size": 64, "mean": 236.3033905029297, "std": 723.6176147460938, "min": -1335.3486328125, "p10": -486.45968017578116, "median": 157.420166015625, "p90": 1034.4691650390625, "max": 3548.30517578125, "pos_frac": 0.671875, "sample": [1047.0760498046875, -1088.6385498046875, -529.4056396484375, 370.76910400390625, -271.6186828613281, 704.5155639648438, -1.9420852661132812, -308.56329345703125, 2366.3095703125, -231.49932861328125, 705.1318359375, -274.1561279296875, 181.44357299804688, 1092.813720703125, 759.4468994140625, -1335.3486328125, 108.44351196289062, -630.9558715820312, 324.7244873046875, 236.37005615234375, 450.1482238769531, -174.6241912841797, 142.74940490722656, 1494.8245849609375, -136.9645538330078, 3548.30517578125, 44.13810729980469, 1005.0531005859375, 110.9480209350586, -261.93621826171875, 36.445335388183594, -344.09478759765625, 283.5916748046875, -580.6227416992188, 336.8108215332031, -566.5487060546875, -189.8881378173828, 362.4892272949219, 158.4872283935547, 411.9532470703125, 253.962646484375, -111.68923950195312, 337.9631042480469, -638.8692626953125, 166.89605712890625, 365.66790771484375, 103.64356231689453, 251.95965576171875, 711.6146850585938, -84.3699722290039, 58.96394348144531, 1176.2603759765625, 658.0104370117188, 156.3531036376953, -349.43017578125, 79.86036682128906, 415.1921081542969, 95.47300720214844, 644.099365234375, -386.25244140625, 1260.530517578125, 249.16647338867188, 304.35736083984375, 47.871917724609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000630.npy"}
{"epoch": 0.9251101321585903, "step": 631, "batch_size": 64, "mean": 382.4601135253906, "std": 676.6861572265625, "min": -1860.8319091796875, "p10": -349.8773712158203, "median": 316.0376281738281, "p90": 1192.6641601562503, "max": 2035.7139892578125, "pos_frac": 0.71875, "sample": [1231.39794921875, 957.6788330078125, -325.2162780761719, 283.2193603515625, -360.4464111328125, 508.04736328125, -201.02536010742188, -165.28436279296875, -256.0486145019531, -506.80194091796875, 1790.8363037109375, 1296.8607177734375, -513.5359497070312, 471.5285339355469, -597.5009765625, 191.2716827392578, 656.9266967773438, 829.5472412109375, 49.805885314941406, -309.5737609863281, 148.8065185546875, -25.486051559448242, 315.9929504394531, -619.5733032226562, 1051.099365234375, 46.58176040649414, 102.6855239868164, -254.57005310058594, 821.8680419921875, 819.2864990234375, -1860.8319091796875, 517.7138671875, 2035.7139892578125, 760.134033203125, 356.35162353515625, 624.6786499023438, 211.32077026367188, -164.0211639404297, -94.65767669677734, 1216.0908203125, 237.96865844726562, 642.7384643554688, 1545.3038330078125, 1039.314697265625, 713.6873779296875, -900.8993530273438, 1138.001953125, 286.4163818359375, 1499.865966796875, 623.0714721679688, 67.04357147216797, 708.5377807617188, -105.50762939453125, 439.9471740722656, 181.46966552734375, 243.12982177734375, 1137.4442138671875, 706.2199096679688, 1014.094482421875, -126.86531829833984, 210.34735107421875, 962.58349609375, 316.0823059082031, 856.5774536132812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000631.npy"}
{"epoch": 0.9265785609397944, "step": 632, "batch_size": 64, "mean": 389.77215576171875, "std": 658.3034057617188, "min": -1355.92236328125, "p10": -379.0459869384766, "median": 367.8470458984375, "p90": 1142.2611572265625, "max": 2452.812744140625, "pos_frac": 0.75, "sample": [556.0197143554688, -737.9063110351562, 265.74481201171875, 305.489013671875, 627.2240600585938, -1355.92236328125, 1178.97265625, 1162.339599609375, 1411.3428955078125, 980.5171508789062, 784.4300537109375, 686.8901977539062, 180.39886474609375, 909.4957275390625, 297.6865539550781, 227.83731079101562, 545.1123046875, 404.5088195800781, 64.914794921875, 81.26676177978516, 1585.02099609375, 356.7604675292969, 695.5667724609375, 505.49163818359375, -503.4302978515625, 1142.414306640625, 618.2982177734375, -367.0729064941406, 487.849853515625, -216.36700439453125, -1161.9881591796875, 345.0403747558594, 1057.37744140625, 1130.1444091796875, 607.4359130859375, 842.3424682617188, 1064.8983154296875, 728.3003540039062, 378.9336242675781, -214.67868041992188, 1308.5567626953125, -95.94269561767578, -245.86651611328125, -129.507080078125, 566.6184692382812, 354.89715576171875, 658.9202270507812, 179.19476318359375, 1141.90380859375, 2452.812744140625, 1008.30078125, -167.00277709960938, -492.22589111328125, 53.88727569580078, 143.30039978027344, -384.17730712890625, 379.6062316894531, 265.4468078613281, -600.44677734375, 3.47979736328125, 100.72520446777344, -236.6249542236328, -108.6343002319336, 1129.4940185546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000632.npy"}
{"epoch": 0.9280469897209985, "step": 633, "batch_size": 64, "mean": 275.2097473144531, "std": 595.0234985351562, "min": -1187.8402099609375, "p10": -382.66123962402344, "median": 251.14923095703125, "p90": 1168.038134765625, "max": 1514.766845703125, "pos_frac": 0.703125, "sample": [-387.3971252441406, 1367.346435546875, 740.6728515625, -738.42236328125, 290.465087890625, 368.78424072265625, -421.00543212890625, -83.36709594726562, 1196.43310546875, 83.0813217163086, 244.9541015625, 175.21685791015625, 381.82476806640625, -1069.89306640625, 529.185546875, 145.9110870361328, 197.86412048339844, 1184.5330810546875, 351.3578186035156, -304.9747009277344, -185.52105712890625, 361.3667297363281, 303.795654296875, -40.083839416503906, 763.2455444335938, 118.82009887695312, -3.7559642791748047, -160.0697021484375, -882.0252685546875, 1046.9124755859375, 331.57073974609375, 226.25808715820312, 1196.5921630859375, 398.5331115722656, 781.6167602539062, -61.56319046020508, -292.8324279785156, -1187.8402099609375, -371.61083984375, 84.59693145751953, 1514.766845703125, 496.0404357910156, 262.20855712890625, -869.0391845703125, 601.0006103515625, 678.00927734375, 546.3231811523438, 1493.9022216796875, 981.9989013671875, 1129.5499267578125, 135.48300170898438, 280.06463623046875, -319.8354187011719, 937.929443359375, 241.5253143310547, -60.47245788574219, 176.00221252441406, 400.828369140625, 1304.2138671875, 87.51876831054688, 609.283447265625, -46.027069091796875, 94.22620391845703, 257.3443603515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000633.npy"}
{"epoch": 0.9295154185022027, "step": 634, "batch_size": 64, "mean": 255.07102966308594, "std": 607.6716918945312, "min": -998.73974609375, "p10": -362.7284454345703, "median": 160.91856384277344, "p90": 1144.204211425782, "max": 2191.70947265625, "pos_frac": 0.640625, "sample": [210.39198303222656, 859.3348388671875, -354.2565612792969, 1676.8831787109375, -638.2847290039062, 4.731077194213867, 28.65355110168457, 225.67697143554688, 288.10888671875, 1385.0389404296875, -1.2354145050048828, 377.364990234375, -94.30609893798828, 372.060791015625, 660.6322631835938, 135.33175659179688, 347.58721923828125, -274.23944091796875, -676.6455078125, 988.263916015625, 1232.3995361328125, -113.78837585449219, -88.14833068847656, 101.65733337402344, -24.856109619140625, 95.96461486816406, 157.51931762695312, 684.1334838867188, 475.8208312988281, 58.21065139770508, 2191.70947265625, 356.82147216796875, 564.98583984375, -998.73974609375, 1211.0357666015625, 1242.5318603515625, 602.50244140625, -366.3592529296875, -349.81048583984375, 164.31781005859375, 825.9280395507812, -97.32920837402344, -120.52777862548828, -934.1979370117188, 247.5139617919922, -583.9569702148438, 38.65806198120117, -523.5364990234375, -254.9090576171875, 872.8025512695312, -312.89398193359375, 759.431884765625, 388.75469970703125, 1335.1285400390625, 406.4142761230469, 72.54954528808594, 232.88125610351562, 729.66748046875, -90.6447982788086, 249.27633666992188, -50.946807861328125, -30.994918823242188, -241.33213806152344, 687.8092041015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000634.npy"}
{"epoch": 0.9309838472834068, "step": 635, "batch_size": 64, "mean": 359.61346435546875, "std": 665.2763671875, "min": -1187.1319580078125, "p10": -348.3009628295897, "median": 351.98841857910156, "p90": 1035.1943603515629, "max": 2453.15087890625, "pos_frac": 0.6875, "sample": [930.2689208984375, 136.71925354003906, 2453.15087890625, -108.853271484375, 431.1805725097656, -80.32186126708984, 147.71160888671875, -7.787647247314453, -11.864173889160156, 527.2192993164062, 92.65943145751953, 1432.0977783203125, -30.150131225585938, 819.0650634765625, -652.9346313476562, 379.01904296875, 511.9988098144531, -13.223663330078125, 775.7535400390625, 27.674606323242188, 373.48565673828125, 203.03636169433594, 741.7159423828125, 885.626953125, 348.56280517578125, 41.030128479003906, 10.712631225585938, 446.06640625, -243.79627990722656, 1971.981689453125, -72.90482330322266, 397.73876953125, 209.3682861328125, 1344.9359130859375, 748.4439697265625, -131.899169921875, 148.00433349609375, -14.875934600830078, 355.4140319824219, 1077.8182373046875, -162.94447326660156, 935.7386474609375, 930.3187866210938, 427.1147766113281, -705.9149169921875, -188.46713256835938, 637.1026611328125, -47.83977508544922, -405.238037109375, 721.8482666015625, -434.62945556640625, -393.08868408203125, -1187.1319580078125, 800.2507934570312, 1138.897705078125, 361.68536376953125, 416.2584533691406, 820.88232421875, -974.58544921875, 2370.81982421875, 162.64462280273438, 523.0438232421875, 356.2886962890625, 312.35906982421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000635.npy"}
{"epoch": 0.9324522760646109, "step": 636, "batch_size": 64, "mean": 428.3197021484375, "std": 674.3988037109375, "min": -1217.201416015625, "p10": -244.07832336425778, "median": 357.48826599121094, "p90": 1000.2477905273438, "max": 3568.331787109375, "pos_frac": 0.765625, "sample": [45.475975036621094, -414.81756591796875, 89.44784545898438, -160.72567749023438, 358.2875061035156, -509.9126281738281, 356.68902587890625, 333.18402099609375, 418.3046569824219, 65.89900970458984, 814.3914794921875, -385.61676025390625, 410.3768005371094, 1190.9959716796875, 29.298912048339844, 1007.8298950195312, -99.5650634765625, 2330.749755859375, 1182.13037109375, 481.57147216796875, 256.1563720703125, 982.5562133789062, -73.73294067382812, -278.9696044921875, 756.0489501953125, 437.7037658691406, 1017.726318359375, 481.3656005859375, 213.15098571777344, 3568.331787109375, 33.41188049316406, -179.44093322753906, 944.7374877929688, 1313.0712890625, -350.0162048339844, 928.94873046875, 923.4903564453125, 388.2472839355469, 543.484130859375, -215.39505004882812, 606.7125244140625, 214.3839569091797, 952.2539672851562, 840.7620239257812, 627.4639282226562, 303.23944091796875, 912.13818359375, -43.802703857421875, 299.24652099609375, -256.37115478515625, 179.94190979003906, 30.395843505859375, -1217.201416015625, 363.44659423828125, 255.73477172851562, -197.7244415283203, 972.6231079101562, 754.8583984375, 265.0138854980469, 481.4525146484375, -8.110000610351562, 268.2458801269531, 608.4993286132812, 964.3847045898438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000636.npy"}
{"epoch": 0.933920704845815, "step": 637, "batch_size": 64, "mean": 471.3244934082031, "std": 654.3062133789062, "min": -1049.3040771484375, "p10": -98.9008819580078, "median": 295.6039733886719, "p90": 1410.5452758789065, "max": 2439.488037109375, "pos_frac": 0.828125, "sample": [114.46514129638672, 113.34420013427734, 623.9150390625, 36.01714324951172, 61.97262954711914, 944.159912109375, 294.00726318359375, 31.565004348754883, 663.8699340820312, 1318.9805908203125, 736.234375, -1049.3040771484375, 107.24765014648438, 216.65719604492188, -254.00621032714844, 623.1841430664062, 834.3789672851562, 1433.7684326171875, 1095.6634521484375, 622.05419921875, 1525.5172119140625, 219.81866455078125, 145.79739379882812, 2316.033447265625, 175.07998657226562, 504.1494140625, -294.5716247558594, -57.75754165649414, 237.48178100585938, -104.95108032226562, 330.8364562988281, -150.78030395507812, -214.8096923828125, 607.55322265625, 555.4520263671875, -84.78375244140625, 316.55169677734375, 797.0275268554688, 818.9576416015625, 213.974853515625, 601.9868774414062, 385.82940673828125, 239.46670532226562, 1042.60302734375, 817.589111328125, 645.3164672851562, 1356.35791015625, 49.82410430908203, 1797.8804931640625, -917.8536987304688, 1183.6142578125, 2.6168041229248047, 297.20068359375, 1594.767822265625, 437.7622375488281, 101.42515563964844, 34.62333679199219, 1465.0537109375, -31.520734786987305, 2439.488037109375, 14.795944213867188, 105.85652160644531, 135.91009521484375, -60.57868194580078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000637.npy"}
{"epoch": 0.9353891336270191, "step": 638, "batch_size": 64, "mean": 380.04638671875, "std": 758.9913330078125, "min": -3609.371337890625, "p10": -167.46829452514643, "median": 311.6347198486328, "p90": 1233.2465942382814, "max": 1903.514404296875, "pos_frac": 0.796875, "sample": [1366.9324951171875, -880.1738891601562, -3609.371337890625, 970.735107421875, 315.630859375, 140.38656616210938, 6.079618453979492, 70.59271240234375, 1157.1898193359375, 340.3968811035156, 780.3114013671875, 1384.1986083984375, 140.45164489746094, 733.9931640625, 320.7979736328125, 551.7218017578125, 665.01806640625, -530.5072631835938, 484.86541748046875, 16.286640167236328, 502.0096130371094, 45.503379821777344, -322.76995849609375, 98.92615509033203, 297.9256286621094, 962.8394775390625, 1454.66259765625, -59.42775344848633, 582.2445068359375, 235.72091674804688, 865.0213012695312, 280.56231689453125, 802.2903442382812, -211.99984741210938, -192.5113983154297, 829.6270751953125, 874.189208984375, -54.26168441772461, 328.3342590332031, -367.3066711425781, 7.401994705200195, 502.1149597167969, 307.6385803222656, 1903.514404296875, 73.38379669189453, -18.76304817199707, 1027.9656982421875, 95.35652160644531, 685.3887329101562, 1208.88818359375, -77.87576293945312, -60.993499755859375, 1120.074951171875, 504.604248046875, 138.41842651367188, 30.563262939453125, 100.11539459228516, 58.316444396972656, 870.5648193359375, 1243.6859130859375, -109.03438568115234, 51.46544647216797, 1850.6439208984375, 1432.4150390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000638.npy"}
{"epoch": 0.9368575624082232, "step": 639, "batch_size": 64, "mean": 157.6584014892578, "std": 713.8563232421875, "min": -1322.6422119140625, "p10": -684.9025573730469, "median": 175.00524139404297, "p90": 1069.9893676757818, "max": 1890.928955078125, "pos_frac": 0.609375, "sample": [1393.53369140625, -992.5590209960938, -497.19256591796875, 1266.25390625, 938.4046630859375, -495.46234130859375, 241.05252075195312, 398.6788024902344, -590.9647216796875, 804.2548217773438, 1587.4146728515625, -24.669315338134766, 459.8143005371094, 639.8766479492188, -1117.12109375, 214.62203979492188, -479.932373046875, -393.0672607421875, -43.528228759765625, 454.3384704589844, 745.0325317382812, -1254.0306396484375, 876.0989379882812, 613.0850219726562, -628.310791015625, -1322.6422119140625, -1053.58447265625, 292.3955383300781, 164.71231079101562, 2.7876739501953125, 610.2786254882812, 480.7082824707031, 17.39104461669922, -571.3826293945312, 1126.3828125, 1253.785400390625, -386.3079833984375, 871.9306030273438, -397.2808837890625, -32.733375549316406, 95.93831634521484, -67.27212524414062, 622.7939453125, 421.761962890625, 1890.928955078125, -879.3141479492188, -166.93704223632812, 19.082725524902344, -315.3193359375, -450.56671142578125, 181.94996643066406, 747.44384765625, 252.6972198486328, 901.2054443359375, -678.4470825195312, -420.5317687988281, -687.669189453125, 187.91810607910156, 123.00013732910156, 722.3656616210938, 377.32305908203125, 1289.809326171875, 581.8521118164062, 168.06051635742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000639.npy"}
{"epoch": 0.9383259911894273, "step": 640, "batch_size": 64, "mean": 574.392578125, "std": 803.23193359375, "min": -911.8138427734375, "p10": -190.05155487060546, "median": 417.2586975097656, "p90": 1467.5801025390626, "max": 3141.627685546875, "pos_frac": 0.765625, "sample": [763.0390014648438, 576.0474853515625, -522.4783325195312, -146.4922332763672, 128.52777099609375, 600.810546875, 219.797119140625, 1379.947265625, 1378.220947265625, 207.16024780273438, 1458.786865234375, 1685.2052001953125, 927.1572265625, 60.985870361328125, -269.6239929199219, 487.9815368652344, 906.5166625976562, -170.68438720703125, 24.861679077148438, 1471.3486328125, -856.0379638671875, 961.712158203125, 429.17523193359375, 788.786376953125, -192.89688110351562, -911.8138427734375, 122.93942260742188, 399.50872802734375, 3141.627685546875, 985.3695068359375, 604.7128295898438, 1013.2688598632812, -644.4102172851562, 642.9472045898438, 2584.8505859375, 342.27349853515625, -179.7586669921875, 266.53790283203125, 66.5552978515625, 258.5599365234375, -80.48274230957031, 222.43055725097656, -5.7815093994140625, 1195.4166259765625, 1075.1190185546875, 138.43557739257812, 942.5028686523438, 651.0369262695312, -505.58367919921875, 1298.1795654296875, 2349.04931640625, 347.4377136230469, -80.57192993164062, 1179.2899169921875, -40.40589904785156, 754.7142333984375, 405.3421630859375, 911.93310546875, 72.02656555175781, 778.1048583984375, 1528.6610107421875, 2583.255126953125, -183.41246032714844, 233.40493774414062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000640.npy"}
{"epoch": 0.9397944199706314, "step": 641, "batch_size": 64, "mean": 378.48095703125, "std": 760.3213500976562, "min": -1221.1627197265625, "p10": -540.9864532470702, "median": 379.2915344238281, "p90": 1083.7160888671876, "max": 3335.364501953125, "pos_frac": 0.75, "sample": [161.18661499023438, 324.47076416015625, 499.5301208496094, 1098.264404296875, -565.6256103515625, -158.21768188476562, 475.76220703125, 1645.9864501953125, 515.98681640625, 169.8698272705078, -1023.552490234375, 555.13671875, 3.6866607666015625, -435.7410888671875, 3335.364501953125, -558.8842163085938, -499.2250061035156, -461.775634765625, 70.91966247558594, 1049.77001953125, -45.685211181640625, 116.6879653930664, 90.79510498046875, 690.4786987304688, 235.78390502929688, 591.5660400390625, -1213.2000732421875, 867.4520874023438, 606.3041381835938, 107.21678161621094, 940.3585205078125, 506.86932373046875, 663.03076171875, 79.67071533203125, 116.28723907470703, 38.99093246459961, 1395.218017578125, 317.236328125, 818.629150390625, 666.9730224609375, 97.23645782470703, -9.768918991088867, -57.27942657470703, -647.8531494140625, 260.67730712890625, 834.4266967773438, -1221.1627197265625, 1034.57421875, 1030.9019775390625, 1015.8927001953125, 2240.347412109375, 849.0140991210938, -1029.4149169921875, 758.8292236328125, -50.458229064941406, 403.2468566894531, 640.4248046875, -377.48126220703125, 551.1572875976562, 355.3362121582031, 787.4942626953125, 1189.4422607421875, 576.7904052734375, 1196.830810546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000641.npy"}
{"epoch": 0.9412628487518355, "step": 642, "batch_size": 64, "mean": 454.66107177734375, "std": 936.3311157226562, "min": -1683.145263671875, "p10": -359.41856689453124, "median": 372.04315185546875, "p90": 1379.0308105468757, "max": 4975.771484375, "pos_frac": 0.71875, "sample": [113.51313781738281, 552.9683837890625, 762.7894287109375, 614.7354736328125, 291.1839599609375, -1683.145263671875, 820.5031127929688, 274.3555603027344, -69.02899169921875, -596.3790283203125, -45.57312774658203, 99.12826538085938, 1553.9114990234375, -367.061279296875, 832.8043823242188, 1035.16552734375, 489.69854736328125, 764.6354370117188, -935.9902954101562, 65.79965209960938, 260.5963439941406, -143.34027099609375, 395.788330078125, -10.479082107543945, 368.122314453125, 673.064697265625, -170.63949584960938, 214.5250244140625, 95.38453674316406, 474.2791442871094, 851.258544921875, 656.552490234375, 824.0958251953125, 2424.32080078125, -321.2690734863281, 47.415557861328125, 382.0921630859375, 787.9447021484375, -653.16552734375, 254.41665649414062, 818.8255004882812, 168.26336669921875, 520.4286499023438, 1465.28271484375, -341.5855712890625, 1177.7763671875, 805.451904296875, 1798.329345703125, -195.35267639160156, -165.5853271484375, 4975.771484375, 961.1714477539062, -617.9926147460938, 467.7255859375, -166.11936950683594, -259.8465881347656, 375.9639892578125, 2645.25390625, 42.629974365234375, -974.4285888671875, 504.29400634765625, 1876.97412109375, 288.1044921875, 941.9969482421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000642.npy"}
{"epoch": 0.9427312775330396, "step": 643, "batch_size": 64, "mean": 349.2655029296875, "std": 571.4017333984375, "min": -803.97509765625, "p10": -383.6912628173828, "median": 320.9884033203125, "p90": 1053.9843872070312, "max": 1924.3406982421875, "pos_frac": 0.765625, "sample": [362.1546325683594, 320.7588195800781, 159.88856506347656, 962.7294921875, 821.4489135742188, 689.073486328125, 577.7972412109375, -37.62718963623047, 429.0462646484375, 911.7470703125, 15.858331680297852, -666.0440673828125, 288.0330505371094, 199.77908325195312, -241.45423889160156, 947.853759765625, 893.1784057617188, 1040.84716796875, 1087.33203125, 431.6239013671875, 126.37110900878906, 40.04728317260742, -290.49395751953125, 164.68386840820312, 109.67793273925781, 172.25216674804688, -803.97509765625, 694.9776000976562, 476.7274475097656, 1110.9202880859375, -349.86322021484375, 321.2179870605469, 315.5120849609375, 212.005615234375, 952.29296875, 1273.085205078125, 392.5254821777344, -468.25067138671875, 645.7095336914062, 379.0739440917969, -607.4653930664062, 1924.3406982421875, 1069.210693359375, 377.80126953125, 672.2703857421875, 92.74854278564453, 2.83209228515625, 415.89813232421875, 1900.819580078125, 28.222881317138672, 966.7470703125, 664.74658203125, 693.80126953125, -292.48101806640625, 1059.6146240234375, 40.6314697265625, -425.84228515625, -398.1889953613281, 421.28399658203125, -76.16586303710938, -223.40435791015625, -221.19345092773438, -571.0099487304688, 169.2535400390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000643.npy"}
{"epoch": 0.9441997063142438, "step": 644, "batch_size": 64, "mean": 365.61663818359375, "std": 762.953125, "min": -1870.052978515625, "p10": -567.198501586914, "median": 229.78643798828125, "p90": 1261.613037109375, "max": 2554.60400390625, "pos_frac": 0.75, "sample": [2554.60400390625, 34.943748474121094, 880.68359375, -1870.052978515625, 487.77630615234375, 378.72991943359375, -800.0504150390625, 99.91102600097656, 993.61376953125, 404.93804931640625, 43.39922332763672, -17.366378784179688, 7.026557922363281, 450.99176025390625, 173.8794708251953, 633.6552124023438, 210.28509521484375, -31.7049560546875, 294.7466735839844, -595.353759765625, 157.01580810546875, 2080.297607421875, 45.80528259277344, -617.7354736328125, 826.7144165039062, 952.4735107421875, -1.80950927734375, 490.15814208984375, -275.18768310546875, 1702.2999267578125, -38.96526336669922, -275.5877380371094, 769.224853515625, -77.07640075683594, 27.895830154418945, 0.010858535766601562, 1266.806640625, 702.519775390625, 587.1060180664062, 249.28778076171875, 135.26266479492188, -501.5028991699219, 514.7551879882812, 68.38056182861328, 1169.974853515625, 560.1796264648438, -653.0349731445312, -274.90411376953125, 161.9938201904297, 1088.9990234375, 1679.830322265625, 879.3216552734375, 88.42942810058594, -1211.15185546875, 1249.49462890625, 1199.762939453125, 369.42236328125, 1305.5919189453125, 24.53790283203125, 478.5975646972656, 562.9616088867188, 2049.344970703125, 151.0308837890625, -603.724853515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000644.npy"}
{"epoch": 0.9456681350954479, "step": 645, "batch_size": 64, "mean": 176.3662872314453, "std": 705.8453369140625, "min": -1366.8255615234375, "p10": -743.0362426757812, "median": 104.98989868164062, "p90": 1097.4837036132815, "max": 2255.26513671875, "pos_frac": 0.578125, "sample": [313.8077697753906, 1112.23046875, -635.755859375, 970.2590942382812, -1366.8255615234375, -983.965087890625, -695.7048950195312, 5.998809814453125, 385.1379699707031, -142.99819946289062, -180.28041076660156, -40.6834716796875, 54.414756774902344, -7.600851058959961, -123.32858276367188, -486.3406982421875, 439.72491455078125, 328.2137145996094, 945.3888549804688, 1063.0745849609375, 698.126220703125, -393.7420654296875, 100.3248291015625, -15.647659301757812, 113.78228759765625, 19.324626922607422, 1135.1905517578125, -42.714637756347656, -763.3211059570312, -1119.6865234375, 184.41505432128906, 423.4109802246094, -667.5069580078125, 1470.482421875, 877.2969360351562, -20.115570068359375, 1850.923583984375, 828.9483642578125, -1166.8385009765625, -782.7862548828125, 1335.2598876953125, -70.32070922851562, 109.65496826171875, 347.3490295410156, 557.0069580078125, 546.0651245117188, 412.48052978515625, 326.13909912109375, 154.55828857421875, 618.6373901367188, -14.67156982421875, -259.15081787109375, 1314.0283203125, 33.226097106933594, 351.631591796875, 2255.26513671875, -133.93197631835938, 326.49017333984375, -889.96142578125, -116.28202056884766, 602.3653564453125, 141.7875518798828, -47.12260055541992, -297.6954345703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000645.npy"}
{"epoch": 0.947136563876652, "step": 646, "batch_size": 64, "mean": 309.9844665527344, "std": 604.6002197265625, "min": -1129.5433349609375, "p10": -384.34992980957026, "median": 214.14554595947266, "p90": 1109.2163452148438, "max": 2100.311279296875, "pos_frac": 0.734375, "sample": [812.947021484375, 1101.8831787109375, -88.61231231689453, 1651.62109375, 621.638427734375, 236.37327575683594, 530.8807983398438, 645.2133178710938, 935.01708984375, 23.027633666992188, 432.9496765136719, 538.4361572265625, -981.8724365234375, -3.067962646484375, 1062.5203857421875, 207.02430725097656, -403.9870300292969, 284.9433898925781, 1154.9114990234375, -60.68560028076172, 370.72320556640625, -190.2333984375, -144.24853515625, 595.7364501953125, 123.59902954101562, 358.6156005859375, 460.01251220703125, 250.02891540527344, 109.05023193359375, 908.8953857421875, 110.81365966796875, 483.5163879394531, 27.342132568359375, 148.78855895996094, -224.6038055419922, 205.97645568847656, 74.10772705078125, 1661.0924072265625, -236.5409698486328, -1129.5433349609375, 421.704833984375, 548.247314453125, 140.69398498535156, -456.6455078125, -87.49143981933594, -696.3013916015625, -70.18583679199219, 85.51863861083984, -529.7679443359375, 2100.311279296875, 1112.359130859375, 221.26678466796875, 627.219970703125, 39.07926940917969, 1296.7642822265625, -338.530029296875, -609.9617309570312, 103.89087677001953, 171.55894470214844, 173.72015380859375, 1165.1148681640625, 970.8274536132812, 510.887939453125, 274.4329528808594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000646.npy"}
{"epoch": 0.9486049926578561, "step": 647, "batch_size": 64, "mean": 387.15777587890625, "std": 560.1911010742188, "min": -1176.6917724609375, "p10": -170.35709228515623, "median": 360.6318817138672, "p90": 1150.7571411132812, "max": 1908.7781982421875, "pos_frac": 0.765625, "sample": [838.01708984375, 604.2588500976562, 342.55828857421875, 245.71653747558594, 242.41928100585938, 1174.07373046875, 220.36251831054688, 359.4096984863281, 534.1854248046875, 525.6532592773438, 1519.5528564453125, 55.088897705078125, 42.68231201171875, 1730.74462890625, 769.4012451171875, 630.3580932617188, -68.61509704589844, 1132.2142333984375, 705.299072265625, -146.57962036132812, 215.48941040039062, 202.12261962890625, -63.13105773925781, -175.009033203125, 593.392333984375, 369.79388427734375, -73.09080505371094, 1908.7781982421875, -974.8396606445312, 809.5770263671875, -212.31573486328125, -291.04791259765625, 85.53689575195312, 779.9107666015625, 1158.7041015625, 841.1763916015625, 450.4326171875, 361.85406494140625, -622.6383666992188, 271.7303466796875, 616.6591186523438, 117.33238983154297, 1200.122802734375, -159.5025634765625, -76.30996704101562, 472.4400634765625, 425.5487976074219, 446.40521240234375, -87.68038177490234, 1227.10205078125, 191.1913604736328, 101.2657241821289, 1088.14013671875, 247.679931640625, -8.782180786132812, 622.9347534179688, -1176.6917724609375, 881.8397216796875, -294.5522155761719, 312.2027587890625, 492.2794494628906, 416.25555419921875, 504.4042663574219, 124.58401489257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000647.npy"}
{"epoch": 0.9500734214390602, "step": 648, "batch_size": 64, "mean": 582.78515625, "std": 817.3012084960938, "min": -1075.1087646484375, "p10": -201.61361236572264, "median": 519.8121948242188, "p90": 1654.1310913085945, "max": 3672.803955078125, "pos_frac": 0.75, "sample": [566.1473388671875, 304.0360107421875, 184.4805908203125, 532.7351684570312, 259.0008544921875, 652.513916015625, 1988.12158203125, 1736.3662109375, 673.764404296875, 1462.2491455078125, -43.645172119140625, -683.1300048828125, 46.516632080078125, 965.9702758789062, 204.1143798828125, -202.66468811035156, 1293.268798828125, 551.1921997070312, -32.63862609863281, -132.0013427734375, -26.769453048706055, 525.2127075195312, -88.11890411376953, 988.452392578125, -204.2274627685547, 2036.5552978515625, 633.8942260742188, 109.35116577148438, -49.80451965332031, -194.9854736328125, 832.6618041992188, 803.598876953125, -199.16110229492188, 3672.803955078125, 666.9527587890625, 535.5091552734375, -1075.1087646484375, 548.327392578125, 56.15924072265625, -696.2679443359375, -214.35235595703125, 296.99481201171875, 2416.679443359375, 727.8580322265625, 735.8492431640625, 401.5873718261719, 1379.2947998046875, -83.9757080078125, 578.4903564453125, 648.0397338867188, 1151.2664794921875, 366.3113708496094, 316.5121154785156, 514.4116821289062, 1367.7884521484375, 118.72908782958984, 497.0061340332031, 829.447998046875, 443.8201904296875, 2505.831298828125, 1325.7320556640625, 225.88482666015625, -404.3041687011719, 1951.91064453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000648.npy"}
{"epoch": 0.9515418502202643, "step": 649, "batch_size": 64, "mean": 283.9723205566406, "std": 691.89990234375, "min": -911.466064453125, "p10": -478.4241302490234, "median": 211.6774139404297, "p90": 1081.8225341796876, "max": 3335.921630859375, "pos_frac": 0.609375, "sample": [573.4129638671875, 14.948371887207031, 474.8147888183594, -128.92166137695312, 541.23876953125, -311.133056640625, -11.27755355834961, 160.15130615234375, 737.8466796875, 346.26287841796875, 572.024169921875, -140.84576416015625, -531.2295532226562, 67.19755554199219, -578.0602416992188, -176.90475463867188, -99.93406677246094, 395.6227111816406, -364.8854064941406, 288.31671142578125, 661.5909423828125, 189.9741668701172, -498.3694152832031, -158.4207000732422, 858.2232666015625, -820.7417602539062, 764.6583862304688, 1029.598876953125, -62.02665328979492, 585.185302734375, 352.9161682128906, -405.4273986816406, 58.247802734375, 39.946327209472656, 950.0841064453125, 518.7884521484375, 814.4951171875, 370.9127197265625, 853.2954711914062, -430.1346435546875, 1345.179443359375, 442.10662841796875, -4.525943756103516, 1178.587890625, 889.4044189453125, -247.63644409179688, 1489.6351318359375, 3335.921630859375, 483.017822265625, 357.39984130859375, -142.46153259277344, 233.3806610107422, 52.65235900878906, 837.1438598632812, -358.4434814453125, 1269.314453125, 1299.53271484375, -518.64697265625, -13.328479766845703, -151.50311279296875, -864.7972412109375, -911.466064453125, 1104.2041015625, -431.8851318359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000649.npy"}
{"epoch": 0.9530102790014684, "step": 650, "batch_size": 64, "mean": 437.9971008300781, "std": 746.2782592773438, "min": -1260.77978515625, "p10": -482.35199890136715, "median": 297.781982421875, "p90": 1371.5266235351564, "max": 2593.650390625, "pos_frac": 0.765625, "sample": [206.4737548828125, -663.9244995117188, 818.20654296875, 791.3927612304688, 199.86727905273438, 1008.4638671875, 304.4488220214844, -911.9646606445312, 2379.168701171875, 661.8443603515625, 1122.83544921875, -655.25, 1450.1043701171875, 1027.667236328125, 919.0204467773438, 1381.15185546875, 283.633056640625, 1743.3798828125, 1166.252197265625, 2593.650390625, 821.7991333007812, -107.32164764404297, 1570.1090087890625, 597.3379516601562, 259.95501708984375, 2.294872283935547, -105.52130126953125, 490.7139587402344, 182.59439086914062, 421.8092956542969, 700.3566284179688, -82.12605285644531, -277.5670471191406, -971.5440673828125, -445.15875244140625, 244.18801879882812, -24.088329315185547, 1991.3017578125, -127.60227966308594, 18.1064453125, 551.1796875, 200.54287719726562, -498.2919616699219, 646.8870239257812, 651.837646484375, 609.0074462890625, 76.13304138183594, 887.43701171875, 115.41840362548828, 1349.0677490234375, 291.1151428222656, 271.7731628417969, 644.0758056640625, 858.967041015625, 868.0796508789062, 42.33489227294922, 436.88360595703125, -583.8198852539062, -1260.77978515625, 239.7923583984375, 755.080322265625, 108.07056427001953, -401.2668151855469, 186.23208618164062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000650.npy"}
{"epoch": 0.9544787077826725, "step": 651, "batch_size": 64, "mean": 555.2301025390625, "std": 632.68359375, "min": -617.2399291992188, "p10": -40.655047035217265, "median": 463.89703369140625, "p90": 1339.6764648437504, "max": 2323.61669921875, "pos_frac": 0.859375, "sample": [905.8801879882812, 626.5337524414062, 2312.0517578125, 140.97918701171875, -454.86029052734375, -48.8929443359375, 401.73211669921875, 601.006591796875, 35.56898498535156, 151.1443328857422, 1221.7608642578125, 2323.61669921875, 311.1827087402344, -146.11199951171875, 141.747314453125, -239.75173950195312, 39.79227066040039, 526.0668334960938, -310.2259216308594, 899.6354370117188, 1801.584716796875, 416.56689453125, 832.4459228515625, 318.05767822265625, 715.3911743164062, 55.60444641113281, -21.433286666870117, 0.11785888671875, 194.4150390625, 719.5709228515625, 189.7562713623047, -6.283164978027344, 1111.0374755859375, 109.69329833984375, 1241.0289306640625, 925.9961547851562, 585.3931884765625, 54.980072021484375, 777.0640869140625, 3.7274646759033203, 457.74609375, 1156.7781982421875, -338.0657043457031, 530.063232421875, 673.4662475585938, 708.6455688476562, 53.93927001953125, 979.1613159179688, 107.56132507324219, 654.1876220703125, 202.50375366210938, 313.733154296875, 1381.9539794921875, 1915.9207763671875, 383.99566650390625, -617.2399291992188, 1079.906494140625, 470.0479736328125, 1800.630126953125, 479.8725280761719, 1131.1300048828125, 1509.6474609375, 921.29443359375, 114.27529907226562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000651.npy"}
{"epoch": 0.9559471365638766, "step": 652, "batch_size": 64, "mean": 429.65704345703125, "std": 718.1599731445312, "min": -1573.8304443359375, "p10": -274.2612274169921, "median": 284.01219177246094, "p90": 1326.8586669921874, "max": 2670.940185546875, "pos_frac": 0.78125, "sample": [374.0682373046875, 170.52517700195312, 782.8931274414062, -1137.935302734375, 596.3657836914062, 454.55267333984375, 234.67332458496094, -516.3482055664062, 712.3096923828125, 159.827880859375, 1327.8092041015625, 1432.4105224609375, 1324.6407470703125, -1573.8304443359375, 71.5396957397461, 572.238037109375, 41.001739501953125, 957.3873901367188, 1480.904541015625, -315.1583557128906, 1045.621337890625, 387.7356872558594, 83.26561737060547, -197.44927978515625, 16.35935401916504, -23.23388671875, 961.6231689453125, -143.46128845214844, 397.5616455078125, 121.34778594970703, 77.30632019042969, 2670.940185546875, -307.1806335449219, 2347.96630859375, 294.6945495605469, 1638.354736328125, 21.904098510742188, 76.4127197265625, 67.41064453125, 29.662261962890625, 465.12933349609375, -41.44525909423828, 203.43472290039062, -119.66434478759766, 992.5137939453125, 888.39404296875, 937.8795166015625, -96.3819351196289, 655.3209838867188, 801.7808227539062, 1860.5335693359375, 87.46870422363281, 862.16796875, 246.98818969726562, -559.034423828125, 418.3507995605469, 1308.3494873046875, -330.58392333984375, 416.8954162597656, 844.6439208984375, 271.401611328125, 416.49444580078125, 273.329833984375, -22.635040283203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000652.npy"}
{"epoch": 0.9574155653450808, "step": 653, "batch_size": 64, "mean": 386.3274841308594, "std": 696.8011474609375, "min": -1242.1859130859375, "p10": -401.17788391113277, "median": 286.0798645019531, "p90": 1325.7160766601564, "max": 2233.72607421875, "pos_frac": 0.734375, "sample": [885.6083984375, -1.3316802978515625, -121.15461730957031, 2148.22998046875, 182.00978088378906, -375.8140563964844, 100.76606750488281, -424.988037109375, 46.69439697265625, 1017.4942626953125, 536.6033935546875, -369.421630859375, 23.46826934814453, 1401.0335693359375, 888.2123413085938, 1354.9951171875, 894.1337890625, 1257.3983154296875, 1233.597412109375, -194.47430419921875, 159.3421630859375, 326.2259826660156, 1746.511474609375, 399.4654846191406, 526.1203002929688, 735.9226684570312, 1047.4276123046875, 302.0953369140625, 11.844276428222656, -522.85498046875, 1434.0521240234375, 270.06439208984375, 593.1755981445312, -1242.1859130859375, -222.42391967773438, -173.3897247314453, 353.596435546875, 588.44140625, -675.923828125, 2105.89794921875, 2233.72607421875, 640.5563354492188, 25.959455490112305, -32.445213317871094, 585.9381103515625, 609.554443359375, 19.013916015625, -412.048095703125, 265.8813171386719, 255.2335205078125, 517.5423583984375, 146.7915496826172, 832.431396484375, 168.85025024414062, 528.7252807617188, -854.0487670898438, -435.3395080566406, 386.49334716796875, 687.042724609375, 31.235490798950195, 567.3364868164062, -299.2105712890625, 230.71121215820312, -221.43821716308594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000653.npy"}
{"epoch": 0.9588839941262849, "step": 654, "batch_size": 64, "mean": 376.21954345703125, "std": 629.2120361328125, "min": -470.1534423828125, "p10": -361.5203918457031, "median": 281.99497985839844, "p90": 1344.1203369140626, "max": 2126.8662109375, "pos_frac": 0.65625, "sample": [104.74312591552734, 368.71771240234375, -229.46905517578125, 105.56616973876953, 494.40374755859375, -136.7861785888672, -375.142822265625, 392.521484375, 14.400531768798828, 794.9298095703125, 731.6743774414062, -331.6388854980469, 111.82866668701172, 467.184814453125, -404.24066162109375, -208.9183349609375, -71.11565399169922, 2126.8662109375, 991.2433471679688, 585.38720703125, 550.9063720703125, 1588.7542724609375, 481.56658935546875, 311.18414306640625, 1533.3316650390625, 700.0968627929688, -166.9813690185547, 698.2359008789062, 123.64443969726562, -107.646240234375, -196.2064971923828, 1290.450927734375, 372.05035400390625, 53.096466064453125, 31.42767333984375, -26.59178924560547, 886.6712036132812, 250.83895874023438, 232.93643188476562, 499.6878967285156, 442.2735595703125, 838.3359985351562, 756.9825439453125, -298.9841613769531, -470.1534423828125, 910.4248657226562, 328.06341552734375, 1760.80859375, 1353.060791015625, -322.3517150878906, -83.98779296875, 1509.6405029296875, 606.6956787109375, 252.80581665039062, -85.34123992919922, -387.67041015625, 594.9697875976562, -374.3267517089844, 1813.306396484375, 1323.25927734375, -25.112335205078125, -378.46185302734375, -405.76220703125, -220.034912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000654.npy"}
{"epoch": 0.960352422907489, "step": 655, "batch_size": 64, "mean": 392.17303466796875, "std": 641.191650390625, "min": -787.6956787109375, "p10": -330.0212158203125, "median": 259.9714889526367, "p90": 1189.644616699219, "max": 2412.729736328125, "pos_frac": 0.71875, "sample": [-405.8531494140625, -787.6956787109375, 1370.6368408203125, 213.48745727539062, 53.03968048095703, 1208.336669921875, 895.14697265625, 899.7201538085938, 317.2815856933594, -232.6080322265625, 704.9823608398438, 49.25164794921875, -59.96148681640625, -591.28759765625, -6.794347763061523, 1145.8558349609375, -138.84811401367188, 863.7796630859375, -341.037353515625, 1055.3468017578125, -255.93377685546875, 417.09002685546875, 942.4141235351562, -567.9127807617188, -304.31689453125, 583.3116455078125, 307.5653381347656, 50.532745361328125, -141.4369354248047, 267.83837890625, -181.45468139648438, 1710.0155029296875, 164.48255920410156, 226.59519958496094, 647.391357421875, 865.2230834960938, -652.3670043945312, 798.5279541015625, 587.158935546875, 1068.2587890625, 392.1493225097656, 76.7476577758789, 638.694580078125, 2412.729736328125, 1627.2144775390625, 11.329452514648438, 403.4336242675781, -519.8359985351562, 1155.377685546875, 567.2731323242188, -104.84111022949219, 170.7430419921875, 60.61895751953125, 233.66551208496094, 1204.3304443359375, 252.10459899902344, 1631.8338623046875, -289.15350341796875, -136.3993682861328, 540.7802734375, 683.12841796875, 244.7725372314453, 156.0328369140625, 940.58154296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000655.npy"}
{"epoch": 0.9618208516886931, "step": 656, "batch_size": 64, "mean": 663.841796875, "std": 742.0775146484375, "min": -595.9990844726562, "p10": -116.99443130493164, "median": 427.6307373046875, "p90": 1797.725927734375, "max": 2886.087646484375, "pos_frac": 0.8125, "sample": [1552.0035400390625, 732.187744140625, -595.9990844726562, -28.678119659423828, 217.70745849609375, 297.62518310546875, 333.62078857421875, 570.807373046875, 319.9635925292969, 798.05322265625, 806.850830078125, 664.363525390625, 299.0284423828125, 68.75662231445312, 399.020751953125, 1810.583740234375, -190.41326904296875, 65.33718872070312, 1767.724365234375, 166.73556518554688, 840.1636962890625, -103.1427001953125, 2103.333251953125, -154.17782592773438, -115.12008666992188, 234.41473388671875, 959.3143310546875, 1189.04052734375, 446.5701904296875, 1092.616943359375, 387.4407653808594, 2886.087646484375, 711.9395141601562, -150.5763397216797, 2367.21728515625, 882.0943603515625, 457.83331298828125, 1028.6763916015625, 2448.657958984375, 93.68881225585938, 2240.18896484375, -160.2322235107422, 1260.3807373046875, 54.86181640625, 1007.8726806640625, 1020.9306030273438, 378.49688720703125, 2223.266845703125, 360.16571044921875, 261.6742248535156, 881.4586791992188, 144.55662536621094, -161.08438110351562, 408.6912841796875, 960.1546020507812, 268.4432678222656, -25.731460571289062, 317.292724609375, -117.79772186279297, 1134.5316162109375, 1031.792724609375, -49.98607635498047, 655.3378295898438, 729.2584228515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000656.npy"}
{"epoch": 0.9632892804698973, "step": 657, "batch_size": 64, "mean": 533.8377075195312, "std": 722.1788330078125, "min": -1615.4117431640625, "p10": -112.0333885192871, "median": 462.8014221191406, "p90": 1394.8278930664062, "max": 2553.186279296875, "pos_frac": 0.859375, "sample": [1302.25, 29.24970245361328, 734.3887329101562, 477.77191162109375, 1507.7642822265625, 1631.8961181640625, 800.8225708007812, 138.04783630371094, 1396.8212890625, -128.375732421875, -367.81036376953125, 604.3031005859375, 1390.1766357421875, -100.89482116699219, 198.0516815185547, 505.1422119140625, 60.18998718261719, 348.6540222167969, 102.16295623779297, 1517.83642578125, 2454.5966796875, 511.4169921875, 80.07402801513672, 2553.186279296875, 1289.8458251953125, 1249.4249267578125, 234.89772033691406, 24.41632080078125, 694.09423828125, 641.274658203125, 827.538818359375, 447.8309326171875, 33.900146484375, -1615.4117431640625, 566.0064086914062, 602.1412353515625, 280.3988037109375, -116.80706024169922, 17.7139892578125, 332.62896728515625, 661.264404296875, -1164.7100830078125, -303.7391662597656, 607.9803466796875, 929.7191162109375, 130.8018341064453, 239.98692321777344, 585.876708984375, 443.47039794921875, 55.97052764892578, 724.4403686523438, 860.2974243164062, 180.48924255371094, 806.4689331054688, 416.53668212890625, 323.0288391113281, 1285.4688720703125, 2525.812744140625, 382.83258056640625, -223.57907104492188, 275.9295959472656, -71.46296691894531, 680.71240234375, 554.3991088867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000657.npy"}
{"epoch": 0.9647577092511013, "step": 658, "batch_size": 64, "mean": 425.004638671875, "std": 627.71630859375, "min": -1123.6024169921875, "p10": -413.93569641113277, "median": 441.28082275390625, "p90": 1205.7565185546875, "max": 2196.323974609375, "pos_frac": 0.734375, "sample": [907.8508911132812, 856.5645751953125, 795.4217529296875, -339.563720703125, 440.8700866699219, -230.81085205078125, 1150.5654296875, 267.8132019042969, -423.98175048828125, 440.8798522949219, 341.87054443359375, 1496.888671875, -3.3876495361328125, 294.5151062011719, 329.17010498046875, -537.42041015625, 441.6817932128906, 467.4181823730469, 111.77433013916016, -54.5998649597168, 564.6148681640625, 185.4515380859375, 451.74163818359375, 149.63174438476562, -1123.6024169921875, 915.1700439453125, -464.8529357910156, 581.1267700195312, 518.7780151367188, -154.61386108398438, -149.0266571044922, 274.8717041015625, -65.802001953125, 479.7236328125, 301.1214599609375, 1542.8944091796875, -30.71109962463379, 1209.7406005859375, 703.90087890625, 1137.321533203125, 106.86524963378906, 387.7564697265625, 656.4102783203125, 1516.6156005859375, 1196.4603271484375, 586.8968505859375, -393.7590637207031, 565.0569458007812, 851.8216552734375, 849.88818359375, -574.541259765625, 1382.4381103515625, -422.58282470703125, 2196.323974609375, 712.4620361328125, 1196.3209228515625, 603.92724609375, 1468.352783203125, -297.5545349121094, 754.67578125, 52.33448791503906, 466.7868347167969, 184.17779541015625, -627.837890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000658.npy"}
{"epoch": 0.9662261380323054, "step": 659, "batch_size": 64, "mean": 410.4945068359375, "std": 606.286865234375, "min": -792.6492309570312, "p10": -232.67102355957027, "median": 360.42051696777344, "p90": 1162.0375244140628, "max": 2621.2294921875, "pos_frac": 0.75, "sample": [686.4600219726562, -24.092498779296875, 185.83660888671875, 943.46875, -91.98033142089844, -250.79579162597656, 1221.4151611328125, -347.4940185546875, -554.1766967773438, -141.5944061279297, 1106.8687744140625, 335.90655517578125, 551.7704467773438, -50.84013366699219, 76.67433166503906, 176.16766357421875, 614.4139404296875, 174.50485229492188, 225.9263916015625, 158.31423950195312, 346.9106140136719, 546.7618408203125, -550.890625, 2621.2294921875, -792.6492309570312, 1369.3828125, 391.6597595214844, 619.795654296875, 1253.0948486328125, 320.481201171875, 385.8417663574219, 381.8612365722656, 0.38999366760253906, -43.76826477050781, 575.9168701171875, 373.930419921875, 10.098800659179688, -0.4695701599121094, 59.741912841796875, 1185.6812744140625, -623.9349975585938, -11.458122253417969, 752.4548950195312, -190.37989807128906, 725.3429565429688, 75.93817138671875, -60.340110778808594, 684.6348266601562, 1792.7523193359375, 1839.853759765625, 849.4213256835938, 220.3753204345703, 811.7774658203125, 465.0611267089844, 397.82489013671875, 293.2962646484375, 1021.1118774414062, 1087.8514404296875, 547.7882080078125, -323.2558898925781, 832.4710693359375, 102.71607971191406, 444.90399169921875, 483.6865234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000659.npy"}
{"epoch": 0.9676945668135095, "step": 660, "batch_size": 64, "mean": 535.7288818359375, "std": 686.8460693359375, "min": -1109.2269287109375, "p10": -210.54115905761716, "median": 456.9500274658203, "p90": 1485.5112548828126, "max": 2505.447021484375, "pos_frac": 0.765625, "sample": [-146.77210998535156, 302.09552001953125, 1149.1578369140625, 477.6556091308594, 471.52752685546875, 479.89837646484375, 660.6152954101562, 1493.1114501953125, 1160.358642578125, 889.4780883789062, 328.628662109375, -420.87005615234375, 1218.00927734375, 612.8610229492188, 906.37451171875, 1753.377197265625, -201.7771453857422, -41.79078674316406, -186.2024383544922, -289.4880065917969, 1153.4976806640625, 1587.18310546875, 81.27112579345703, 442.3725280761719, 804.5443725585938, 1964.881591796875, 329.3546142578125, -214.2971649169922, 780.9395751953125, 371.2578125, 111.5371322631836, 1467.7774658203125, -587.6585083007812, 2505.447021484375, 294.6852111816406, 649.3755493164062, 815.8823852539062, 72.20161437988281, 440.6597900390625, 835.3905639648438, 257.4973449707031, 110.43086242675781, 751.3684692382812, 850.4095458984375, -145.19189453125, -61.611114501953125, 123.46524810791016, 1684.071533203125, 961.8048706054688, 1124.8023681640625, -328.8509521484375, -20.400184631347656, -569.1102294921875, 701.675537109375, 1055.9508056640625, 636.4679565429688, 137.28463745117188, 326.2316589355469, 598.5558471679688, 2049.13232421875, -150.811279296875, -1109.2269287109375, 361.69110107421875, 418.4600524902344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000660.npy"}
{"epoch": 0.9691629955947136, "step": 661, "batch_size": 64, "mean": 465.5032958984375, "std": 677.452392578125, "min": -772.16943359375, "p10": -112.08723831176756, "median": 369.09193420410156, "p90": 1090.4141967773446, "max": 3463.065673828125, "pos_frac": 0.828125, "sample": [206.01866149902344, -98.8824234008789, 851.2257080078125, -50.81776428222656, 370.9518127441406, 71.58134460449219, 586.7015380859375, -193.26889038085938, 676.8629150390625, 301.046875, 1173.67822265625, 329.30224609375, 651.1369018554688, 617.3248291015625, 3463.065673828125, 866.23828125, 192.8802490234375, 29.171051025390625, -772.16943359375, 130.9844970703125, -32.89048385620117, 840.1377563476562, 138.45419311523438, 402.54736328125, 253.7222900390625, 208.28553771972656, 319.82122802734375, 896.1314697265625, 1300.921142578125, 54.95771789550781, 379.16864013671875, 649.8655395507812, 548.1514892578125, 1226.051025390625, 532.5082397460938, 299.58184814453125, 144.05340576171875, 313.2959289550781, 243.025146484375, 595.6913452148438, -590.285400390625, 367.2320556640625, -117.74644470214844, 783.9092407226562, 864.0511474609375, -654.5482177734375, 695.404052734375, 712.7979736328125, -13.933618545532227, 670.4627075195312, 2972.087890625, -620.9369506835938, 356.2706298828125, 36.66447448730469, 828.7877197265625, 1509.48193359375, 84.01622772216797, 574.12353515625, -132.73309326171875, 1325.5045166015625, 480.714599609375, 496.52398681640625, 447.71185302734375, 0.13568878173828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000661.npy"}
{"epoch": 0.9706314243759178, "step": 662, "batch_size": 64, "mean": 375.7431335449219, "std": 716.1057739257812, "min": -1233.657470703125, "p10": -551.084161376953, "median": 413.9751281738281, "p90": 1176.2091796875002, "max": 2438.3154296875, "pos_frac": 0.71875, "sample": [-53.514366149902344, 901.9761962890625, 1267.05224609375, -1233.657470703125, 621.3800048828125, -504.52471923828125, 447.32061767578125, 1064.963134765625, 1909.563232421875, 24.441566467285156, 891.450927734375, -571.0382080078125, -1133.2783203125, -226.37013244628906, 1479.47265625, 77.97411346435547, -9.033456802368164, 417.8251953125, 2438.3154296875, 860.6705322265625, 1191.273193359375, -180.28680419921875, 410.12506103515625, -156.231689453125, 145.88720703125, 835.489501953125, 130.34434509277344, 1029.6907958984375, 475.0544128417969, 328.24249267578125, 391.9541015625, -257.3013916015625, 396.5936279296875, -347.0545959472656, 495.41119384765625, 1082.684814453125, 633.7841796875, 430.6678771972656, 690.4268798828125, 634.6729736328125, 34.50315856933594, 143.3159942626953, 966.5218505859375, 2083.486328125, 728.2730712890625, 1355.583740234375, -849.14404296875, 466.0769958496094, 1141.059814453125, 395.65960693359375, -619.76611328125, 50.11355209350586, -94.53526306152344, 587.7451782226562, -651.1109008789062, -210.54434204101562, 600.8978271484375, 578.4786376953125, 342.2068176269531, 548.1912841796875, 594.5272827148438, -892.094482421875, -296.406982421875, 12.104415893554688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000662.npy"}
{"epoch": 0.9720998531571219, "step": 663, "batch_size": 64, "mean": 447.7265930175781, "std": 760.4644775390625, "min": -1725.1246337890625, "p10": -230.35004119873037, "median": 306.31959533691406, "p90": 1197.1114868164063, "max": 3335.766845703125, "pos_frac": 0.765625, "sample": [904.897705078125, 315.1590576171875, 856.2091674804688, 51.440948486328125, 414.2200927734375, 1154.1468505859375, 212.85679626464844, 297.4801330566406, -58.224853515625, 375.5027770996094, 80.79888153076172, 147.6477508544922, 1512.0777587890625, 883.9238891601562, 63.77702331542969, 950.1810913085938, 714.8406982421875, 30.9602108001709, -1725.1246337890625, 315.38885498046875, 280.522705078125, 580.0580444335938, -324.41265869140625, 769.8816528320312, 692.0972900390625, 373.5980224609375, 694.2766723632812, 143.02243041992188, 460.9901123046875, 3335.766845703125, -320.333740234375, 1148.02978515625, 1675.8509521484375, -5.8073577880859375, 599.8054809570312, 2279.760498046875, 1638.2628173828125, 73.72923278808594, 30.059175491333008, 1998.7388916015625, 1103.8428955078125, -267.8226623535156, 158.82958984375, 382.03948974609375, -136.8524169921875, 660.2676391601562, 1077.2554931640625, 154.7359161376953, 138.14126586914062, 1015.229736328125, 230.44935607910156, -73.6956558227539, 1215.52490234375, -1149.720458984375, 870.7950439453125, -48.44124221801758, -554.1622314453125, -142.91392517089844, 72.37826538085938, 714.9409790039062, -345.02801513671875, -72.98807525634766, -111.53463745117188, 141.1739501953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000663.npy"}
{"epoch": 0.973568281938326, "step": 664, "batch_size": 64, "mean": 511.424560546875, "std": 710.501220703125, "min": -849.461181640625, "p10": -229.18840179443356, "median": 422.4744567871094, "p90": 1339.481494140625, "max": 2515.443359375, "pos_frac": 0.734375, "sample": [1139.924560546875, -39.44793701171875, 597.1029052734375, 682.5516357421875, 891.042236328125, -81.2068862915039, 369.1539306640625, 2095.865234375, -122.2102279663086, 1603.1341552734375, -477.4599609375, 762.7429809570312, 472.05157470703125, 324.9580383300781, 564.7250366210938, 240.6344451904297, -849.461181640625, 445.1011962890625, -71.5020980834961, 551.8833618164062, 1350.6854248046875, 1430.333251953125, 87.40264892578125, 1197.98486328125, -302.4676818847656, 123.34835815429688, 2152.671875, -192.22250366210938, 580.115234375, -483.3280334472656, 1191.397216796875, 163.32498168945312, -168.19293212890625, 1214.402587890625, 519.7014770507812, 415.5909729003906, -766.6781616210938, 429.3579406738281, 703.5972900390625, 1147.284423828125, 155.0816192626953, 542.6842041015625, 1027.54833984375, -92.026123046875, 977.4496459960938, 2228.73095703125, 998.2362060546875, 239.26846313476562, 2515.443359375, -191.365478515625, -162.55728149414062, 1313.3389892578125, 1199.905029296875, 233.3221893310547, 218.20504760742188, 542.294189453125, 206.77920532226562, 330.79296875, -245.0309295654297, -53.733734130859375, 401.0891418457031, 5.175445556640625, 872.4642944335938, -425.81585693359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000664.npy"}
{"epoch": 0.9750367107195301, "step": 665, "batch_size": 64, "mean": 426.11090087890625, "std": 537.1138916015625, "min": -692.0691528320312, "p10": -124.4433479309082, "median": 371.4980010986328, "p90": 1165.7152587890628, "max": 1807.155029296875, "pos_frac": 0.78125, "sample": [-125.2343978881836, 25.89175033569336, 439.0529479980469, 405.1513977050781, -114.5959243774414, 452.9664306640625, 666.02978515625, -64.53271484375, 1573.406982421875, 441.8045654296875, 314.20806884765625, 561.2337646484375, 120.45553588867188, -519.1315307617188, 620.3045654296875, 82.6361083984375, -68.06314086914062, 238.93740844726562, -31.833847045898438, 545.4356079101562, 769.6808471679688, 211.98377990722656, 461.525146484375, 262.9261779785156, -84.85905456542969, 316.8825988769531, -692.0691528320312, 1568.9356689453125, 766.4189453125, -29.956119537353516, 780.9472045898438, 309.612548828125, 674.3849487304688, 394.1161804199219, 1189.218017578125, 229.6259765625, 1208.2513427734375, 544.60791015625, 465.1910095214844, 216.78892517089844, 261.2709045410156, 1732.669189453125, 591.09375, -340.5179443359375, 608.0776977539062, 906.507080078125, 348.87982177734375, -484.24725341796875, 561.8385009765625, 1110.87548828125, 182.7938995361328, 187.03074645996094, 939.8101196289062, 223.82302856445312, 1709.824951171875, -244.1142120361328, 704.26904296875, -192.51235961914062, 157.92965698242188, 904.7545166015625, 1807.155029296875, 51.93769836425781, 536.2088012695312, -122.59756469726562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000665.npy"}
{"epoch": 0.9765051395007343, "step": 666, "batch_size": 64, "mean": 424.9931640625, "std": 626.6178588867188, "min": -996.3318481445312, "p10": -346.2975708007812, "median": 330.5122375488281, "p90": 1065.2236450195312, "max": 2719.95849609375, "pos_frac": 0.75, "sample": [762.211669921875, 373.90545654296875, 719.5465698242188, 411.97821044921875, -414.8540344238281, 705.4511108398438, 1067.625, 995.6845703125, 141.93399047851562, 1045.3856201171875, -36.13835906982422, 272.3442687988281, 371.5265808105469, -496.168212890625, -365.69342041015625, 223.57749938964844, 260.284423828125, -142.78045654296875, -114.47772216796875, -83.42303466796875, 1317.9093017578125, -7.335990905761719, 762.3317260742188, 455.31298828125, 2106.30029296875, -8.583511352539062, 812.3555297851562, 321.8121337890625, -164.21945190429688, -682.1165161132812, 328.6189270019531, 738.3411865234375, 304.55096435546875, 1160.1103515625, 918.0474853515625, 457.93145751953125, 937.9801635742188, -369.600830078125, 408.5360107421875, 599.4705810546875, 48.7943229675293, 757.2938842773438, -301.04058837890625, 1408.060791015625, 380.2408447265625, 256.61041259765625, -996.3318481445312, 1059.6204833984375, 329.1658630371094, 59.404937744140625, 424.00604248046875, 731.9417114257812, 836.089111328125, -28.531612396240234, 221.93701171875, 146.0821533203125, 242.8143310546875, 318.6068115234375, 2719.95849609375, 247.29347229003906, -532.271484375, 954.4364013671875, 331.8586120605469, 1487.846923828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000666.npy"}
{"epoch": 0.9779735682819384, "step": 667, "batch_size": 64, "mean": 507.36602783203125, "std": 673.9800415039062, "min": -815.4957885742188, "p10": -269.30458374023436, "median": 486.3523406982422, "p90": 1294.4839965820313, "max": 2346.6611328125, "pos_frac": 0.8125, "sample": [466.0671691894531, -581.4059448242188, 684.3840942382812, -707.5418090820312, -77.86117553710938, 1004.9617919921875, 1010.1485595703125, 611.1516723632812, 529.1193237304688, 1301.7608642578125, 269.576904296875, 169.19314575195312, 105.66039276123047, 380.673583984375, 323.06304931640625, -815.4957885742188, 757.3226928710938, 546.3109741210938, 1791.1258544921875, 930.4663696289062, 782.0590209960938, 277.0075988769531, 1277.504638671875, 59.739967346191406, 647.21142578125, 310.23944091796875, 972.4899291992188, -199.2449951171875, 989.7244873046875, 843.4273071289062, 761.2156982421875, 635.871337890625, 1093.49267578125, 651.36767578125, 442.6544494628906, -244.86807250976562, 411.64117431640625, 506.63751220703125, 385.5777282714844, 6.621391296386719, -92.16697692871094, 1604.6085205078125, 1860.5169677734375, 1.9739990234375, 726.2849731445312, 1506.6998291015625, 727.9713745117188, 200.74728393554688, 134.8646240234375, 793.025634765625, 6.053306579589844, 877.2757568359375, -487.524658203125, 158.22877502441406, 2322.224853515625, 2346.6611328125, 79.07762908935547, 1047.86474609375, -642.048095703125, 262.3110656738281, -668.2569580078125, -279.7773742675781, -101.07431030273438, 776.8314819335938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000667.npy"}
{"epoch": 0.9794419970631424, "step": 668, "batch_size": 64, "mean": 348.3375244140625, "std": 792.5498657226562, "min": -1351.2894287109375, "p10": -572.6430236816406, "median": 282.11138916015625, "p90": 1286.4211181640626, "max": 2743.81591796875, "pos_frac": 0.703125, "sample": [524.234619140625, 273.0933837890625, 1342.17138671875, -1348.9173583984375, 165.57447814941406, 966.7347412109375, 1463.1480712890625, 217.7235565185547, 1091.91748046875, 475.60467529296875, -679.4807739257812, 705.8533325195312, 18.55681610107422, 71.2088623046875, 358.43157958984375, 1074.2059326171875, 253.3233184814453, 27.453872680664062, -492.8890380859375, -606.8233032226562, -969.09228515625, 1005.0530395507812, -449.94964599609375, 827.9644165039062, -177.94271850585938, 837.2213745117188, 357.8525695800781, 461.2839050292969, 36.93272399902344, 693.05419921875, 336.025146484375, 654.6953735351562, 1250.1279296875, -475.2087707519531, 1061.600830078125, 412.19842529296875, -896.6778564453125, 1110.0653076171875, 74.13227081298828, -234.25961303710938, 184.17498779296875, -70.94306182861328, 1494.115234375, -175.15074157714844, -620.4187622070312, 1221.8338623046875, -135.52032470703125, -464.38409423828125, 299.7160339355469, -32.560707092285156, 309.6898193359375, 222.1920166015625, 291.12939453125, 247.63015747070312, -201.68734741210938, 1301.975341796875, 404.6623229980469, 338.23065185546875, 2420.83447265625, -1351.2894287109375, -95.68846893310547, 2085.578857421875, 59.464202880859375, 2743.81591796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000668.npy"}
{"epoch": 0.9809104258443465, "step": 669, "batch_size": 64, "mean": 507.1175842285156, "std": 676.0285034179688, "min": -1356.3023681640625, "p10": -228.6688720703125, "median": 434.3072052001953, "p90": 1442.2489013671877, "max": 2602.919677734375, "pos_frac": 0.8125, "sample": [194.2798309326172, 53.215240478515625, -67.4372787475586, 1055.9583740234375, -890.6073608398438, 298.13018798828125, 695.6976928710938, 740.9987182617188, 384.364501953125, -83.13042449951172, 2602.919677734375, -231.77798461914062, 26.762680053710938, 330.51153564453125, -299.9055480957031, 396.2580871582031, 1555.0626220703125, -62.94927215576172, 119.97272491455078, 210.0294952392578, 547.170654296875, 779.1165161132812, 1590.792236328125, 1321.590087890625, 77.89445495605469, 1750.38623046875, 50.8875732421875, 1521.00146484375, 1116.098876953125, 63.80051040649414, 582.3292236328125, 1296.5423583984375, -221.41427612304688, 579.6376953125, 1460.655029296875, -598.847900390625, -288.2176208496094, 824.8696899414062, 815.010009765625, 903.0178833007812, 1399.30126953125, 371.77593994140625, 129.13623046875, 324.2405700683594, -1356.3023681640625, 837.74853515625, 940.7730712890625, 623.7074584960938, 428.42376708984375, 268.24102783203125, -546.2671508789062, 1014.0685424804688, 440.1906433105469, 491.9889831542969, 606.8343505859375, 781.3505249023438, 72.47750091552734, 252.98208618164062, 506.3759460449219, 1349.8546142578125, -36.51565170288086, 1525.15380859375, 446.5715637207031, 382.7388610839844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000669.npy"}
{"epoch": 0.9823788546255506, "step": 670, "batch_size": 64, "mean": 519.20654296875, "std": 697.5200805664062, "min": -1307.198974609375, "p10": -274.8091552734375, "median": 378.0685272216797, "p90": 1460.0151611328126, "max": 1915.34130859375, "pos_frac": 0.75, "sample": [204.33535766601562, 509.56219482421875, 240.0787353515625, 1462.798583984375, 479.2277526855469, -215.64776611328125, -209.83016967773438, -22.04136848449707, 1077.1431884765625, 617.1071166992188, 297.8890380859375, -112.50067138671875, -1307.198974609375, 1915.34130859375, 1249.6339111328125, 1801.450927734375, 752.5563354492188, 389.7464294433594, 1229.23193359375, 615.418212890625, 174.6421661376953, -571.0606079101562, 1417.2930908203125, -263.6898193359375, -579.7752075195312, 366.390625, 1551.4234619140625, 34.145511627197266, 1081.206787109375, -524.207275390625, 1551.2325439453125, 820.281005859375, -245.87709045410156, 252.56964111328125, 221.0182647705078, 1433.8634033203125, 1280.14794921875, -313.439453125, 201.24478149414062, 1401.4903564453125, 406.11871337890625, 1234.6026611328125, 1043.72265625, -234.75640869140625, 1910.6968994140625, 859.753662109375, 326.61541748046875, 333.2170104980469, -354.6768798828125, -279.5745849609375, -1.498443603515625, 1453.5205078125, 244.50726318359375, 363.53070068359375, 1531.5438232421875, 191.57571411132812, 290.22998046875, 1110.08984375, -17.8199462890625, 922.7213134765625, 458.6496887207031, 511.51031494140625, 67.7723388671875, 593.961669921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000670.npy"}
{"epoch": 0.9838472834067548, "step": 671, "batch_size": 64, "mean": 397.42791748046875, "std": 700.272216796875, "min": -1929.19873046875, "p10": -219.7996841430664, "median": 349.6846618652344, "p90": 1284.2734130859376, "max": 1926.9512939453125, "pos_frac": 0.765625, "sample": [54.18247985839844, -1929.19873046875, 498.723876953125, 59.278106689453125, -151.50238037109375, 762.86865234375, 234.62863159179688, 322.65350341796875, 746.8355712890625, 333.71002197265625, 411.22821044921875, -202.37271118164062, 168.59449768066406, 1164.57666015625, 1004.6109008789062, 71.61119079589844, 262.5414733886719, 180.68646240234375, 879.8157958984375, 785.6517944335938, 100.91507720947266, 1452.1170654296875, -1659.353759765625, 1300.5736083984375, 817.1222534179688, -43.39649963378906, 1901.861083984375, 446.4599914550781, 230.509521484375, 577.2200317382812, -152.973876953125, -610.5736083984375, -425.4097595214844, 469.51715087890625, 1246.2396240234375, -200.10052490234375, 365.6593017578125, 321.1417541503906, 670.0642700195312, 1385.1365966796875, 1480.15234375, 420.2623596191406, -227.2683868408203, 106.08235931396484, 829.4771728515625, 4.9561004638671875, 266.4088134765625, 981.6412353515625, -443.8924255371094, -84.98233795166016, 729.9176635742188, 448.0680236816406, -94.4286880493164, 223.09735107421875, 1222.2752685546875, 923.553466796875, -167.90127563476562, -676.23095703125, 239.21546936035156, 1462.3822021484375, 516.9559326171875, 1926.9512939453125, 1116.746826171875, 380.0962219238281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000671.npy"}
{"epoch": 0.9853157121879589, "step": 672, "batch_size": 64, "mean": 414.93212890625, "std": 764.0545043945312, "min": -1268.8248291015625, "p10": -526.590249633789, "median": 369.20790100097656, "p90": 1329.1379394531252, "max": 2800.21142578125, "pos_frac": 0.6875, "sample": [442.64849853515625, -2.808197021484375, -333.2720947265625, 171.29269409179688, -228.67042541503906, 78.43763732910156, 866.5220947265625, 391.37744140625, 1696.6351318359375, 385.3525695800781, 121.15605926513672, -123.29688262939453, -419.4098205566406, -258.8343811035156, 615.541259765625, 280.7091369628906, -151.11209106445312, -714.6356201171875, 436.05255126953125, 1756.4425048828125, 87.77911376953125, 842.64892578125, -27.81220245361328, -121.00595092773438, 1274.9669189453125, -1268.8248291015625, 1107.76171875, -953.31103515625, 455.94244384765625, 2800.21142578125, -159.62625122070312, 942.3049926757812, 1034.417724609375, 1286.30224609375, 1139.5968017578125, 898.438232421875, 553.974853515625, -572.5247192382812, 211.73263549804688, 2079.218994140625, -968.449951171875, -59.375511169433594, 353.063232421875, 1224.2401123046875, 849.4878540039062, 593.0718994140625, 908.4149780273438, 199.5380859375, 787.5089111328125, 636.7235107421875, 1495.587890625, 119.44839477539062, -88.49740600585938, 769.8221435546875, 58.279327392578125, 752.1035766601562, 1347.49609375, 82.67949676513672, -52.56712341308594, -758.9764404296875, 594.6610717773438, 333.53924560546875, 1498.9888916015625, -743.45361328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000672.npy"}
{"epoch": 0.986784140969163, "step": 673, "batch_size": 64, "mean": 385.8390808105469, "std": 664.5762939453125, "min": -1409.4495849609375, "p10": -317.77699279785156, "median": 397.81431579589844, "p90": 1088.81224975586, "max": 3374.142578125, "pos_frac": 0.78125, "sample": [-204.2537078857422, -682.4207153320312, 498.5799865722656, 939.5962524414062, 347.63995361328125, 493.44891357421875, 652.58251953125, 393.2644348144531, 1302.0126953125, 420.6733093261719, -182.93821716308594, 1513.52197265625, 499.3146057128906, 635.96484375, 128.9696502685547, 87.09814453125, -453.62432861328125, 801.7442016601562, 1546.114501953125, 336.8306579589844, -322.18621826171875, 373.06329345703125, 355.64825439453125, -93.49716186523438, 131.5520477294922, -1409.4495849609375, 216.40780639648438, 684.9635009765625, -454.7898864746094, 175.82017517089844, 750.9747314453125, 1437.152099609375, 170.7940216064453, -64.46888732910156, 757.7263793945312, 540.9125366210938, 402.36419677734375, -603.2490844726562, 516.857177734375, 861.412109375, 752.8140869140625, 51.60596466064453, 180.78042602539062, 771.72314453125, 405.3548583984375, 75.47284698486328, 562.2779541015625, -177.9340057373047, 477.5632019042969, 3374.142578125, 99.5886459350586, 94.34719848632812, 549.9451904296875, 648.173583984375, -802.0240478515625, 537.3780517578125, 461.79986572265625, 1152.761962890625, 157.77557373046875, -307.4888000488281, -92.5197982788086, 627.3168334960938, 1271.1246337890625, 319.59417724609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000673.npy"}
{"epoch": 0.9882525697503671, "step": 674, "batch_size": 64, "mean": 510.9061584472656, "std": 687.9755249023438, "min": -565.6873779296875, "p10": -278.332844543457, "median": 479.3643035888672, "p90": 1209.1786865234378, "max": 3177.885498046875, "pos_frac": 0.75, "sample": [-104.38787841796875, 38.279781341552734, 849.1300659179688, 739.0770874023438, 531.990966796875, 804.6478271484375, 46.13993835449219, -74.44035339355469, -517.0420532226562, 1237.594970703125, 1598.3402099609375, 952.3455810546875, -12.987651824951172, 537.1049194335938, -233.0305633544922, 508.6763916015625, -152.96231079101562, 2021.409423828125, 635.567138671875, 3177.885498046875, 564.0936889648438, 498.58734130859375, 12.262615203857422, 1018.9518432617188, 531.0086059570312, -297.74810791015625, 1091.99072265625, 460.1412658691406, 693.964599609375, -126.033203125, 1142.8740234375, 2073.91015625, -143.44131469726562, 336.2969665527344, 418.39898681640625, 812.6083984375, -344.46356201171875, -416.35687255859375, -378.0220642089844, 914.9451293945312, 643.401611328125, 434.0415954589844, 301.21087646484375, 1685.6875, 342.6893615722656, 1030.4102783203125, 892.6744384765625, 977.9464721679688, 538.823486328125, 114.69526672363281, -565.6873779296875, 185.2712860107422, 1865.4735107421875, -331.3750305175781, -39.767539978027344, 700.3242797851562, 175.97607421875, 790.462158203125, 414.9536437988281, 585.3206787109375, 147.64834594726562, -170.1992645263672, 193.71786499023438, 336.98724365234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000674.npy"}
{"epoch": 0.9897209985315712, "step": 675, "batch_size": 64, "mean": 518.9458618164062, "std": 716.4024658203125, "min": -820.1660766601562, "p10": -139.90013122558588, "median": 355.6718292236328, "p90": 1495.7395263671879, "max": 2914.3466796875, "pos_frac": 0.765625, "sample": [234.8284454345703, 1538.683837890625, 543.2412719726562, 105.46282958984375, 52.939537048339844, 156.7241668701172, 828.1640014648438, -49.37477111816406, 616.4024047851562, 306.9973449707031, 867.435546875, -187.3704833984375, 602.14013671875, 1150.376220703125, -159.252685546875, -33.821075439453125, 160.7373046875, 1418.8330078125, 477.4484558105469, 482.0223693847656, 746.3930053710938, -13.347999572753906, 2465.978515625, 786.1358642578125, -73.19721221923828, 381.0769348144531, -5.089725494384766, 723.8683471679688, 554.8684692382812, 467.97833251953125, 369.388916015625, 1609.33740234375, 303.9129638671875, 1964.106689453125, 15.567657470703125, 1213.9017333984375, 2914.3466796875, -212.5266876220703, 987.1287231445312, 2168.63623046875, 192.43067932128906, 128.88790893554688, -19.35663604736328, 83.29541015625, 313.72247314453125, -94.74417114257812, -820.1660766601562, 255.06027221679688, 668.0297241210938, 255.41714477539062, 181.71180725097656, 1528.699462890625, 674.8861083984375, -585.2145385742188, -605.2260131835938, 372.9097900390625, 96.597900390625, 735.349609375, -64.36520385742188, 1381.5592041015625, 341.9547424316406, 1169.383056640625, 1030.58056640625, -489.9493713378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000675.npy"}
{"epoch": 0.9911894273127754, "step": 676, "batch_size": 64, "mean": 493.9117431640625, "std": 616.9157104492188, "min": -922.5384521484375, "p10": -124.51998291015623, "median": 424.3007354736328, "p90": 1383.738037109375, "max": 2491.1787109375, "pos_frac": 0.75, "sample": [55.243858337402344, 1594.458740234375, 880.0453491210938, 1046.4659423828125, -63.67198181152344, -101.4223403930664, 416.9076232910156, 755.276123046875, 40.095577239990234, -922.5384521484375, 672.0281982421875, 32.465484619140625, -94.75544738769531, 1234.68603515625, 1755.204833984375, 2491.1787109375, -76.58069610595703, -190.0640106201172, -49.54148864746094, -167.86337280273438, -43.111167907714844, 530.690185546875, 696.5882568359375, -106.73001098632812, -132.14425659179688, 787.7349853515625, 99.44972229003906, 1379.520263671875, 301.8173522949219, 1691.8460693359375, -48.698089599609375, 592.8612060546875, 254.57424926757812, 769.716796875, 118.30017852783203, 857.4393920898438, 1552.1357421875, 452.83905029296875, 614.37939453125, 1431.85986328125, 706.3848876953125, -44.792808532714844, 199.4797821044922, -329.71295166015625, 1306.0018310546875, 967.802001953125, 63.980010986328125, 898.9541625976562, 491.611083984375, 431.69384765625, 951.1079711914062, 595.5706787109375, 263.7566223144531, 388.8267822265625, 59.89081573486328, 250.198974609375, 886.90380859375, -157.8240966796875, 1385.545654296875, 139.9707489013672, 500.4993896484375, 237.46612548828125, -250.9644317626953, 559.3146362304688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000676.npy"}
{"epoch": 0.9926578560939795, "step": 677, "batch_size": 64, "mean": 468.4746398925781, "std": 772.2254028320312, "min": -919.8201904296875, "p10": -254.76405334472653, "median": 343.50836181640625, "p90": 1247.7485229492188, "max": 3883.083251953125, "pos_frac": 0.765625, "sample": [140.1356201171875, 3127.23583984375, 488.5137023925781, 37.013877868652344, 469.8952331542969, -318.7032470703125, -157.57949829101562, -234.44540405273438, 410.3323059082031, -14.8131103515625, 556.4862060546875, 504.8413391113281, 73.94468688964844, 1362.751708984375, 278.8951416015625, 945.0103149414062, 854.889892578125, 47.77070617675781, 267.42669677734375, 245.8472137451172, 1213.613525390625, 245.1867218017578, 1093.252197265625, -264.54534912109375, 360.51312255859375, 1354.7662353515625, 100.73365783691406, 487.7247009277344, 351.49066162109375, 1158.3548583984375, 335.52606201171875, -47.627532958984375, -186.92506408691406, 218.65078735351562, 149.947998046875, -665.2377319335938, 892.7032470703125, 287.11175537109375, 3883.083251953125, 167.71575927734375, 209.10867309570312, 492.56561279296875, 935.0775756835938, 717.9618530273438, -174.0248565673828, 1262.3778076171875, 377.5679016113281, 286.0374450683594, 805.852294921875, 254.53211975097656, 1458.869873046875, -44.9471321105957, 790.9202880859375, 374.6562805175781, 590.5218505859375, -469.3191833496094, -919.8201904296875, 1801.6787109375, 597.7904663085938, 745.2075805664062, 860.1551513671875, -263.4720458984375, -827.5150756835938, -100.89456176757812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000677.npy"}
{"epoch": 0.9941262848751835, "step": 678, "batch_size": 64, "mean": 566.3223876953125, "std": 802.6676635742188, "min": -1907.691650390625, "p10": -194.74255981445307, "median": 501.8385772705078, "p90": 1262.2265869140626, "max": 3234.33447265625, "pos_frac": 0.8125, "sample": [263.1174621582031, 1004.112060546875, 570.8531494140625, 632.111572265625, 552.010986328125, -123.56936645507812, 548.6631469726562, -1907.691650390625, 78.1695556640625, -218.72970581054688, -87.44309997558594, 1230.3099365234375, 40.88481521606445, 1222.3692626953125, -138.77255249023438, 572.41064453125, 892.6360473632812, -340.12420654296875, 935.3771362304688, 889.0678100585938, 1095.89794921875, 1275.9051513671875, 14.811874389648438, 2484.21435546875, 1213.3922119140625, 3234.33447265625, 97.08891296386719, 335.53570556640625, -13.049945831298828, 1186.5733642578125, -598.1207885742188, 154.24337768554688, 225.07232666015625, 813.5177612304688, 202.0040283203125, 1031.4803466796875, 170.56826782226562, 1208.8638916015625, 271.38909912109375, 934.2796020507812, -224.68821716308594, 1021.6325073242188, 1320.594482421875, 1668.0645751953125, 352.48492431640625, 271.047607421875, 2588.108642578125, 2360.363037109375, 127.317626953125, 542.3515625, 347.6397705078125, -650.035400390625, 465.6736145019531, 7.1812286376953125, 54.79766082763672, 113.93861389160156, 643.6760864257812, 1121.6707763671875, 564.2233276367188, 538.0035400390625, 393.7684020996094, -19.64449691772461, 956.877685546875, -274.1750183105469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000678.npy"}
{"epoch": 0.9955947136563876, "step": 679, "batch_size": 64, "mean": 384.89459228515625, "std": 635.8862915039062, "min": -784.001953125, "p10": -308.50939331054684, "median": 288.9309387207031, "p90": 1340.2936767578128, "max": 2074.14306640625, "pos_frac": 0.71875, "sample": [462.353515625, 8.557378768920898, 1276.865234375, 2074.14306640625, 446.2267150878906, 804.1776733398438, 896.5016479492188, 417.8753967285156, 1367.477294921875, 344.7701110839844, 404.8360595703125, 474.731201171875, 412.7898864746094, 135.2190399169922, 1035.573486328125, -390.0237121582031, -163.75375366210938, 102.73104858398438, 238.35003662109375, 915.2022705078125, 712.186767578125, 1551.4110107421875, -144.0060577392578, 110.49420166015625, 113.56715393066406, -318.7541809082031, 420.167236328125, 1571.04443359375, -482.0688781738281, 517.087890625, 832.55224609375, 64.55601501464844, -68.91690063476562, -46.67925262451172, 1133.119140625, 370.61138916015625, 1639.998779296875, 145.99453735351562, -219.40045166015625, 109.37064361572266, 317.3063659667969, -187.8017120361328, 464.9566955566406, -45.286224365234375, -397.2277526855469, 1535.259765625, 260.5555114746094, -284.6048889160156, -784.001953125, -198.92758178710938, 944.6167602539062, -676.5075073242188, 654.1292724609375, -228.3404998779297, 512.62744140625, 259.0180358886719, -141.33731079101562, 256.90728759765625, 724.8121948242188, 2029.886962890625, 234.09091186523438, 669.6287841796875, -692.72998046875, 129.2821807861328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000679.npy"}
{"epoch": 0.9970631424375918, "step": 680, "batch_size": 64, "mean": 513.0979614257812, "std": 775.045654296875, "min": -2445.012451171875, "p10": -207.76476745605464, "median": 387.95489501953125, "p90": 1565.10712890625, "max": 2389.80517578125, "pos_frac": 0.828125, "sample": [1197.55322265625, 392.0730895996094, 418.60906982421875, 1001.1041259765625, 187.37876892089844, 1157.172607421875, 1855.724853515625, 552.0760498046875, 171.79507446289062, 383.8367004394531, 459.1632080078125, 200.8091583251953, 835.004638671875, -443.1797180175781, 1116.8941650390625, 1560.41357421875, 668.5691528320312, -681.72265625, -304.61419677734375, 937.8544921875, 1139.093994140625, 201.60919189453125, -131.1861114501953, 334.4892272949219, -222.12570190429688, 1054.254150390625, 834.87255859375, 182.5266571044922, 718.995361328125, 1527.895263671875, 298.89990234375, 28.68146324157715, 152.1383056640625, 1778.139892578125, 1065.791015625, 195.32652282714844, 200.1466522216797, 520.0185546875, 1984.1297607421875, -726.5140991210938, 144.07537841796875, 310.7897033691406, 484.62139892578125, 628.468017578125, 150.91029357910156, -2445.012451171875, 1826.2982177734375, -174.25592041015625, -593.4236450195312, 1567.11865234375, 656.3828125, 2389.80517578125, 21.136337280273438, 68.02861022949219, 136.61398315429688, 1491.6572265625, 699.1219482421875, 1605.7669677734375, -71.00161743164062, 183.31607055664062, -125.11349487304688, 338.5984191894531, 9.341766357421875, 731.3280639648438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000680.npy"}
{"epoch": 0.9985315712187959, "step": 681, "batch_size": 64, "mean": 404.0209655761719, "std": 667.7122192382812, "min": -895.3251953125, "p10": -432.8394439697265, "median": 383.88702392578125, "p90": 1260.8903320312504, "max": 2256.61181640625, "pos_frac": 0.6875, "sample": [775.3059692382812, -329.9012451171875, 522.7007446289062, 98.95330810546875, 910.8958740234375, -895.3251953125, 52.790557861328125, 415.0904235839844, -458.0098571777344, 210.4754638671875, -205.30389404296875, 352.6836242675781, -453.401123046875, -458.61553955078125, 1299.167724609375, 1739.076416015625, 470.754638671875, -167.08282470703125, -274.99322509765625, 1080.9644775390625, -202.15185546875, -8.690574645996094, 882.35546875, 507.6947937011719, 1753.195556640625, 660.4850463867188, 221.06646728515625, 1071.638671875, 981.1290283203125, 696.8065795898438, 126.0271987915039, 469.9503173828125, 191.9859619140625, -108.75509643554688, -215.70849609375, 259.59234619140625, 229.61740112304688, -692.6953125, 453.6640319824219, -105.44889831542969, -405.04949951171875, 2256.61181640625, -731.147705078125, 922.3756713867188, -83.16683197021484, 557.3666381835938, -444.7494201660156, 205.5796661376953, 61.54408264160156, -338.3641357421875, 1067.67138671875, -335.3456726074219, 940.3013305664062, 562.0034790039062, 1373.5302734375, 527.7026977539062, 1171.576416015625, 981.903564453125, 1035.613525390625, 734.2894287109375, 178.99253845214844, 1393.9927978515625, 1406.0706787109375, 960.0538330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000681.npy"}